From f798539f12d4640e3e255d36f24f4bced8aad0e0 Mon Sep 17 00:00:00 2001 From: gmillot <gael.millot@pasteur.fr> Date: Tue, 3 Dec 2024 13:04:09 +0100 Subject: [PATCH] debuuged --- .gitignore | 8 ++++++++ README.md | 2 +- main - Copy.nf | 53 ------------------------------------------------- main.nf | 3 ++- nextflow.config | 4 ++-- 5 files changed, 13 insertions(+), 57 deletions(-) create mode 100644 .gitignore delete mode 100644 main - Copy.nf diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5c40886 --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +work/ +singularity/ +results/ +.nextflow/ +.nextflow* +dataset/ +OFNOTE.txt +dev/ diff --git a/README.md b/README.md index 846fb35..e612e0b 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ### 1. Prerequisite Installation of:<br /> -[nextflow DSL2](https://gael-millot.github.io/protocols/docs/Protocol%20152-rev0%20DSL2.html#_Toc159933761)<br /> +[nextflow DSL1](https://gael-millot.github.io/protocols/docs/Protocol%20152-rev0%20DSL2.html#_Toc159933761)<br /> [Graphviz](https://www.graphviz.org/download/), `sudo apt install graphviz` for Linux ubuntu<br /> [Apptainer](https://gael-millot.github.io/protocols/docs/Protocol%20135-rev0%20APPTAINER.html#_Toc160091693)<br /> diff --git a/main - Copy.nf b/main - Copy.nf deleted file mode 100644 index b6579ba..0000000 --- a/main - Copy.nf +++ /dev/null @@ -1,53 +0,0 @@ - -process columnSorting { - label 'bash' // see the withLabel: bash in the nextflow config file - publishDir "${out_path}", mode: 'copy', overwrite: false - cache 'false' - - //no channel input here for the vcf, because I do not transform it - input: - file vcf_gz from vcf_ch3 - val header_line_nb from header_line_nb_ch - - - output: - file "tempo.gz" - // file "tempo.gz" to have the file in the output result file - - script: - """ - zcat ${vcf_gz} | awk -v line=${header_line_nb} -v aff="$affected_patients_col_nb" -v unaff="$unaffected_patients_col_nb" ' - BEGIN{ - FS="\\t" - split(aff, 
array1, " ") # aff="10 12 14" and is added into the array1 array such that index 1 points to 10, index 2 to 12 and index 3 to 14 - for(i in array1){ - aff_arr[array1[i]]++ # a new aff_arr array is created such that the first index is 10 and points to 1, the second is 12 and points to 2, etc. - } - split(unaff, array2, " ") - for(i in array2){ - unaff_arr[array2[i]]++ - } - }{ - if(NR < line){ - print \$0 - }else{ - OFS="" - ORS="" - for(i=1;i<=NF;i++){ - if( ! (i in aff_arr) && ! ( i in unff_arr)){ # warning: ! is only for the next (), not after. In addition, the in operator on arrays checks the array index. Thus, creating aff_arr was the simpliest way to have i in "12 14 16" - print \$i"\\t" - } - } - for(i in array1){ - print \$array1[i]"\\t" - } - for(i in array2){ - print \$array2[i]"\\t" - } - print "\\n" - } - }' | gzip > tempo.gz - """ - // warning: ${in_path}/${sample_name} instead of ${vcf_gz} does not work, because the file used is not here anymore (do not use path in nextflow) -} - diff --git a/main.nf b/main.nf index d2bce4a..74b0cc5 100644 --- a/main.nf +++ b/main.nf @@ -1,3 +1,4 @@ +nextflow.enable.dsl=1 /* ######################################################################### ## ## @@ -65,7 +66,7 @@ patient_name_ch = Channel.value(["affected_patients", "unaffected_patients"]) // if(system_exec == 'local' || system_exec == 'slurm'){ def file_exists1 = in_path_test.exists() if( ! file_exists1){ - error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID sample_path PARAMETER IN nextflow.config FILE: ${sample_path}\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n" + error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID in_path_test PARAMETER IN nextflow.config FILE: ${in_path_test}\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n" } def file_exists2 = log_file.exists() if( ! 
file_exists2){ diff --git a/nextflow.config b/nextflow.config index fb3a8c8..d6a97eb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,7 +35,7 @@ env { git_path="https://gitlab.pasteur.fr/Gael/08002_bourgeron/" //in_path="/pasteur/gaia/projets/p02/ghfc_wgs/Transfert/Dyslexia" - in_path="/mnt/share/Users/Gael/Documents/Git_projects/08002_bourgeron/dataset" + in_path="/mnt/c/Users/gmillot/Documents/Git_projects/nf_column_reorder/dataset" //sample_name="Dyslexia.gatk-vqsr.splitted.norm.vep.merged.vcf.gz" sample_name="test.vcf.gz" // Dyslexia.gatk-vqsr.splitted.norm.vep.merged_first_10000.vcf.gz" //Warning: do not write the out_path now. See below. If written here, the one below is not considered" @@ -153,7 +153,7 @@ dag { // define apptainer parameters -apptainer { +singularity { enabled = true autoMounts = true // automatically mounts host paths in the executed container if(system_exec == 'slurm' || system_exec == 'slurm_local'){ -- GitLab