diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..5c40886496d66243c75fa01bd6b4c5f54e45529c --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +work/ +singularity/ +results/ +.nextflow/ +.nextflow* +dataset/ +OFNOTE.txt +dev/ diff --git a/README.md b/README.md index 846fb353ecfdbc1a0cb7b5cfb18006b21eb5576c..e612e0b14f223a6238723e0819034e8a8ec87271 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ ### 1. Prerequisite Installation of:<br /> -[nextflow DSL2](https://gael-millot.github.io/protocols/docs/Protocol%20152-rev0%20DSL2.html#_Toc159933761)<br /> +[nextflow DSL1](https://gael-millot.github.io/protocols/docs/Protocol%20152-rev0%20DSL2.html#_Toc159933761)<br /> [Graphviz](https://www.graphviz.org/download/), `sudo apt install graphviz` for Linux ubuntu<br /> [Apptainer](https://gael-millot.github.io/protocols/docs/Protocol%20135-rev0%20APPTAINER.html#_Toc160091693)<br /> diff --git a/main - Copy.nf b/main - Copy.nf deleted file mode 100644 index b6579baa59ba249ea6ef4b527795d8007365dd8e..0000000000000000000000000000000000000000 --- a/main - Copy.nf +++ /dev/null @@ -1,53 +0,0 @@ - -process columnSorting { - label 'bash' // see the withLabel: bash in the nextflow config file - publishDir "${out_path}", mode: 'copy', overwrite: false - cache 'false' - - //no channel input here for the vcf, because I do not transform it - input: - file vcf_gz from vcf_ch3 - val header_line_nb from header_line_nb_ch - - - output: - file "tempo.gz" - // file "tempo.gz" to have the file in the output result file - - script: - """ - zcat ${vcf_gz} | awk -v line=${header_line_nb} -v aff="$affected_patients_col_nb" -v unaff="$unaffected_patients_col_nb" ' - BEGIN{ - FS="\\t" - split(aff, array1, " ") # aff="10 12 14" and is added into the array1 array such that index 1 points to 10, index 2 to 12 and index 3 to 14 - for(i in array1){ - aff_arr[array1[i]]++ # a new aff_arr array is created such that the first index is 10 and points 
to 1, the second is 12 and points to 2, etc. - } - split(unaff, array2, " ") - for(i in array2){ - unaff_arr[array2[i]]++ - } - }{ - if(NR < line){ - print \$0 - }else{ - OFS="" - ORS="" - for(i=1;i<=NF;i++){ - if( ! (i in aff_arr) && ! ( i in unff_arr)){ # warning: ! is only for the next (), not after. In addition, the in operator on arrays checks the array index. Thus, creating aff_arr was the simpliest way to have i in "12 14 16" - print \$i"\\t" - } - } - for(i in array1){ - print \$array1[i]"\\t" - } - for(i in array2){ - print \$array2[i]"\\t" - } - print "\\n" - } - }' | gzip > tempo.gz - """ - // warning: ${in_path}/${sample_name} instead of ${vcf_gz} does not work, because the file used is not here anymore (do not use path in nextflow) -} - diff --git a/main.nf b/main.nf index d2bce4aad00af5b80e68e9ce9595f04dd1ba84d9..74b0cc595aa48dfdfe4c19abffed5ad88860748a 100644 --- a/main.nf +++ b/main.nf @@ -1,3 +1,4 @@ +nextflow.enable.dsl=1 /* ######################################################################### ## ## @@ -65,7 +66,7 @@ patient_name_ch = Channel.value(["affected_patients", "unaffected_patients"]) // if(system_exec == 'local' || system_exec == 'slurm'){ def file_exists1 = in_path_test.exists() if( ! file_exists1){ - error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID sample_path PARAMETER IN nextflow.config FILE: ${sample_path}\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n" + error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID in_path OR sample_name PARAMETER IN nextflow.config FILE: ${in_path_test}\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n" } def file_exists2 = log_file.exists() if( ! 
file_exists2){ diff --git a/nextflow.config b/nextflow.config index fb3a8c8d5b3e4d14512fa35271f6dfa6da06c471..d6a97eb1be23bb72e14fd28ae0426dc49ddb9269 100644 --- a/nextflow.config +++ b/nextflow.config @@ -35,7 +35,7 @@ env { git_path="https://gitlab.pasteur.fr/Gael/08002_bourgeron/" //in_path="/pasteur/gaia/projets/p02/ghfc_wgs/Transfert/Dyslexia" - in_path="/mnt/share/Users/Gael/Documents/Git_projects/08002_bourgeron/dataset" + in_path="/mnt/c/Users/gmillot/Documents/Git_projects/nf_column_reorder/dataset" //sample_name="Dyslexia.gatk-vqsr.splitted.norm.vep.merged.vcf.gz" sample_name="test.vcf.gz" // Dyslexia.gatk-vqsr.splitted.norm.vep.merged_first_10000.vcf.gz" //Warning: do not write the out_path now. See below. If written here, the one below is not considered" @@ -153,7 +153,7 @@ dag { // define apptainer parameters -apptainer { +singularity { enabled = true autoMounts = true // automatically mounts host paths in the executed container if(system_exec == 'slurm' || system_exec == 'slurm_local'){