diff --git a/bin/print_report.R b/bin/print_report.R index a361f811745168a6d8da3d87c879464c2c5c49b3..51c74ccf27086998d6ccb6b178e17d904302fba8 100755 --- a/bin/print_report.R +++ b/bin/print_report.R @@ -1,4 +1,4 @@ - +#!/usr/bin/env Rscript ######################################################################### ## ## diff --git a/dev/test.config b/dev/test.config index 88bfc44822d5b34d0275724ef655590c7ff7fc50..4d5b98b664d28d7ac1d067dcdf6d8d851b1ac718 100755 --- a/dev/test.config +++ b/dev/test.config @@ -21,13 +21,13 @@ env { //in_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset" //in_path="/mnt/share/14985_loot/dataset/B2699/00_Rawdata" //in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/3" // where initial fastq file is - in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/" + in_path="$baseDir/dataset/" //in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata" // where initial fastq file is fastq_file="test.fastq2.gz" // fastq file name //fastq_file="4-4_S1_L001_R1_001.fastq.gz" //fastq_file="3-4_S1_L001_R1_001.fastq.gz" //primer_fasta="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" - primer_fasta="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads + primer_fasta="$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads //primer_fasta="/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" //// end path and files @@ -46,7 +46,7 @@ env { //// end fivep_filtering cutoff_nb=25 // reads of length cutoff_nb after trimming are removed //ref_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" - ref_path="/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome + ref_path="$baseDir/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome ref_file="Ecoli-K12-MG1655_ORI_CENTERED.fasta" // fasta file of the reference genome ori_coord="2320711 2320942" // [2320711, 2320942] // Ecoli centered coordinates ter_coord="4627368 4627400" //[4627368, 4627400] // Ecoli centered coordinates @@ -67,7 +67,7 @@ env { //// must be also exported system_exec = 'slurm' // the system that runs the workflow. Either 'local' or 'slurm' //out_path="/mnt/c/Users/Gael/Desktop" // where the report file will be saved. Example report_path = '.' for where the main.nf run is executed or report_path = '/mnt/c/Users/Gael/Desktop' -out_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results" // where the report file will be saved. Example report_path = '.' for where the main.nf run is executed or report_path = '/mnt/c/Users/Gael/Desktop' +out_path="$baseDir/results/" // where the report file will be saved. Example report_path = '.' for where the main.nf run is executed or report_path = '/mnt/c/Users/Gael/Desktop' //// end must be also exported //// general variables diff --git a/main.nf b/main.nf index bde636e650248139c604b3c18484f756a7d90d92..11fa7db5cb89c5dc09112781b4663a57bab2aa13 100755 --- a/main.nf +++ b/main.nf @@ -26,6 +26,9 @@ config_file = file("${projectDir}/nextflow.config") log_file = file("${launchDir}/.nextflow.log") file_name = file("${in_path}/${fastq_file}").baseName ref_name = file("${ref_path}/${ref_file}").baseName +primer = file(primer_fasta) +ref = file("${ref_path}/${ref_file}") +sum_of_2_nb = fivep_seq_nb.toInteger()+added_nb.toInteger() modules = params.modules // remove the dot -> can be used in bash scripts //////// end Variables @@ -43,27 +46,9 @@ ref_ch_test = file("${ref_path}/${ref_file}") // to test if exist below //////// Channels fastq_ch = Channel.fromPath("${in_path}/${fastq_file}", checkIfExists: false) // I could use true, but I prefer to perform the check below, in order to have a more explicit error message -primer_ch = Channel.fromPath("${primer_fasta}", checkIfExists: false) // I could use true, but I prefer to perform the check below, in order to have a more explicit error message -Channel.fromPath("${ref_path}/${ref_file}", checkIfExists: false).into{ref_ch1 ; ref_ch2 ; ref_ch3} // I could use true, but I prefer to perform the check below, in order to have a more explicit error message -alien_l_param_ch = Channel.from("${alientrimmer_l_param}") -Channel.from("${attc_seq}").into{attc_seq_ch1 ; attc_seq_ch2} -fivep_seq_filtering_ch = Channel.from("${fivep_seq_filtering}") -fivep_seq_nb_ch = Channel.from("${fivep_seq_nb}") -added_nb_ch = Channel.from("${added_nb}") -sum_ch = Channel.from("${fivep_seq_nb}", "${added_nb}").toInteger().sum() -Channel.from("${cute_path}").into{cute_ch1 ; cute_ch2 ; cute_ch3 ; cute_ch4 ; cute_ch5 ; cute_ch6 ; cute_ch7 ; cute_ch8 ; cute_ch9} -cutoff_nb_ch = Channel.from("${cutoff_nb}") -Channel.from("${ori_coord}").into{ori_coord_ch1 ; ori_coord_ch2} -Channel.from("${ter_coord}").into{ter_coord_ch1 ; ter_coord_ch2} -Channel.from("${color_coverage}").into{color_coverage_ch1 ; color_coverage_ch2} -Channel.from("${xlab}").into{xlab_ch1 ; xlab_ch2} -Channel.from("${ref_name}").into{ref_name_ch1 ; ref_name_ch2} -Channel.from("${genome_size}").into{genome_size_ch1 ; genome_size_ch2} //////// end Channels - - //////// Checks if(system_exec == 'local' || system_exec == 'slurm'){ @@ -91,7 +76,7 @@ if(system_exec == 'local' || system_exec == 'slurm'){ process init { label 'bash' // see the withLabel: bash in the nextflow config file - cache 'true' + cache 'false' output: file "report.rmd" into log_ch0 @@ -145,8 +130,8 @@ process trim { // Trim the oligo sequences. See section 8.4 of the labbook 20200 input: val file_name file gz from fastq_Nremove_ch - file pr from primer_ch - val alien_l_param from alien_l_param_ch + file pr from primer + val alien_l_param from alientrimmer_l_param output: file "${file_name}_trim.fq" into fastq_trim_ch1, fastq_trim_ch2 @@ -189,11 +174,11 @@ process fivep_filtering { // section 8.6 to 8.13 of the labbook 20200520. Instea input: val file_name file fq from fastq_trim_ch2 - val fivep_seq_filtering from fivep_seq_filtering_ch - val attc_seq from attc_seq_ch1 - val fivep_seq_nb from fivep_seq_nb_ch - val added_nb from added_nb_ch - val sum_of_2_nb from sum_ch + val fivep_seq_filtering + val attc_seq + val fivep_seq_nb + val added_nb + val sum_of_2_nb output: file "${file_name}_5pAtccRm.fq" into fastq_5p_filter_ch1, fastq_5p_filter_ch2 @@ -227,8 +212,8 @@ process plot_fivep_filtering_stat { // section 8.7 to 8.11 of the labbook 202005 input: tuple val(nouse), file(stat) from stat_fastq_5p_filter_ch1 - val attc_seq from attc_seq_ch2 - val cute from cute_ch1 + val attc_seq + val cute from cute_path output: file "plot_fivep_filtering_stat.png" into fig_ch1 @@ -261,7 +246,7 @@ process plot_read_length_ini { // section 8.8 of the labbook 20200520 input: file length from length_fastq_ini_ch - val cute from cute_ch2 + val cute from cute_path output: file "plot_read_length_ini.png" into fig_ch2 @@ -290,7 +275,7 @@ process plot_read_length_fivep_filtering { // section 8.12 of the labbook 202005 input: file length from length_fastq_5p_filter_ch - val cute from cute_ch3 + val cute from cute_path output: file "plot_read_length_fivep_filtering.png" into fig_ch3 @@ -318,7 +303,7 @@ process cutoff { // section 8.16 of the labbook 20200520 input: val file_name file fq from fastq_5p_filter_ch2 - val nb from cutoff_nb_ch + val nb from cutoff_nb output: file "${file_name}_cutoff.fq" into cutoff_ch @@ -340,7 +325,7 @@ process plot_read_length_cutoff { // section 8.17 of the labbook 20200520 input: file length from length_cutoff_ch - val cute from cute_ch4 + val cute from cute_path output: file "plot_read_length_cutoff.png" into fig_ch4 @@ -394,7 +379,7 @@ process bowtie2 { // section 24.1 of the labbook 20200707 val file_name val ref_name file fq from cutoff_ch - file ref from ref_ch1 + file ref output: file "${file_name}_bowtie2.bam" into bowtie2_ch1, bowtie2_ch2 @@ -503,7 +488,7 @@ process duplicate_removal { // section 24.5 of the labbook 20200707. Warning: US input: val file_name file bam from q20_ch2 - file ref from ref_ch2 + file ref output: file "${file_name}_q20_nodup.bam" into dup_ch1, dup_ch2 @@ -585,11 +570,11 @@ process plot_coverage { // section 24.6 of the labbook 20200707 val file_name file cov from cov_ch // warning: several files file read_nb from bow_read_nb_ch.concat(q20_read_nb_ch, dup_read_nb_ch) - val ori_coord from ori_coord_ch1.first() - val ter_coord from ter_coord_ch1.first() - val color_coverage from color_coverage_ch1.first() - val xlab from xlab_ch1.first() - val cute from cute_ch5.first() + val ori_coord + val ter_coord + val color_coverage + val xlab + val cute from cute_path output: file "plot_${cov.baseName}.png" into fig_ch5 // warning: several files @@ -661,11 +646,11 @@ process plot_insertion { // sections 24.7.2, 44.1 and 45.1 of the labbook 202005 input: val file_name file pos from orient_ch1 - val ori_coord from ori_coord_ch2 - val ter_coord from ter_coord_ch2 - val xlab from xlab_ch2 - val genome_size from genome_size_ch1 - val cute from cute_ch7 + val ori_coord + val ter_coord + val xlab + val genome_size + val cute from cute_path output: file "*.png" into fig_ch6 @@ -755,8 +740,8 @@ process print_report { // section 8.8 of the labbook 20200520 input: val file_name - val cute from cute_ch6 - file report from log_ch1.concat(log_ch2, log_ch3, log_ch4, log_ch5, log_ch6, log_ch7, log_ch8, log_ch9, log_ch10, log_ch11, log_ch12, log_ch13, log_ch14, log_ch15, log_ch16, log_ch17, log_ch18, log_ch19).collectFile(name: 'report.rmd', sort: false) + val cute from cute_path + file report from log_ch0.concat(log_ch1,log_ch2, log_ch3, log_ch4, log_ch5, log_ch6, log_ch7, log_ch8, log_ch9, log_ch10, log_ch11, log_ch12, log_ch13, log_ch14, log_ch15, log_ch16, log_ch17, log_ch18, log_ch19).collectFile(name: 'report.rmd', sort: false) tuple val ("stat_tempo_name"), file ("stat_tempo") from stat_fastq_5p_filter_ch2 file "plot_fivep_filtering_stat" from fig_ch1 file "plot_read_length_ini" from fig_ch2.first() @@ -772,7 +757,7 @@ process print_report { // section 8.8 of the labbook 20200520 script: """ - #cp tempo_report report.rmd # this is to get hard files, not symlinks + cp ${report} report_file.rmd # this is to get hard files, not symlinks mkdir figures mkdir files mkdir reports @@ -784,7 +769,7 @@ process print_report { // section 8.8 of the labbook 20200520 cp ${png} ./figures/ # Warning several files are copied using their initial names, i.e., the names they have in each work folders of the plot_coverage process cp ${png2} ./figures/ # Warning several files cp ${plot_read_length_cutoff} ./reports/nf_dag.png # trick to delude the knitting during the print report - Rscript $workflow.projectDir/bin/print_report.R "${cute}" "${report}" "print_report.txt" + print_report.R "${cute}" "report_file.rmd" "print_report.txt" """ }