Commit cfada439 authored by Gael MILLOT
Browse files

v7.7.0 release: Kraken added and multiQC fixed

parent 005eedc2
......@@ -228,6 +228,16 @@ Time loading reference: 00:00:00
<br />Overall time: 00:00:00
<br /><br />
### MultiQC
Results are published in the [Report](./reports/multiqc_report.html) folder
<br /><br />
### Q20 filtering
......@@ -360,7 +370,7 @@ In each sequence of length 40 <br />position 21 corresponds to the first nucleot
</center>
![Figure 8: test.fastq2_LEADING_16](./figures/logo_test.fastq2_LEADING_16.png){width=600}
![Figure 8: test.fastq2_LEADING_0](./figures/logo_test.fastq2_LEADING_0.png){width=600}
</center>
......@@ -375,7 +385,7 @@ In each sequence of length 40 <br />position 21 corresponds to the first nucleot
</center>
![Figure 9: test.fastq2_LEADING_0](./figures/logo_test.fastq2_LEADING_0.png){width=600}
![Figure 9: test.fastq2_LAGGING_0](./figures/logo_test.fastq2_LAGGING_0.png){width=600}
</center>
......@@ -390,7 +400,7 @@ In each sequence of length 40 <br />position 21 corresponds to the first nucleot
</center>
![Figure 10: test.fastq2_LAGGING_0](./figures/logo_test.fastq2_LAGGING_0.png){width=600}
![Figure 10: test.fastq2_LEADING_16](./figures/logo_test.fastq2_LEADING_16.png){width=600}
</center>
......@@ -841,11 +851,11 @@ Full .nextflow.log is in: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot<br
Project (empty means no .git folder where the main.nf file is present): loot https://gitlab.pasteur.fr/gmillot/14985_loot (fetch)
<br />Git info (empty means no .git folder where the main.nf file is present): v7.5.0-dirty
<br />Git info (empty means no .git folder where the main.nf file is present): v7.6.0-2-g005eedc277-dirty
<br />Cmd line: nextflow run main.nf -resume
<br />execution mode: local
<br />Manifest's pipeline version: null
<br />result path: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot/results/20220120_res_CL14985_B4985_4_1645109072
<br />result path: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot/results/20220120_res_CL14985_B4985_4_1645462141
<br />nextflow version: 21.04.2
......@@ -862,7 +872,7 @@ workDir (directory where tasks temporary files are created): /mnt/c/Users/Gael/D
USER VARIABLES:<br />
out_path: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot/results/20220120_res_CL14985_B4985_4_1645109072<br />
out_path: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot/results/20220120_res_CL14985_B4985_4_1645462141<br />
in_path: /mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset
......
......@@ -12,7 +12,7 @@
2022-02-17 14:41:48
2022-02-21 16:52:41
......@@ -31,7 +31,7 @@
END TIME: 2022-02-17 14:41:48
END TIME: 2022-02-21 16:52:42
......@@ -118,7 +118,7 @@ loaded via a namespace (and not attached):
################################ JOB END
TIME: 2022-02-17 14:41:48
TIME: 2022-02-21 16:52:42
TOTAL TIME LAPSE: 0S
......
......@@ -30,7 +30,9 @@ ref_name = file("${ref_path}/${ref_file}").baseName
primer = file(primer_fasta)
ref = file("${ref_path}/${ref_file}")
sum_of_2_nb = fivep_seq_nb.toInteger()+added_nb.toInteger()
k2db = file("${kraken_db}")
if(system_exec == 'slurm'){
k2db = file("${kraken_db}")
}
// end from the nextflow.config file
......@@ -46,7 +48,9 @@ modules = params.modules // remove the dot -> can be used in bash scripts
fastq_ch_test = file("${in_path}/${fastq_file}") // to test if exist below
primer_ch_test = file("${primer_fasta}") // to test if exist below
ref_ch_test = file("${ref_path}/${ref_file}") // to test if exist below
k2db_ch_test = file("${kraken_db}") // to test if exist below
if(system_exec == 'slurm'){
k2db_ch_test = file("${kraken_db}") // to test if exist below
}
//////// end Variables from config.file that need to be checked
......@@ -73,10 +77,12 @@ if(system_exec == 'local' || system_exec == 'slurm'){
if( ! file_exists3){
error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID ref_path: ${ref_path}\nOR ref_file: ${ref_file}\nPARAMETER IN nextflow.config FILE\n\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n"
}
if(system_exec == 'slurm'){
def file_exists4 = k2db_ch_test.exists()
if( ! file_exists4){
error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID kraken db path: ${kraken_db}\nPARAMETER IN nextflow.config FILE\n\nIF POINTING TO A DISTANT SERVER, CHECK THAT IT IS MOUNTED\n\n========\n\n"
}
}
}else{
error "\n\n========\n\nERROR IN NEXTFLOW EXECUTION\n\nINVALID system_exec PARAMETER IN nextflow.config FILE: ${system_exec}\n\n========\n\n"
}
......@@ -161,15 +167,22 @@ process kraken {
input:
file fastq from fastq_trim_ch3
if(system_exec == 'slurm'){
file k2db
}
output:
file "${fastq.baseName}_kraken_std.txt" into krakenreports
script:
if(system_exec == 'slurm')
"""
kraken2 --db ${k2db} --threads ${task.cpus} --report ${fastq.baseName}_kraken_std.txt ${fastq} > ${fastq.baseName}.kraken2
"""
else
"""
echo "No kraken analysis performed in local running" > ${fastq.baseName}_kraken_std.txt
"""
}
......@@ -439,17 +452,20 @@ process bowtie2 { // section 24.1 of the labbook 20200707
// Runs MultiQC on the FastQC, Bowtie2 and Kraken outputs, emits the html report for print_report,
// and writes the "MultiQC" section of the final R Markdown report (report.rmd).
// NOTE(review): this span comes from a diff view, so a removed line and its replacement appear one after the other
// (two publishDir directives, two html outputs, two multiqc calls) — confirm which lines belong to the current main.nf.
process multiQC{
label "multiqc" // presumably selects the multiqc settings declared in nextflow.config — verify
publishDir "${out_path}/reports", mode: 'copy', overwrite: false // NOTE(review): no pattern on this directive — confirm it is still wanted next to the one below
publishDir "${out_path}/reports", mode: 'copy', pattern: "multiqc_report.html", overwrite: false // only multiqc_report.html matches this pattern
input:
file "*" from fastqc_log_ch.mix(bowtie2_log_ch).mix(krakenreports).collect() // the three channels are merged, then collected for this process
output:
file "*.html"
file "multiqc_report.html" into multiqc_ch // read by print_report (file html from multiqc_ch)
file "report.rmd" into log_ch12 // concatenated with the other log_ch* channels in print_report
script:
"""
multiqc .
multiqc . -n multiqc_report.html
echo -e "\\n\\n<br /><br />\\n\\n### MultiQC\\n\\n" > report.rmd
echo -e "Results are published in the [Report](./reports/multiqc_report.html) folder\\n\\n" >> report.rmd
"""
}
......@@ -470,7 +486,7 @@ process Q20 { // section 24.2 of the labbook 20200707
file "read_nb_before" into bow_read_nb_ch
file "read_nb_after" into q20_read_nb_ch
file "q20_report.txt"
file "report.rmd" into log_ch12
file "report.rmd" into log_ch13
script:
"""
......@@ -498,7 +514,7 @@ process no_soft_clipping { // section 24.4 of the labbook 20200707
file bam from q20_ch1
output:
file "report.rmd" into log_ch13
file "report.rmd" into log_ch14
script:
"""
......@@ -524,7 +540,7 @@ process duplicate_removal { // section 24.5 of the labbook 20200707. Warning: US
file "${file_name}_q20_nodup.bam" into dup_ch1, dup_ch2
file "dup_read_nb" into dup_read_nb_ch
file "dup_report.txt"
file "report.rmd" into log_ch14
file "report.rmd" into log_ch15
script:
"""
......@@ -543,7 +559,7 @@ process report1 {
val file_name
output:
file "report.rmd" into log_ch15
file "report.rmd" into log_ch16
script:
"""
......@@ -635,7 +651,7 @@ process insertion { // section 24.7 of the labbook 20200707
output:
file "${file_name}.pos" into orient_ch1, orient_ch2
file "insertion_report.txt"
file "report.rmd" into log_ch16
file "report.rmd" into log_ch17
script:
"""
......@@ -780,7 +796,7 @@ process report2 {
val insertion_dist
output:
file "report.rmd" into log_ch17
file "report.rmd" into log_ch18
script:
"""
......@@ -839,7 +855,7 @@ process global_logo { // 24.9.3 of the labbook 20200707
output:
file "global_logo_${file_name}.png" into fig_ch7
file "global_logo_report.txt"
file "report.rmd" into log_ch18
file "report.rmd" into log_ch19
script:
"""
......@@ -871,7 +887,7 @@ process final_insertion_files { // 44.1 of the labbook 20201210
file "${file_name}_annot.pos" into pos_ch1
file "${file_name}_annot_insertion.freq"
file "final_insertion_files_report.txt"
file "report.rmd" into log_ch19
file "report.rmd" into log_ch20
script:
"""
......@@ -903,7 +919,7 @@ process motif { // 43 of the labbook 20201209
file "motif_sites.pos" into motif_ch
file "motif_report.txt"
file "{head,table}*.txt" into motif_table_ch // warning: several files
file "report.rmd" into log_ch20
file "report.rmd" into log_ch21
script:
"""
......@@ -946,7 +962,7 @@ process random_insertion { // sections 44 of the labbook 20201210
file "obs_rd_insertions.pos" into obs_rd_insertions_pos_ch1
file "obs_rd_insertions.freq" into obs_rd_insertions_freq_ch1
file "random_insertion_report.txt"
file "report.rmd" into log_ch21
file "report.rmd" into log_ch22
script:
"""
......@@ -1000,7 +1016,7 @@ process plot_insertion { // sections 24.7.2 and 45 of the labbook 20200520
output:
file "*.png" into fig_ch9 // warning: several files
file "plot_insertion_report.txt"
file "report.rmd" into log_ch22
file "report.rmd" into log_ch23
script:
"""
......@@ -1062,7 +1078,7 @@ process backup {
output:
file "${config_file}" // warning message if we use file config_file
file "${log_file}" // warning message if we use file log_file
file "report.rmd" into log_ch23
file "report.rmd" into log_ch24
script:
"""
......@@ -1078,7 +1094,7 @@ process workflowVersion { // create a file with the workflow version in out_path
cache 'false'
output:
file "report.rmd" into log_ch24
file "report.rmd" into log_ch25
script:
"""
......@@ -1114,7 +1130,7 @@ process print_report { // section 8.8 of the labbook 20200520
input:
val file_name
val cute_path
file report from log_ch0.concat(log_ch1,log_ch2, log_ch3, log_ch4, log_ch5, log_ch6, log_ch7, log_ch8, log_ch9, log_ch10, log_ch11, log_ch12, log_ch13, log_ch14, log_ch15, log_ch16, log_ch17, log_ch18, log_ch19, log_ch20, log_ch21, log_ch22, log_ch23, log_ch24).collectFile(name: 'report.rmd', sort: false)
file report from log_ch0.concat(log_ch1,log_ch2, log_ch3, log_ch4, log_ch5, log_ch6, log_ch7, log_ch8, log_ch9, log_ch10, log_ch11, log_ch12, log_ch13, log_ch14, log_ch15, log_ch16, log_ch17, log_ch18, log_ch19, log_ch20, log_ch21, log_ch22, log_ch23, log_ch24, log_ch25).collectFile(name: 'report.rmd', sort: false)
tuple val ("stat_tempo_name"), file ("stat_tempo") from stat_fastq_5p_filter_ch2
file table from motif_table_ch // warning: several files
file png1 from fig_ch1
......@@ -1126,6 +1142,7 @@ process print_report { // section 8.8 of the labbook 20200520
file png7 from fig_ch7
file png8 from fig_ch8.collect() // warning: several files
file png9 from fig_ch9.collect() // warning: several files
file html from multiqc_ch
output:
file "report.html"
......@@ -1142,6 +1159,7 @@ process print_report { // section 8.8 of the labbook 20200520
cp ${table} ./files/ # this is to get hard files, not symlinks
cp ${png1} ${png2} ${png3} ${png4} ${png5} ${png6} ${png7} ${png8} ${png9} ./figures/ # Warning several files
cp ${png1} ./reports/nf_dag.png # trick to delude the knitting during the print report
cp ${html} ./reports/ # this is to get hard files, not symlinks
print_report.R "${cute_path}" "report_file.rmd" "print_report.txt"
"""
}
......
......@@ -73,7 +73,7 @@ env {
//////// variables that will be used below (and potentially in the main.nf file)
//// must also be exported
system_exec = 'slurm' // the system that runs the workflow. Either 'local' or 'slurm'
system_exec = 'local' // the system that runs the workflow. Either 'local' or 'slurm'
//docker_exe = true // true for docker and false for singularity
//out_path="/mnt/c/Users/Gael/Desktop" // where the report file will be saved. Example out_path = '.' for where the main.nf run is executed or out_path = '/mnt/c/Users/Gael/Desktop'
out_path="$baseDir/results" // where the report file will be saved. Example out_path = '.' for where the main.nf run is executed or out_path = '/mnt/c/Users/Gael/Desktop'
......@@ -159,10 +159,16 @@ dag {
// Singularity scope: enables the container engine, sets its run options and the image cache directory.
// NOTE(review): cacheDir is assigned once unconditionally and again in the if/else below (old and new diff lines shown together) — confirm which assignment is current.
singularity {
enabled = true
autoMounts = true // automatically mounts host paths in the executed container
runOptions = '--no-home'
//runOptions = '--home $HOME:/home/$USER --bind /pasteur' // provide any extra command line options supported by the singularity exec. Here, binds the whole /pasteur into /pasteur of the container. Otherwise, no access
cacheDir = 'singularity' // name of the directory where remote Singularity images are stored. When rerun, the exec directly uses these without redownloading them. When using a computing cluster it must be a shared folder accessible to all computing nodes
// on slurm, an absolute path is used for the image cache; otherwise the relative 'singularity' directory
if(system_exec == 'slurm'){
cacheDir = '/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/singularity' // name of the directory where remote Singularity images are stored. When rerun, the exec directly uses these without redownloading them. When using a computing cluster it must be a shared folder accessible to all computing nodes
}else{
cacheDir = 'singularity'
}
}
//////// end Scopes
......@@ -195,12 +201,13 @@ process {
cpus=1 // only used when name = "local" in the executor part above
memory='3G' // only used when name = "local" in the executor part above
}
// kraken container and resources are declared only when the workflow runs on slurm
// (in main.nf, the kraken script only calls kraken2 when system_exec == 'slurm', and just writes a placeholder report otherwise)
if(system_exec == 'slurm'){
withLabel: kraken{
container="evolbioinfo/kraken:v2.0.8-beta"
cpus=20 // main.nf passes task.cpus to kraken2 --threads
memory='100G'
}
}
withLabel: fastqc {
container='evolbioinfo/fastqc:v0.11.8'
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment