Unverified commit e6a9c673 authored by Frederic Lemoine
Browse files

Removed dependencies on report.rmd

parent 988bd86a
......@@ -19,7 +19,7 @@ input_file=$1
output_file=$2
log=$3
echo -e "<br /><br />\n\n### Removal for reads made of N only\n\n" >> ${log}
echo -e "<br /><br />\n\n### Removal for reads made of N only\n\n" > ${log}
zcat ${input_file} | awk '{lineKind=(NR-1)%4;}lineKind==0{record=$0; next}lineKind==1{toGet=!($0~/^N*$/); if(toGet) print record}toGet' | gzip -c > ${output_file}
# warning: with no output dir for log.txt, the file is created in \\wsl$\Ubuntu-20.04\home\gael\work\35\b826898b7be994ff13b7bc73bc88d8\
# get the bad sequences + 3 other lines of the fastq #see https://stackoverflow.com/questions/11793942/delete-lines-before-and-after-a-match-in-bash-with-sed-or-awk
......
......@@ -21,7 +21,7 @@ log=$4
echo -e "<br /><br />\n\n### Selection of reads over ${nb} bases\n\n" >> ${log}
echo -e "<br /><br />\n\n### Selection of reads over ${nb} bases\n\n" > ${log}
# cutoff
awk -v var1=${nb} '{lineKind=(NR-1)%4}lineKind==0{record=$0; next}lineKind==1{toGet=(length($0)>=var1); if(toGet) print record}toGet' ${input_file} > ${output_file}_cutoff.fq
......
......@@ -28,7 +28,7 @@ log=$5
# log="report.rmd"
echo -e "<br /><br />\n\n### Removal of duplicates using the 5\' and 3\' coordinates\n\n" >> ${log}
echo -e "<br /><br />\n\n### Removal of duplicates using the 5\' and 3\' coordinates\n\n" > ${log}
SAMPLE_NAME=${input_file%.*} # recover the name of the file without extension
# check that no BX:Z: TAG already exists
......
......@@ -27,7 +27,7 @@ attc_seq=$7
log=$8
echo -e "<br /><br />\n\n### Selection of reads with the attC in 5'\n\n" >> ${log}
echo -e "<br /><br />\n\n### Selection of reads with the attC in 5'\n\n" > ${log}
# fastq filtering
awk -v var1=${fivep_seq_filtering} '
{lineKind=(NR-1)%4;}
......
......@@ -21,7 +21,7 @@ primer_fasta=$3
alientrimmer_l_param=$4
log=$5
echo -e "<br /><br />\n\n### Trim of the read for the primer parts\n\n" >> ${log}
echo -e "<br /><br />\n\n### Trim of the read for the primer parts\n\n" > ${log}
# sed '/^>.*$/d' ref_seq/adapters_TruSeq_B2699.fasta > tempo.adap.seq #in case we want to remove titles of the fasta files, but no need for AlienTrimmer
gzip ${input_file} -dc > input_file2
AlienTrimmer -i input_file2 -c ${primer_fasta} -o ${output_file} -l ${alientrimmer_l_param} | tee tempo.txt
......
......@@ -91,7 +91,7 @@ if(system_exec == 'local' || system_exec == 'slurm'){
process init {
label 'bash' // see the withLabel: bash in the nextflow config file
cache 'false' // not true, otherwise append to a preexisting report
cache 'true'
output:
file "report.rmd" into log_ch0
......@@ -120,13 +120,11 @@ process init {
process Nremove { // remove the reads made of N only. See section 8.3 of the labbook 20200520
label 'bash' // see the withLabel: bash in the nextflow config file
//publishDir "${out_path}", mode: 'copy', pattern: "report.rmd", overwrite: false
cache 'true'
input:
val file_name
file gz from fastq_ch
file "report.rmd" from log_ch0
output:
file "${file_name}_Nremove.gz" into fastq_Nremove_ch
......@@ -149,7 +147,6 @@ process trim { // Trim the oligo sequences. See section 8.4 of the labbook 20200
file gz from fastq_Nremove_ch
file pr from primer_ch
val alien_l_param from alien_l_param_ch
file "report.rmd" from log_ch1
output:
file "${file_name}_trim.fq" into fastq_trim_ch1, fastq_trim_ch2
......@@ -169,7 +166,6 @@ process fastqc1 { // section 8.5 of the labbook 20200520
input:
file fq from fastq_trim_ch1
file "report.rmd" from log_ch2
output:
file "${fq.baseName}_fastqc.*"
......@@ -177,7 +173,7 @@ process fastqc1 { // section 8.5 of the labbook 20200520
script:
"""
echo -e "<br /><br />\n\n### Read QC n°1\n\n" >> report.rmd
echo -e "<br /><br />\n\n### Read QC n°1\n\n" > report.rmd
echo -e "Results are published in the [fastQC1](./fastQC1) folder\n\n" >> report.rmd
fastqc ${fq} | tee tempo.txt
cat tempo.txt >> report.rmd
......@@ -198,7 +194,6 @@ process fivep_filtering { // section 8.6 to 8.13 of the labbook 20200520. Instea
val fivep_seq_nb from fivep_seq_nb_ch
val added_nb from added_nb_ch
val sum_of_2_nb from sum_ch
file "report.rmd" from log_ch3
output:
file "${file_name}_5pAtccRm.fq" into fastq_5p_filter_ch1, fastq_5p_filter_ch2
......@@ -234,7 +229,6 @@ process plot_fivep_filtering_stat { // section 8.7 to 8.11 of the labbook 202005
tuple val(nouse), file(stat) from stat_fastq_5p_filter_ch1
val attc_seq from attc_seq_ch2
val cute from cute_ch1
file "report.rmd" from log_ch4
output:
file "plot_fivep_filtering_stat.png" into fig_ch1
......@@ -250,7 +244,7 @@ process plot_fivep_filtering_stat { // section 8.7 to 8.11 of the labbook 202005
<center>
![Figure 1: Frequency of each base at the 5\' of the reads.](./figures/plot_fivep_filtering_stat.png){width=600}
</center>
" >> report.rmd
" > report.rmd
Rscript $workflow.projectDir/bin/plot_fivep_filtering_stat.R "${stat}" "${attc_seq}" "${cute}" "plot_fivep_filtering_stat_report.txt"
"""
// no space before <center> or # ![. Otherwise not correctly interpreted
......@@ -268,7 +262,6 @@ process plot_read_length_ini { // section 8.8 of the labbook 20200520
input:
file length from length_fastq_ini_ch
val cute from cute_ch2
file "report.rmd" from log_ch5
output:
file "plot_read_length_ini.png" into fig_ch2
......@@ -282,7 +275,7 @@ process plot_read_length_ini { // section 8.8 of the labbook 20200520
<center>
![Figure 1: Frequency of reads according to read size (in bp).](./figures/plot_read_length_ini.png){width=600}
</center>
' >> report.rmd
' > report.rmd
Rscript $workflow.projectDir/bin/plot_read_length.R "${length}" "ini" "${cute}" "plot_read_length_ini_report.txt"
"""
// single quotes required because of the !
......@@ -298,7 +291,6 @@ process plot_read_length_fivep_filtering { // section 8.12 of the labbook 202005
input:
file length from length_fastq_5p_filter_ch
val cute from cute_ch3
file "report.rmd" from log_ch6
output:
file "plot_read_length_fivep_filtering.png" into fig_ch3
......@@ -312,7 +304,7 @@ process plot_read_length_fivep_filtering { // section 8.12 of the labbook 202005
<center>
![Figure 2: Frequency of reads according to read size (in bp).](./figures/plot_read_length_fivep_filtering.png){width=600}
</center>
' >> report.rmd
' > report.rmd
Rscript $workflow.projectDir/bin/plot_read_length.R "${length}" "fivep_filtering" "${cute}" "plot_read_length_fivep_filtering_report.txt"
"""
// single quotes required because of the !
......@@ -327,7 +319,6 @@ process cutoff { // section 8.16 of the labbook 20200520
val file_name
file fq from fastq_5p_filter_ch2
val nb from cutoff_nb_ch
file "report.rmd" from log_ch7
output:
file "${file_name}_cutoff.fq" into cutoff_ch
......@@ -350,7 +341,6 @@ process plot_read_length_cutoff { // section 8.17 of the labbook 20200520
input:
file length from length_cutoff_ch
val cute from cute_ch4
file "report.rmd" from log_ch8
output:
file "plot_read_length_cutoff.png" into fig_ch4
......@@ -364,7 +354,7 @@ process plot_read_length_cutoff { // section 8.17 of the labbook 20200520
<center>
![Figure 3: Frequency of reads according to read size (in bp).](./figures/plot_read_length_cutoff.png){width=600}
</center>
' >> report.rmd
' > report.rmd
Rscript $workflow.projectDir/bin/plot_read_length.R "${length}" "cutoff" "${cute}" "plot_read_length_cutoff_report.txt"
"""
// single quotes required because of the !
......@@ -378,7 +368,6 @@ process fastqc2 { // section 8.14 of the labbook 20200520
input:
file fq from fastq_5p_filter_ch1
file "report.rmd" from log_ch9
output:
file "${fq.baseName}_fastqc.*"
......@@ -387,7 +376,7 @@ process fastqc2 { // section 8.14 of the labbook 20200520
script:
"""
echo -e "<br /><br />\\n\\n### Read QC n°2\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Read QC n°2\\n\\n" > report.rmd
echo -e "Results are published in the [fastQC2](./fastQC2) folder\\n\\n" >> report.rmd
fastqc ${fq} | tee tempo.txt
cat tempo.txt >> report.rmd
......@@ -406,7 +395,6 @@ process bowtie2 { // section 24.1 of the labbook 20200707
val ref_name
file fq from cutoff_ch
file ref from ref_ch1
file "report.rmd" from log_ch10
output:
file "${file_name}_bowtie2.bam" into bowtie2_ch1, bowtie2_ch2
......@@ -417,7 +405,7 @@ process bowtie2 { // section 24.1 of the labbook 20200707
"""
echo -e "<br /><br />\\n\\n### Bowtie2 indexing of the reference sequence\\n\\n" >> bowtie2_report.txt
bowtie2-build ${ref} ${ref_name} |& tee -a bowtie2_report.txt
echo -e "<br /><br />\\n\\n### Bowtie2 alignment\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Bowtie2 alignment\\n\\n" > report.rmd
echo -e "<br /><br />\\n\\n### Bowtie2 alignment\\n\\n" >> bowtie2_report.txt
bowtie2 --very-sensitive -x ${ref_name} -U ${fq} -t -S ${file_name}_bowtie2.sam |& tee -a tempo.txt
# --very-sensitive: no soft clipping allowed and very sensitive seed alignment
......@@ -461,7 +449,6 @@ process Q20 { // section 24.2 of the labbook 20200707
input:
val file_name
file bam from bowtie2_ch1
file "report.rmd" from log_ch11
output:
file "${file_name}_q20.bam" into q20_ch1, q20_ch2, q20_ch3
......@@ -474,7 +461,7 @@ process Q20 { // section 24.2 of the labbook 20200707
"""
samtools view -q 20 -b ${bam} > ${file_name}_q20.bam |& tee q20_report.txt
samtools index ${file_name}_q20.bam
echo -e "<br /><br />\\n\\n### Q20 filtering\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Q20 filtering\\n\\n" > report.rmd
read_nb_before=\$(samtools view ${bam} | wc -l | cut -f1 -d' ') # -h to add the header
read_nb_after=\$(samtools view ${file_name}_q20.bam | wc -l | cut -f1 -d' ') # -h to add the header
echo -e "\\n\\nNumber of sequences before Q20 filtering: \$(printf "%'d" \${read_nb_before})\\n" >> report.rmd
......@@ -494,14 +481,13 @@ process no_soft_clipping { // section 24.4 of the labbook 20200707
input:
file bam from q20_ch1
file "report.rmd" from log_ch12
output:
file "report.rmd" into log_ch13
script:
"""
echo -e "<br /><br />\\n\\n### Control that no more soft clipping in reads\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Control that no more soft clipping in reads\\n\\n" > report.rmd
echo -e "nb of reads with soft clipping (S) in CIGAR: \$(printf "%'d" \$(samtools view ${bam} | awk '\$6 ~ /.*[S].*/{print \$0}' | wc -l | cut -f1 -d' '))" >> report.rmd
echo -e "\\n\\ntotal nb of reads: \$(printf "%'d" \$(samtools view ${bam} | wc -l | cut -f1 -d' '))" >> report.rmd
"""
......@@ -518,7 +504,6 @@ process duplicate_removal { // section 24.5 of the labbook 20200707. Warning: US
val file_name
file bam from q20_ch2
file ref from ref_ch2
file "report.rmd" from log_ch13
output:
file "${file_name}_q20_nodup.bam" into dup_ch1, dup_ch2
......@@ -541,7 +526,6 @@ process report1 {
input:
val file_name
file "report.rmd" from log_ch14
output:
file "report.rmd" into log_ch15
......@@ -557,7 +541,7 @@ process report1 {
</center><br /><br /><center>
![Figure 6: After removal of reads with identical 5\' and 3\' coordinates](./figures/plot_${file_name}_q20_nodup_mini.png){width=600}
</center>
' >> report.rmd
' > report.rmd
"""
}
......@@ -570,13 +554,11 @@ process coverage { // section 24.5 of the labbook 20200707. Warning: USING 5' AN
input:
file bam from bowtie2_ch2.concat(q20_ch3, dup_ch1)
// file "report.rmd" from log_ch14.first() //
// file ref from ref_ch3 // not required because bedtools genomecov-g ${ref} not required when inputs are bam files
output:
file "*_mini.cov" into cov_ch // warning: several files
// file "*.cov" // coverage per base if ever required but long process
// file "report.rmd" into log_ch15.first() // warning several files
file "cov_report.txt" into cov_report_ch
script:
......@@ -589,7 +571,6 @@ process coverage { // section 24.5 of the labbook 20200707. Warning: USING 5' AN
}
cov_report_ch.collectFile(name: "cov_report.txt").subscribe{it -> it.copyTo("${out_path}/reports")} // concatenate all the cov_report.txt files in channel cov_report_ch into a single file published into
......@@ -609,7 +590,6 @@ process plot_coverage { // section 24.6 of the labbook 20200707
val color_coverage from color_coverage_ch1.first()
val xlab from xlab_ch1.first()
val cute from cute_ch5.first()
// file "report.rmd" from log_ch15
output:
file "plot_${cov.baseName}.png" into fig_ch5 // warning: several files
......@@ -636,7 +616,6 @@ process insertion { // section 24.7 of the labbook 20200707
input:
val file_name
file bam from dup_ch2
file "report.rmd" from log_ch15
output:
file "${file_name}.pos" into orient_ch1, orient_ch2
......@@ -645,7 +624,7 @@ process insertion { // section 24.7 of the labbook 20200707
script:
"""
echo -e "<br /><br />\\n\\n### Insertion positions\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Insertion positions\\n\\n" > report.rmd
echo -e "\\n\\nOne of the step is to correct insertion site read extremity for the reverse reads. It Consist in the redefinition of POS according to FLAG. See the [insertion_report.txt](./reports/insertion_report.txt) file in the reports folders for details\\n\\n" >> report.rmd
# extraction of bam column 2, 4 and 10, i.e., FLAG, POS and SEQ
samtools view ${bam} | awk 'BEGIN{FS="\\t" ; OFS="" ; ORS=""}{print ">"\$2"\\t"\$4"\\n"\$10"\\n" }' > tempo
......@@ -682,7 +661,6 @@ process plot_insertion { // sections 24.7.2, 44.1 and 45.1 of the labbook 202005
input:
val file_name
file pos from orient_ch1
file "report.rmd" from log_ch16
val ori_coord from ori_coord_ch2
val ter_coord from ter_coord_ch2
val xlab from xlab_ch2
......@@ -698,7 +676,7 @@ process plot_insertion { // sections 24.7.2, 44.1 and 45.1 of the labbook 202005
script:
"""
echo -e "<br /><br />\\n\\n### Insertion plots\\n\\n" >> report.rmd
echo -e "<br /><br />\\n\\n### Insertion plots\\n\\n" > report.rmd
Rscript $workflow.projectDir/bin/plot_insertion.R "${pos}" "${ori_coord}" "${ter_coord}" "${xlab}" "${genome_size}" "${file_name}" "${cute}" "plot_insertion_report.txt"
pos_nb=\$(wc -l ${file_name}.pos | cut -f1 -d' ')
echo -e "\\n\\nNumber of different positions: \$(printf "%'d" \${pos_nb})\\n" >> report.rmd
......@@ -722,7 +700,6 @@ process backup {
input:
file config_file
file log_file
file "report.rmd" from log_ch17
output:
file "${config_file}" // warning message if we use file config_file
......@@ -731,7 +708,7 @@ process backup {
script:
"""
echo -e "<br /><br />\n\n### Backup\n\n" >> report.rmd
echo -e "<br /><br />\n\n### Backup\n\n" > report.rmd
echo -e "See the [reports](./reports) folder for all the details of the analysis, including the parameters used in the .config file" >> report.rmd
echo -e "<br /><br />Full .nextflow.log is in: ${launchDir}<br />The one in the [reports](./reports) folder is not complete (miss the end)" >> report.rmd
"""
......@@ -742,15 +719,12 @@ process workflowVersion { // create a file with the workflow version in out_path
label 'bash' // see the withLabel: bash in the nextflow config file
cache 'false'
input:
file "report.rmd" from log_ch18
output:
file "report.rmd" into log_ch19
script:
"""
echo -e "<br /><br />\n\n### Workflow Version\n\n" >> report.rmd
echo -e "<br /><br />\n\n### Workflow Version\n\n" > report.rmd
echo "Project (empty means no .git folder where the main.nf file is present): " \$(git -C ${projectDir} remote -v | head -n 1) >> report.rmd # works only if the main script run is located in a directory that has a .git folder, i.e., that is connected to a distant repo
echo "<br />Git info (empty means no .git folder where the main.nf file is present): " \$(git -C ${projectDir} describe --abbrev=10 --dirty --always --tags) >> report.rmd # idem. Provide the small commit number of the script and nextflow.config used in the execution
echo "<br />Cmd line: ${workflow.commandLine}" >> report.rmd
......@@ -782,7 +756,7 @@ process print_report { // section 8.8 of the labbook 20200520
input:
val file_name
val cute from cute_ch6
file "tempo_report" from log_ch19
file report from log_ch1.concat(log_ch2, log_ch3, log_ch4, log_ch5, log_ch6, log_ch7, log_ch8, log_ch9, log_ch10, log_ch11, log_ch12, log_ch13, log_ch14, log_ch15, log_ch16, log_ch17, log_ch18, log_ch19).collectFile(name: 'report.rmd', sort: false)
tuple val ("stat_tempo_name"), file ("stat_tempo") from stat_fastq_5p_filter_ch2
file "plot_fivep_filtering_stat" from fig_ch1
file "plot_read_length_ini" from fig_ch2.first()
......@@ -798,7 +772,7 @@ process print_report { // section 8.8 of the labbook 20200520
script:
"""
cp tempo_report report.rmd # this is to get hard files, not symlinks
#cp tempo_report report.rmd # this is to get hard files, not symlinks
mkdir figures
mkdir files
mkdir reports
......@@ -810,7 +784,7 @@ process print_report { // section 8.8 of the labbook 20200520
cp ${png} ./figures/ # Warning several files are copied using their initial names, i.e., the names they have in each work folders of the plot_coverage process
cp ${png2} ./figures/ # Warning several files
cp ${plot_read_length_cutoff} ./reports/nf_dag.png # trick to delude the knitting during the print report
Rscript $workflow.projectDir/bin/print_report.R "${cute}" "report.rmd" "print_report.txt"
Rscript $workflow.projectDir/bin/print_report.R "${cute}" "${report}" "print_report.txt"
"""
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment