Commit e6d98f20 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

Version v8.0.0: Operational version for Ecoli. But Warning: still problem when...

Version v8.0.0: Operational version for Ecoli. But Warning: there is still a problem when using the option -resume of nextflow run, notably a wrong figure 38, because all the code between processes in the main.nf file is not controlled by the cache system
parent 5d4dba89
......@@ -209,6 +209,13 @@ Gitlab developers
## WHAT'S NEW IN
### v8.0.0
1) Many things added.
2) Operational version for Ecoli. But Warning: there is still a problem when using the option -resume of nextflow run, notably a wrong figure 38, because all the code between processes in the main.nf file is not controlled by the cache system
### v7.10.0
1) Pointing to the singularity folder improved, new option "slurm_local" added, better workflow report
......
......@@ -1073,7 +1073,7 @@ if(tss_path != "NULL"){
tempo1 <- nrow(final[tempo & final$KIND == "OBS", ])
tempo2 <- nrow(final[tempo & final$KIND == "RANDOM", ])
if(sum(tempo, na.rm = TRUE) > 0){
fun_gg_boxplot(data1 = final[tempo, ], y = "Distance", categ = c("KIND", "essential"), dot.categ = "same_orient", dot.color = fun_gg_palette(n = 5, "dark")[if(all(final[tempo, "same_orient"] == "SAME")){2}else if(all(final[tempo, "same_orient"] == "OPPOSITE")){5}else{c(5, 2)}], y.tick.nb = 6, dot.alpha = 0.3, dot.size = 3, legend.width = NULL, y.bottom.extra.margin = 0, y.lim = ylim, title = paste0("GENE: ", gene.loop[x[i2]], " | n = ", format(nrow(final[tempo, ]), big.mark = ","), ifelse(tempo1 > 0 , paste0(" | n.obs = ", tempo1), ""), ifelse(tempo2 > 0 , paste0(" | n.random = ", tempo2), "")), vertical = FALSE, dot.legend.name = "INSERTION / CDS\nORIENTATION", box.legend.name = "GENE", y.top.extra.margin = 0.1, x.lab = "", y.lab = "Distance from TSS (bp)", box.line.size = 0, box.fill = TRUE, box.alpha = 0, text.size = text.size, title.text.size = title.text.size, stat.size = annotate.text.size)
fun_gg_boxplot(data1 = final[tempo, ], y = "Distance", categ = c("KIND", "essential"), dot.categ = "same_orient", dot.color = fun_gg_palette(n = 5, "dark")[if(all(final[tempo, "same_orient"] == "SAME")){2}else if(all(final[tempo, "same_orient"] == "OPPOSITE")){5}else{c(5, 2)}], y.tick.nb = 6, dot.alpha = 0.3, dot.size = 3, legend.width = NULL, y.bottom.extra.margin = 0, y.lim = ylim, title = paste0("GENE: ", gene.loop[x[i2]], " | n = ", format(nrow(final[tempo, ]), big.mark = ","), ifelse(tempo1 > 0 , paste0(" | n.obs = ", tempo1), ""), ifelse(tempo2 > 0 , paste0(" | n.random = ", tempo2), "")), vertical = FALSE, dot.legend.name = "INSERTION / CDS\nORIENTATION", box.legend.name = "GENE", y.top.extra.margin = 0.1, x.lab = "", y.lab = "Distance from TSS (bp)", box.line.size = 0, box.fill = TRUE, box.alpha = 0, text.size = 12, title.text.size = 10, stat.size = 10)
}else{
fun_gg_empty_graph(text = paste0("GENE: ", gene.loop[x[i2]]))
}
......@@ -1400,7 +1400,7 @@ if(tss_path != "NULL"){
tempo1 <- nrow(res[tempo & res$KIND == "OBS", ])
tempo2 <- nrow(res[tempo & res$KIND == "RANDOM", ])
if(sum(tempo, na.rm = TRUE) > 0){
fun_gg_boxplot(data1 = res[tempo, ], y = "pos_prop", categ = c("KIND", "essential"), dot.categ = "same_orient", dot.color = fun_gg_palette(n = 5, "dark")[if(all(res[tempo, "same_orient"] == "SAME")){2}else if(all(res[tempo, "same_orient"] == "OPPOSITE")){5}else{c(5, 2)}], dot.alpha = 0.3, dot.size = 3, legend.width = NULL, y.bottom.extra.margin = 0, y.lim = c(0, 1), title = paste0("GENE: ", gene.loop[i0], " | n = ", format(nrow(res[tempo, ]), big.mark = ","), ifelse(tempo1 > 0 , paste0(" | n.obs = ", tempo1), ""), ifelse(tempo2 > 0 , paste0(" | n.random = ", tempo2), "")), vertical = FALSE, dot.legend.name = "INSERTION / CDS\nORIENTATION", box.legend.name = "GENE", y.top.extra.margin = 0.1, x.lab = "", y.lab = "GENE COORDINATES (PROP)", box.line.size = 0, box.fill = TRUE, box.alpha = 0, text.size = text.size, title.text.size = title.text.size, stat.size = annotate.text.size)
fun_gg_boxplot(data1 = res[tempo, ], y = "pos_prop", categ = c("KIND", "essential"), dot.categ = "same_orient", dot.color = fun_gg_palette(n = 5, "dark")[if(all(res[tempo, "same_orient"] == "SAME")){2}else if(all(res[tempo, "same_orient"] == "OPPOSITE")){5}else{c(5, 2)}], dot.alpha = 0.3, dot.size = 3, legend.width = NULL, y.bottom.extra.margin = 0, y.lim = c(0, 1), title = paste0("GENE: ", gene.loop[i0], " | n = ", format(nrow(res[tempo, ]), big.mark = ","), ifelse(tempo1 > 0 , paste0(" | n.obs = ", tempo1), ""), ifelse(tempo2 > 0 , paste0(" | n.random = ", tempo2), "")), vertical = FALSE, dot.legend.name = "INSERTION / CDS\nORIENTATION", box.legend.name = "GENE", y.top.extra.margin = 0.1, x.lab = "", y.lab = "GENE COORDINATES (PROP)", box.line.size = 0, box.fill = TRUE, box.alpha = 0, text.size = 12, title.text.size = 10, stat.size = 10)
}
}
fun_close()
......
>LAGGING_0::ref:1827068-1827108
CATCCAGTTCGAACTCTGCGTTATCCAGCGCGAGCACTCG
>LEADING_0::ref:2919428-2919468
GTACCGACTTAACTGTGTTGATCATCGTACGCAAGTGACC
>LEADING_0::ref:3859524-3859564
CGCCGCCTTTAACCAGATAGTTATACAGCAAGCGCGCACC
>LEADING_16::ref:1548171-1548211
CATGCTGAATAAACTGAAGTTATCCAGATAGTTCGCCAGC
>LEADING_16::ref:1643769-1643809
TGTTTCGGATAACAGGAAGTTATCCGAAGCGATGAGAGTT
>LEADING_16::ref:2210730-2210770
TTGTCTGTAGAAAAGTAAGATGAGGAGCGAAGGCATGAAA
>LAGGING_16::ref:2349135-2349175
TGGTCAATTCAACAATGCGTTTACCAGCATTAAGGAACAG
>LAGGING_16::ref:2378798-2378838
ATTGCGGATAAACATAGTGATCACAGGCGTAAACGTCGCC
>LAGGING_16::ref:2470510-2470550
CACATTACCTAAATCAAAGATATAGAGCATTTTTGCCTCC
>LAGGING_0::ref:1827068-1827108
CATCCAGTTCGAACTCTGCGTTATCCAGCGCGAGCACTCG
>LEADING_0::ref:2919428-2919468
GTACCGACTTAACTGTGTTGATCATCGTACGCAAGTGACC
>LEADING_0::ref:3859524-3859564
CGCCGCCTTTAACCAGATAGTTATACAGCAAGCGCGCACC
>LEADING_16::ref:1548171-1548211
GCTGGCGAACTATCTGGATAACTTCAGTTTATTCAGCATG
>LEADING_16::ref:1643769-1643809
AACTCTCATCGCTTCGGATAACTTCCTGTTATCCGAAACA
>LEADING_16::ref:2210730-2210770
TTTCATGCCTTCGCTCCTCATCTTACTTTTCTACAGACAA
>LAGGING_16::ref:2349135-2349175
CTGTTCCTTAATGCTGGTAAACGCATTGTTGAATTGACCA
>LAGGING_16::ref:2378798-2378838
GGCGACGTTTACGCCTGTGATCACTATGTTTATCCGCAAT
>LAGGING_16::ref:2470510-2470550
GGAGGCAAAAATGCTCTATATCTTTGATTTAGGTAATGTG
>LEADING_16::ref:1548171-1548211
GCTGGCGAACTATCTGGATAACTTCAGTTTATTCAGCATG
>LEADING_16::ref:1643769-1643809
AACTCTCATCGCTTCGGATAACTTCCTGTTATCCGAAACA
>LAGGING_0::ref:1827068-1827108
CATCCAGTTCGAACTCTGCGTTATCCAGCGCGAGCACTCG
>LEADING_16::ref:2210730-2210770
TTTCATGCCTTCGCTCCTCATCTTACTTTTCTACAGACAA
>LAGGING_16::ref:2349135-2349175
CTGTTCCTTAATGCTGGTAAACGCATTGTTGAATTGACCA
>LAGGING_16::ref:2378798-2378838
GGCGACGTTTACGCCTGTGATCACTATGTTTATCCGCAAT
>LAGGING_16::ref:2470510-2470550
GGAGGCAAAAATGCTCTATATCTTTGATTTAGGTAATGTG
>LEADING_0::ref:2919428-2919468
GTACCGACTTAACTGTGTTGATCATCGTACGCAAGTGACC
>LEADING_0::ref:3859524-3859564
CGCCGCCTTTAACCAGATAGTTATACAGCAAGCGCGCACC
>LEADING_16::ref:1548171-1548211
CATGCTGAATAAACTGAAGTTATCCAGATAGTTCGCCAGC
>LEADING_16::ref:1643769-1643809
TGTTTCGGATAACAGGAAGTTATCCGAAGCGATGAGAGTT
>LEADING_16::ref:2210730-2210770
TTGTCTGTAGAAAAGTAAGATGAGGAGCGAAGGCATGAAA
>LAGGING_16::ref:2349135-2349175
TGGTCAATTCAACAATGCGTTTACCAGCATTAAGGAACAG
>LAGGING_16::ref:2378798-2378838
ATTGCGGATAAACATAGTGATCACAGGCGTAAACGTCGCC
>LAGGING_16::ref:2470510-2470550
CACATTACCTAAATCAAAGATATAGAGCATTTTTGCCTCC
......@@ -450,7 +450,10 @@ process multiQC{
"""
multiqc . -n multiqc_report.html
echo -e "\\n\\n<br /><br />\\n\\n### MultiQC\\n\\n" > report.rmd
echo -e "Results are published in the [Report](./reports/multiqc_report.html) folder\\n\\n" >> report.rmd
if [[ ${system_exec} == "local" ]] ; then
echo -e "\\n\\nWarning: no Kraken performed when using local run\\n" >> report.rmd
fi
echo -e "\\n\\nResults are published in the [Report](./reports/multiqc_report.html) folder\\n" >> report.rmd
"""
}
......@@ -1056,7 +1059,7 @@ process random_insertion { // sections 44 of the labbook 20201210
process plot_insertion { // sections 24.7.2 and 45 of the labbook 20200520, for TSS, section 47 20201211, for CDS section 48 20201211
label 'r_ext' // see the withLabel: bash in the nextflow config file
publishDir "${out_path}/figures", mode: 'copy', pattern: "{*.png}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
publishDir "${out_path}/figures", mode: 'copy', pattern: "{*.png,*.pdf}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
publishDir "${out_path}/reports", mode: 'copy', pattern: "{plot_insertion_report.txt}", overwrite: false //
cache 'true'
......@@ -1159,15 +1162,15 @@ process plot_insertion { // sections 24.7.2 and 45 of the labbook 20200520, for
process goalign {
label 'goalign'
publishDir "${out_path}/figures", mode: 'copy', pattern: "{*.html}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
publishDir "${out_path}/figures", mode: 'copy', pattern: "{alignment.html}", overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob
publishDir "${out_path}/reports", mode: 'copy', pattern: "{goalign_report.txt}", overwrite: false //
cache 'true'
cache 'deep'
input:
file fasta from extract_seq_dup_ch2
output:
file "*.html"
file "alignment.html" into fig_ch8
file "goalign_report.txt"
script:
......@@ -1181,7 +1184,7 @@ process goalign {
PATTERN='LEADING_16|LAGGING_16'
awk -v var1=\$PATTERN '
BEGIN{ORS="\\n"}
lineKind=(NR-1)%2
{lineKind=(NR-1)%2}
lineKind==0{record=\$0 ; next}
lineKind==1{
toGet=(record ~ var1)
......@@ -1194,15 +1197,30 @@ process goalign {
}
next
}
' tempo > /dev/null
' tempo
# Goalign
if [ -s forward.fasta ] ; then # -s means exists and is not empty
goalign draw biojs -i forward.fasta > forward.html |& tee -a goalign_report.txt
fi
if [ -s reverse.fasta ] ; then
goalign draw biojs -i reverse.fasta > reverse.html |& tee -a goalign_report.txt
goalign revcomp --unaligned -i reverse.fasta -o tempo2 # rev-comp the 16 sequences
cat forward.fasta tempo2 > final.fasta
else # we cannot have neither reverse nor forward
cp forward.fasta final.fasta
fi
# add a hyphen before or after the sequence, to have correct alignment
awk -v var1=\$PATTERN '
BEGIN{ORS="\\n"}
{lineKind=(NR-1)%2}
lineKind==0{record=\$0 ; print \$0 ; next}
lineKind==1{
toGet=(record ~ var1)
if(toGet){
print "-"\$0 ; next
}else{
print \$0"-" ; next
}
}
' final.fasta > tempo3
goalign draw biojs --auto-detect -i tempo3 -o alignment.html |& tee -a goalign_report.txt
"""
}
......@@ -1232,7 +1250,7 @@ process dup_insertion_and_logo { // 52-53 of labbbok 20211115 and section 27 of
val cute_path
output:
file "*.png" into fig_ch8 // warning: 2 files
file "*.png" into fig_ch9 // warning: 2 files
file "dup_insertion_and_logo_report.txt"
file "report.rmd" into log_ch21
......@@ -1271,7 +1289,6 @@ process dup_insertion_and_logo { // 52-53 of labbbok 20211115 and section 27 of
![Figure \'\$(echo \$((\$FIG_NB + 1)))\': Selected sites (${nb_max_insertion_sites} most used insertion sites).](./figures/plot_${file_name}_insertion_dup_selected.png){width=600}
\\n\\n</center>\\n\\n
' >> report.rmd
echo -e "\\n\\n<br /><br />\\n\\nWarning: the frequency of each position is taken into account for the logo plot\\n\\n" >> report.rmd
echo -e "\\n\\nSelected sites with frequencies:\\n\\n" >> report.rmd
echo "
\\`\\`\\`{r, echo = FALSE}
......@@ -1282,7 +1299,13 @@ kableExtra::kable_styling(knitr::kable(tempo, row.names = TRUE, digits = 0, capt
" >> report.rmd
echo -e '
\\n\\n<br /><br />\\n\\n</center>\\n\\n
![Figure \'\$(echo \$((\$FIG_NB + 2)))\': With duplicates ${file_name} global logo on selected sites](./figures/global_logo_dup_${file_name}.png){width=600}
![Figure \'\$(echo \$((\$FIG_NB + 2)))\': Alignment of the selected sites](./figures/alignment.html){width=600}
\\n\\n</center>\\n\\n
' >> report.rmd
echo -e "\\n\\n<br /><br />\\n\\nWarning: the frequency of each position is taken into account in the logo plot\\n\\n" >> report.rmd
echo -e '
\\n\\n<br /><br />\\n\\n</center>\\n\\n
![Figure \'\$(echo \$((\$FIG_NB + 3)))\': With duplicates ${file_name} global logo on selected sites](./figures/global_logo_dup_${file_name}.png){width=600}
\\n\\n</center>\\n\\n
' >> report.rmd
"""
......@@ -1385,7 +1408,8 @@ process print_report { // section 8.8 of the labbook 20200520
file png5 from fig_ch5.collect() // warning: several files
file png6 from fig_ch6.collect() // warning: several files
file png7 from fig_ch7.collect() // warning: several files
file png8 from fig_ch8.collect() // warning: several files
file png8 from fig_ch8
file png9 from fig_ch9.collect() // warning: several files
file html from multiqc_ch
output:
......@@ -1401,7 +1425,7 @@ process print_report { // section 8.8 of the labbook 20200520
mkdir reports
cat ${stat_tempo} > ./files/${stat_tempo_name} # this is to get hard files, not symlinks
cp ${table1} ${table2} ./files/ # this is to get hard files, not symlinks
cp ${png1} ${png2} ${png3} ${png4} ${png5} ${png6} ${png7} ${png8} ./figures/ # Warning several files
cp ${png1} ${png2} ${png3} ${png4} ${png5} ${png6} ${png7} ${png8} ${png9} ./figures/ # Warning several files
cp ${png1} ./reports/nf_dag.png # trick to delude the knitting during the print report
cp ${html} ./reports/ # this is to get hard files from html from multiqc_ch, not symlinks
print_report.R "${cute_path}" "report_file.rmd" "print_report.txt"
......
......@@ -218,7 +218,11 @@ process {
withLabel: kraken{
container="evolbioinfo/kraken:v2.0.8-beta"
cpus=20
memory='100G'
if(system_exec == 'slurm'){
memory='512G'
}else{
memory='64G'
}
}
}
......@@ -230,13 +234,21 @@ process {
withLabel: r_ext {
container='gmillot/r_v4.1.2_extended_v2.0:gitlab_v8.2'
cpus=1 // only used when name = "local" in the executor part above
memory='64G' // only used when name = "local" in the executor part above
if(system_exec == 'slurm'){
memory='512G'
}else{
memory='90G'
}
}
withLabel: bowtie2 {
container='gmillot/bowtie2_v2.3.4.3_extended_v2.0:gitlab_v8.0'
cpus=12 // only used when name = "local" in the executor part above
memory='64G' // only used when name = "local" in the executor part above
if(system_exec == 'slurm'){
memory='512G'
}else{
memory='64G'
}
}
withLabel: multiqc {
......@@ -260,7 +272,11 @@ process {
withLabel: goalign {
container='evolbioinfo/goalign:v0.3.5'
cpus=12
memory='64G'
if(system_exec == 'slurm'){
memory='512G'
}else{
memory='64G'
}
}
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment