diff --git a/README.md b/README.md index 3f6702b0e6f7d4477be194b0541625355fc918a1..983f761585cae209a013dfa1401489cb41fd63f3 100755 --- a/README.md +++ b/README.md @@ -192,6 +192,10 @@ Gitlab developers ## WHAT'S NEW IN +### v3.0.0 + +1) Ok up to plot after attC read selection and attC trimming + ### v2.0.0 diff --git a/bin/plot_fivep_filtering.R b/bin/plot_fivep_filtering.R index 5c90e9cae66d35316460ea09f8e969822d07eefd..a7fe1ffd9856198535250dc4ca32458579c2ba35 100644 --- a/bin/plot_fivep_filtering.R +++ b/bin/plot_fivep_filtering.R @@ -33,7 +33,6 @@ # "cute" # "log" - ################################ End Introduction @@ -48,7 +47,7 @@ # R version checking if(version$version.string != "R version 4.0.5 (2021-03-31)"){ - cat(paste0("\n\nWARNING: THE ", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n")) + cat(paste0("\n\nWARNING: THE ", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n"), file = "log_plot_5p_filtering.txt", append = TRUE) # base cat() appended to the log file: fun_report() is not available yet at this point (the cute toolbox path is only read from the command line further down) } # other initializations erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE @@ -93,6 +92,16 @@ if(interactive() == FALSE){ # if(grepl(x = commandArgs(trailingOnly = FALSE), pa ################################ End Config import +################################ Test + +# fun_report(data = paste0("\n\n!!!!!!!!!!!!!!!!!!! 
WARNING: test values are activated\n\n")) +# length2 <- "C:/Users/Gael/Documents/Git_projects/14985_loot/work/1f/a2ca7cf14c91886c19ad9bbac83474/test.fastq_Nremove_trim_5pAttc.length" +# stat <- "C:/Users/Gael/Documents/Git_projects/14985_loot/work/1f/a2ca7cf14c91886c19ad9bbac83474/test.fastq_Nremove_trim_5pAttc_1-48.stat" +# attc_seq <- "CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG" +# cute <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/479ce36b7b7fe47eef2c602c1ea66dbadbeee4b7/cute_little_R_functions.R" +# log <- "log_5p_filtering.txt" + +################################ end Test ################################ Recording of the initial parameters @@ -169,7 +178,10 @@ req.function <- c( "fun_df_remod", "fun_gg_scatter", "fun_gg_palette", - "fun_close" + "fun_open", + "fun_gg_empty_graph", + "fun_close", + "fun_report" ) tempo <- NULL for(i1 in req.function){ @@ -193,7 +205,8 @@ req.package.list <- c( "ggplot2", "lemon" ) -fun_pack(req.package = req.package.list, load = TRUE, lib.path = NULL) # packages are imported even if inside functions are written as package.name::function() in the present code +for(i in 1:length(req.package.list)){suppressMessages(library(req.package.list[i], character.only = TRUE))} +# fun_pack(req.package = req.package.list, load = TRUE, lib.path = NULL) # packages are imported even if inside functions are written as package.name::function() in the present code ################################ End Functions @@ -245,15 +258,15 @@ warn <- NULL # end warning initiation # other checkings # recover the number of bases used for 5p filtering in stat name (e.g., _1-48.stat) -tempo1 <- strsplit(stat, split = "_") -tempo2 <- tempo1[[length(tempo1)]] -tempo3 <- strsplit(tempo2, split = "[-.]") -if(tempo3[[1]] != "1"){ - tempo.cat <- paste0("\n\n================\n\nINTERNAL CODE ERROR 3 IN plot_fivep_filtering.R: CODE HAS TO BE MODIFIED\n\n============\n\n") +tempo1 <- strsplit(stat, split = "_")[[1]] +tempo2 <- 
tempo1[length(tempo1)] +tempo3 <- strsplit(tempo2, split = "[-.]")[[1]] +if(tempo3[1] != "1"){ + tempo.cat <- paste0("\n\n================\n\nINTERNAL CODE ERROR 4 IN plot_fivep_filtering.R: CODE HAS TO BE MODIFIED\n", paste(tempo3[1], collapse = " "), "\n", paste(tempo1, collapse = " "), "\n\n============\n\n") stop(tempo.cat, call. = FALSE) } -if(tempo3[[3]] != "stat"){ - tempo.cat <- paste0("\n\n================\n\nINTERNAL CODE ERROR 4 IN plot_fivep_filtering.R: CODE HAS TO BE MODIFIED\n\n============\n\n") +if(tempo3[3] != "stat"){ + tempo.cat <- paste0("\n\n================\n\nINTERNAL CODE ERROR 5 IN plot_fivep_filtering.R: CODE HAS TO BE MODIFIED\n", paste(tempo3[3], collapse = " "), "\n", paste(tempo2, collapse = " "), "\n\n============\n\n") stop(tempo.cat, call. = FALSE) } sq <- substr(attc_seq, 1, as.numeric(tempo3[[2]])) @@ -274,11 +287,14 @@ sq <- substr(attc_seq, 1, as.numeric(tempo3[[2]])) ################ Ignition -cat(paste0("\n\n################################################################\n\n\n\t\tplot_fivep_filtering\n\n\n################################################################\n\n")) + +fun_report(data = paste0("\n\n################################################################\n\n\n\t\tplot_fivep_filtering\n\n\n################################################################\n\n"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) ini.date <- Sys.time() ini.time <- as.numeric(ini.date) # time of process begin, converted into seconds -cat(paste0(("\n\n################################ RUNNING DATE AND STARTING TIME\n\n")) -cat(paste0((ini.date, "\n\n")) +fun_report(data = paste0("\n\n################################ RUNNING DATE AND STARTING TIME\n\n"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0(ini.date, "\n\n"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################ 
RUNNING\n\n"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) + ################ End ignition @@ -294,7 +310,7 @@ if(erase.graphs == TRUE){ graphics.off() }else{ tempo.warn <- paste0("GRAPHICS HAVE NOT BEEN ERASED. GRAPHICAL PARAMETERS MAY HAVE NOT BEEN REINITIALIZED") - cat(paste0("WARNING\n", tempo.warn)) + fun_report(data = paste0("WARNING\n", tempo.warn), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) } @@ -305,8 +321,10 @@ if(erase.graphs == TRUE){ ################ Data import -stat.df <- read.table(stat, header=FALSE, sep = "\t", stringsAsFactors = TRUE) # no env = sys.nframe(), inherit = FALSE in get() because look for function in the classical scope - +stat.df <- read.table(stat, row.names = 1) +length2.vec <- scan(length2, what = "integer") +print(stat.df) +print(length2.vec) ################ end Data import @@ -314,16 +332,25 @@ stat.df <- read.table(stat, header=FALSE, sep = "\t", stringsAsFactors = TRUE) # ############ modifications of imported tables -stat.df <- read.table(stat, row.names = 1) -stat.df <- as.data.frame(t(as.matrix(stat.df))) -stat.df2<- fun_df_remod(stat.df) -stat.df3 <- cbind(pos = 1:nrow(stat.df), stat.df2) - -length2.vec <- scan(length2, what = "integer") -length2.df <- data.frame(table(length2.vec)) -names(length2.df) <- c("Length", "Count") -length2.df$Length <- as.numeric(as.character(length2.df$Length)) +if(ncol(stat.df) > 0){ + stat.df <- as.data.frame(t(as.matrix(stat.df))) + stat.df2<- fun_df_remod(stat.df) + stat.df3 <- cbind(pos = 1:nrow(stat.df), stat.df2) +}else{ + tempo.warn <- paste0("EMPTY .stat FILE: NO PLOT DRAWN") + fun_report(data = paste0("WARNING\n", tempo.warn), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) + warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) +} +if(length(length2.vec) > 0){ + length2.df <- 
data.frame(table(length2.vec)) + names(length2.df) <- c("Length", "Count") + length2.df$Length <- as.numeric(as.character(length2.df$Length)) +}else{ + tempo.warn <- paste0("EMPTY .length FILE: NO PLOT DRAWN") + fun_report(data = paste0("WARNING\n", tempo.warn), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) + warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) +} ############ end modifications of imported tables @@ -331,63 +358,71 @@ length2.df$Length <- as.numeric(as.character(length2.df$Length)) ############ plotting -pdf(12, 4) -fun_gg_scatter( -data1 = stat.df3, -x = "pos", -y = "quanti", -categ = "quali", -geom = "geom_path", -alpha = 1, -color = fun_gg_palette(n = 5), -x.lim = c(0, nrow(stat.df3) + 1), -x.lab = "Read Position", -x.tick.nb = nrow(stat.df3), -x.inter.tick.nb = 0, -x.left.extra.margin = 0, -x.right.extra.margin = 0, -y.lab = "Proportion", -y.lim = c(0, 1.2), -y.tick.nb = 5, -y.inter.tick.nb = 3, -y.top.extra.margin = 0, -y.bottom.extra.margin = 0, -grid = FALSE, -article = TRUE, -legend.width = 0.1, -legend.name = "", -text.size = 16, -line.size = 1, -add = '+ggplot2::annotate(geom = "text", x = 1:nrow(stat.df3), y = 1.1, label = unlist(strsplit(sq, split = "")), size = 5, color = "black", hjust = 0.5, vjust = 0.5)' -) - - +fun_open(width = 12, height = 4, pdf.name = "plot_5p_filtering") # must be systematically opened for main.nf + + +if(ncol(stat.df) > 0){ + fun_gg_scatter( + data1 = stat.df3, + x = "pos", + y = "quanti", + categ = "quali", + geom = "geom_path", + alpha = 1, + color = fun_gg_palette(n = 5), + x.lim = c(0, nrow(stat.df3) + 1), + x.lab = "Read Position", + x.tick.nb = nrow(stat.df3), + x.inter.tick.nb = 0, + x.left.extra.margin = 0, + x.right.extra.margin = 0, + y.lab = "Proportion", + y.lim = c(0, 1.2), + y.tick.nb = 5, + y.inter.tick.nb = 3, + y.top.extra.margin = 0, + y.bottom.extra.margin = 0, + grid = FALSE, + article = TRUE, + legend.width = 0.1, + legend.name = "", 
+ text.size = 16, + line.size = 1, + add = '+ggplot2::annotate(geom = "text", x = 1:nrow(stat.df3), y = 1.1, label = unlist(strsplit(sq, split = "")), size = 5, color = "black", hjust = 0.5, vjust = 0.5)' + ) +}else{ + fun_gg_empty_graph(text = "EMPTY .stat FILE: NO PLOT DRAWN") +} -fun_gg_scatter( -data1 = length2.df, -x = "Length", -y = "Count", -geom = "geom_line", -alpha = 1, -x.lim = c(0, 175), -x.lab = "Read Length", -x.tick.nb = 10, -x.inter.tick.nb = 0, -x.left.extra.margin = 0, -x.right.extra.margin = 0, -y.lab = NULL, -y.lim = NULL, -y.tick.nb = 10, -y.inter.tick.nb = 2, -y.top.extra.margin = 0, -y.bottom.extra.margin = 0, -grid = FALSE, -article = TRUE, -legend.width = 0, -legend.name = "", -text.size = 16, -line.size = 1 -) +if(length(length2.vec) > 0){ + fun_gg_scatter( + data1 = length2.df, + x = "Length", + y = "Count", + geom = "geom_line", + alpha = 1, + x.lim = c(0, 175), + x.lab = "Read Length", + x.tick.nb = 10, + x.inter.tick.nb = 0, + x.left.extra.margin = 0, + x.right.extra.margin = 0, + y.lab = NULL, + y.lim = NULL, + y.tick.nb = 10, + y.inter.tick.nb = 2, + y.top.extra.margin = 0, + y.bottom.extra.margin = 0, + grid = FALSE, + article = TRUE, + legend.width = 0, + legend.name = "", + text.size = 16, + line.size = 1 + ) +}else{ + fun_gg_empty_graph(text = "EMPTY .length FILE: NO PLOT DRAWN") +} ############ end plotting @@ -415,13 +450,13 @@ set.seed(NULL) save(list = ls(), file = "all_objects.RData") -cat(paste0(("\n\n################################ RUNNING END")) +fun_report(data = paste0("\n\n################################ RUNNING END"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) end.date <- Sys.time() end.time <- as.numeric(end.date) total.lapse <- round(lubridate::seconds_to_period(end.time - ini.time)) -cat(paste0("\n\nEND TIME: ", end.date)) -cat(paste0("\n\nTOTAL TIME LAPSE: ", total.lapse)) -cat(paste0("\n\nALL DATA SAVED IN all_objects.RData")) +fun_report(data = paste0("\n\nEND TIME: ", end.date), 
output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\nTOTAL TIME LAPSE: ", total.lapse), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\nALL DATA SAVED IN all_objects.RData"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) ################ end Environment saving @@ -430,11 +465,11 @@ cat(paste0("\n\nALL DATA SAVED IN all_objects.RData")) ################ Warning messages -cat(paste0("\n\n################################ RECAPITULATION OF WARNING MESSAGES")) +fun_report(data = paste0("\n\n################################ RECAPITULATION OF WARNING MESSAGES"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) if( ! is.null(warn)){ - cat(paste0("\n\n", warn)) + fun_report(data = paste0("\n\n", warn), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) }else{ - cat(paste0("\n\nNO WARNING MESSAGE TO REPORT")) + fun_report(data = paste0("\n\nNO WARNING MESSAGE TO REPORT"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) } @@ -444,14 +479,14 @@ if( ! 
is.null(warn)){ ################ Parameter printing -cat(paste0("\n\n################################ INITIAL SETTINGS OF PARAMETERS")) -cat(paste0("\n\n", param.ini.settings)) -cat(paste0("\n\n################################ R SYSTEM AND PACKAGES")) +fun_report(data = paste0("\n\n################################ INITIAL SETTINGS OF PARAMETERS"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) +fun_report(data = param.ini.settings, output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE, vector.cat = TRUE) +fun_report(data = paste0("\n\n################################ R SYSTEM AND PACKAGES"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) tempo <- sessionInfo() tempo$otherPkgs <- tempo$otherPkgs[order(names(tempo$otherPkgs))] # sort the packages tempo$loadedOnly <- tempo$loadedOnly[order(names(tempo$loadedOnly))] # sort the packages -cat(paste0("\n\n", tempo)) -cat(paste0("\nJOB END\n\nTIME: ", end.date, "\n\nTOTAL TIME LAPSE: ", total.lapse, "\n")) +fun_report(data = tempo, output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE, vector.cat = TRUE) +fun_report(data = paste0("\n\n################################ JOB END\n\nTIME: ", end.date, "\n\nTOTAL TIME LAPSE: ", total.lapse, "\n"), output = "log_plot_5p_filtering.txt", path = "./", no.overwrite = TRUE) ################ end Parameter printing diff --git a/dataset/test.config b/dataset/test.config new file mode 100644 index 0000000000000000000000000000000000000000..c319eeae00e66d7088d20bb94a754b2e91d30bdb --- /dev/null +++ b/dataset/test.config @@ -0,0 +1,239 @@ +/* +######################################################################### +## ## +## nextflow.config ## +## ## +## Gael A. 
Millot ## +## Bioinformatics and Biostatistics Hub ## +## Computational Biology Department ## +## Institut Pasteur Paris ## +## ## +######################################################################### +*/ + + +//////// variables that will be used only in the main.nf + +// variables exported to the main.nf environment. See https://www.nextflow.io/docs/latest/config.html#scope-env +env { + //// path and files + git_path="https://gitlab.pasteur.fr/gmillot/14985_loot/" + in_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset" + //in_path="/mnt/share/14985_loot/dataset/B2699/00_Rawdata" + //in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/3" // where initial fastq file is + //in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata" // where initial fastq file is + fastq_file="test.fastq.gz" // fastq file name + //fastq_file="4-4_S1_L001_R1_001.fastq.gz" + //fastq_file="3-4_S1_L001_R1_001.fastq.gz" + primer_fasta="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" + //primer_fasta="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads + //primer_fasta="/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" + //// end path and files + + //// alientrimmer + alientrimmer_l_param=30 // L parameter of alienTrimmer + //// end alientrimmer + + //// fivep_filtering + attc_seq="CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG" // sequence of attc, in red and purple in section 4 20200505 of the CL labbook (48 bases on the left of the cutting site). Required for plotting. 
Warning: never change this sequence + fivep_seq_filtering='^CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG.+$' // regex indicating the 5' sequence of reads to select, then to trim from the selected reads. See the section 8.6 to 8.13 of the labbook 20200520, but instead of analysing and trimming in two steps (29 Nuc of AttC part of the primer then 19 Nuc between primer and Attc cutting site), perform all in a single step, and play with the regex, like Test also + // ^CAATTCATTCAAGCCGACGCCGCTTCGCG[GN][CN][GN][CN][GN][GN][CN][TN][TN][AN][AN][TN][TN][CN][AN][AN][GN][CN][GN].+$ + // [CN][AN][AN][TN][TN][CN][AN][TN][TN][CN][AN][AN][GN][CN][CN][GN][AN][CN][GN][CN][CN][GN][CN][TN][TN][CN][GN][CN][GN][GN][CN][GN][CN][GN][GN][CN][TN][TN][AN][AN][TN][TN][CN][AN][AN][GN][CN][GN].+$ + // ^CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG.+$ + // ^[CN][AN][AN][TN][TN][CN][AN][TN][TN][CN][AN][AN][GN][CN][CN][GN][AN][CN][GN][CN][CN][GN][CN][TN][TN][CN][GN][CN][GN]GCGCGGCTTAATTCAAGCG.+$ + fivep_seq_nb=48 // must be the exact number of nuc positions indicated in fivep_seq_filtering + //// end fivep_filtering + cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/479ce36b7b7fe47eef2c602c1ea66dbadbeee4b7/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R" +} + +//////// end variables that will be used only in the main.nf + + + + +//////// variables that will be used below (and potentially in the main.nf file) + +//// must be also exported +system_exec = 'local' // the system that runs the workflow. Either 'local' or 'slurm' +out_path="/mnt/c/Users/Gael/Desktop" // where the report file will be saved. 
Example out_path = '.' for where the main.nf run is executed or out_path = '/mnt/c/Users/Gael/Desktop' +//out_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results" // where the report file will be saved. Example out_path = '.' for where the main.nf run is executed or out_path = '/mnt/c/Users/Gael/Desktop' +//// end must be also exported + +//// general variables +result_folder_name="20211201_res_CL14985_test" +//// end general variables + +//// slurm variables +fastqueue = 'hubbioit' // fast for -p option of slurm. Example: fastqueue = 'common,dedicated'. Example: fastqueue = 'hubbioit' +fastqos= '--qos=fast' // fast for --qos option of slurm. Example: fastqos= '--qos=fast' +normalqueue = 'hubbioit' // normal for -p option of slurm. Example: normalqueue = 'bioevo' +normalqos = '--qos=hubbioit' // normal for --qos option of slurm. Example: normalqos = '--qos=dedicated' +longqueue = 'hubbioit' // slow for -p option of slurm. Example: longqueue = 'bioevo' +longqos = '--qos=hubbioit' // slow for --qos option of slurm. Example: longqos = '--qos=dedicated' +add_options = ' ' // additional option of slurm. Example: add_options = '--exclude=maestro-1101,maestro-1034' or add_options = ' ' +//// end slurm variables + +//////// end variables that will be used below + + + +//////// Pre processing + +int secs = (new Date().getTime())/1000 +out_path="${out_path}/${result_folder_name}_${secs}" + +//////// end Pre processing + + + +//////// variables used here and also in the main.nf file + +env { + system_exec = "${system_exec}" + out_path = "${out_path}" +} + +//////// end variables used here and also in the main.nf file + + + + + +//////// Scopes + +// kind of execution. Either 'local' or 'slurm' +// those are closures. See https://www.nextflow.io/docs/latest/script.html#closures +executor { + name = "${system_exec}" + queueSize = 2000 +} + +// create a report folder and print an html report file. 
If no absolute path, will be where the run is executed +// see https://www.nextflow.io/docs/latest/config.html#config-report +report { + enabled = true + file = "${out_path}/reports/report.html" // warning: here double quotes to get the nextflow variable interpretation +} + +// txt file with all the processes and info +trace { + enabled = true + file = "${out_path}/reports/trace.txt" +} + +// html file with all the processes +timeline { + enabled = true + file = "${out_path}/reports/timeline.html" +} + +// .dot picture of the workflow +dag { + enabled = true + file = "${out_path}/reports/dag.dot" +} + + +// define singularity parameters +singularity { + enabled = true + autoMounts = true // automatically mounts host paths in the executed container + //runOptions = '--home $HOME:/home/$USER --bind /pasteur' // provide any extra command line options supported by the singularity exec. Here, binds the whole host /pasteur into /pasteur of the container. Otherwise it is not accessible + cacheDir = 'singularity' // name of the directory where remote Singularity images are stored. When rerun, the exec directly uses these without redownloading them. 
When using a computing cluster it must be a shared folder accessible to all computing nodes +} + +//////// end Scopes + + + +//////// directives + +// provide the default directives for all the processes in the main.nf pipeline calling this config file +process { +// directives for all the processes + // executor='local' // no need because already defined above in the executor scope + if(system_exec == 'slurm'){ + queue = "$fastqueue" + clusterOptions = "$fastqos $add_options" + scratch=false + maxRetries=1 + errorStrategy='retry' + } + + withLabel: bash { + container='gmillot/bash-extended_v3.0:gitlab_v4.0' + cpus=1 + memory='3G' + } + + withLabel: alien_trimmer { + container='gmillot/alien_trimmer_v0.4.0:gitlab_v5.1' // no most recent at 20210930 + cpus=1 + memory='3G' + } + + withLabel: fastqc { + container='evolbioinfo/fastqc:v0.11.8' + cpus=1 + } + + withLabel: r_ext { + container='gmillot/r-extended_v4.0.5:gitlab_v6.1' + cpus=1 + memory='64G' + } + + + + +// all the processes of the main.nf file with the label 'bedtools' will use this directives by default + withLabel: bedtools { + container='evolbioinfo/bedtools:v2.29.2' + cpus=1 + memory='3G' + } + + withLabel: samtools { + container='evolbioinfo/samtools:v1.11' + cpus=1 + memory='1G' + } + + withLabel: coverage { + container='evolbioinfo/samtools:v1.11' + cpus=1 + memory='5G' + } + + withLabel: gatk { + //scratch=true + container='broadinstitute/gatk:4.1.9.0' + memory='60G' + if(system_exec == 'slurm'){ + queue = {task.attempt>1 ? "$normalqueue" : "$fastqueue" } + clusterOptions = {task.attempt > 1 ? 
"$normalqos $add_options" : "$fastqos $add_options" } + } + } + + withLabel: bwa { + container="evolbioinfo/bwa:v0.7.17" + cpus=20 + memory='30G' + } + + withLabel: bcftools { + container="evolbioinfo/bcftools:f27f849" + cpus=1 + memory='10G' + } + + + + withLabel: multiqc { + container='ewels/multiqc:1.10.1' + errorStrategy='ignore' + cpus=1 + } +} + +//////// end directives \ No newline at end of file diff --git a/main.nf b/main.nf index 32de1a3b8c54b22f34b7c3900bf815b2561fbbf8..b64f70fa3d95c1282db132434a36875655870bf2 100755 --- a/main.nf +++ b/main.nf @@ -188,7 +188,7 @@ process fivep_filtering { // section 8.6 to 8.13 of the labbook 20200520. Instea process plot_fivep_filtering { // section 8.6 to 8.13 of the labbook 20200520. Instead of analysing and trimming in two steps (29 Nuc of AttC part of the primer then 19 Nuc between primer and Attc cutting site), perform all in a single step, and play with the regex label 'r_ext' // see the withLabel: bash in the nextflow config file publishDir "${out_path}", mode: 'copy', overwrite: false // https://docs.oracle.com/javase/tutorial/essential/io/fileOps.html#glob - cache 'true' + cache 'false' input: file length from length_fastq_5p_filter_ch file stat from stat_fastq_5p_filter_ch @@ -201,7 +201,7 @@ process plot_fivep_filtering { // section 8.6 to 8.13 of the labbook 20200520. I script: """ - Rscript $workflow.projectDir/bin/plot_fivep_filtering.R "${length}" "${stat}" "${attc_seq}" "${cute}" "log_5p_filtering.txt" + Rscript $workflow.projectDir/bin/plot_fivep_filtering.R "${length}" "${stat}" "${attc_seq}" "${cute}" "log_plot_5p_filtering.txt" # Warning: $workflow.projectDir/bin/ is the only way to have the execution rights of a .R file in the bin directory when the gitlab repo is pulled into /pasteur/sonic/homes/gmillot/.nextflow/assets/. See https://github.com/nextflow-io/nextflow/issues/698. 
Otherwise, the following message can appear: Fatal error: cannot open file '/pasteur/sonic/homes/gmillot/.nextflow/assets/gmillot/14985_loot/bin/plot_fivep_filtering.R': No such file or directory """ } diff --git a/nextflow.config b/nextflow.config index 344b638324c4fa3f53629f22de8f2c4744135097..5682582967d4a2d8a1ce7e67d2d4479189fef234 100755 --- a/nextflow.config +++ b/nextflow.config @@ -178,7 +178,7 @@ process { } withLabel: r_ext { - container='gmillot/r-extended_v4.0.5:gitlab_v2.0' + container='gmillot/r-extended_v4.0.5:gitlab_v6.1' cpus=1 memory='64G' }