diff --git a/README.md b/README.md index 28a7c3e98e4258572e480552e5b76438c98cc219..cc7f2883f1a71a614d740dcb43793fcc8c20daae 100755 --- a/README.md +++ b/README.md @@ -192,6 +192,10 @@ Gitlab developers ## WHAT'S NEW IN +### v4.1.0 + +1) Ok up to plot after attC read selection and attC trimming, tested on full B2699, and debugged + ### v4.0.0 diff --git a/bin/plot_fivep_filtering.R b/bin/plot_fivep_filtering.R index 063ce47abcd5f743ca388aa67dc1411150d1c88f..0143bcfaec27232ff8faeabc5f0f014f631f6a36 100644 --- a/bin/plot_fivep_filtering.R +++ b/bin/plot_fivep_filtering.R @@ -47,7 +47,7 @@ # R version checking if(version$version.string != "R version 4.0.5 (2021-03-31)"){ - fun_report(data = paste0("\n\nWARNING: THE ", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n"), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("\n\nWARNING: THE ", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n"), output = "report.txt", path = "./", overwrite = FALSE) } # other initializations erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. 
Beginners should use TRUE @@ -294,12 +294,12 @@ sq <- substr(attc_seq, 1, as.numeric(tempo3[[2]])) ################ Ignition -fun_report(data = paste0("\n\n################################################################ plot_fivep_filtering PROCESS\n\n"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################################################ plot_fivep_filtering PROCESS\n\n"), output = "report.txt", path = "./", overwrite = FALSE) ini.date <- Sys.time() ini.time <- as.numeric(ini.date) # time of process begin, converted into seconds -fun_report(data = paste0("\n\n################################ RUNNING DATE AND STARTING TIME\n\n"), output = "report.txt", path = "./", no.overwrite = TRUE) -fun_report(data = paste0(ini.date, "\n\n"), output = "report.txt", path = "./", no.overwrite = TRUE) -fun_report(data = paste0("\n\n################################ RUNNING\n\n"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################ RUNNING DATE AND STARTING TIME\n\n"), output = "report.txt", path = "./", overwrite = FALSE) +fun_report(data = paste0(ini.date, "\n\n"), output = "report.txt", path = "./", overwrite = FALSE) +fun_report(data = paste0("\n\n################################ RUNNING\n\n"), output = "report.txt", path = "./", overwrite = FALSE) ################ End ignition @@ -316,7 +316,7 @@ if(erase.graphs == TRUE){ graphics.off() }else{ tempo.warn <- paste0("GRAPHICS HAVE NOT BEEN ERASED. 
GRAPHICAL PARAMETERS MAY HAVE NOT BEEN REINITIALIZED") - fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", overwrite = FALSE) warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) } @@ -343,7 +343,7 @@ if(ncol(stat.df) > 0){ stat.df3 <- cbind(pos = 1:nrow(stat.df), stat.df2) }else{ tempo.warn <- paste0("EMPTY .stat FILE: NO PLOT DRAWN") - fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", overwrite = FALSE) warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) } @@ -353,7 +353,7 @@ if(length(length2.vec) > 0){ length2.df$Length <- as.numeric(as.character(length2.df$Length)) }else{ tempo.warn <- paste0("EMPTY .length FILE: NO PLOT DRAWN") - fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("WARNING\n", tempo.warn), output = "report.txt", path = "./", overwrite = FALSE) warn <- paste0(ifelse(is.null(warn), tempo.warn, paste0(warn, "\n\n", tempo.warn))) } @@ -378,13 +378,13 @@ if(ncol(stat.df) > 0){ x.lim = c(0, nrow(stat.df) + 1), x.lab = "Read Position", x.tick.nb = nrow(stat.df), - x.inter.tick.nb = 0, + x.second.tick.nb = NULL, x.left.extra.margin = 0, x.right.extra.margin = 0, y.lab = "Proportion", y.lim = c(0, 1.2), y.tick.nb = 5, - y.inter.tick.nb = 3, + y.second.tick.nb = 1, y.top.extra.margin = 0, y.bottom.extra.margin = 0, grid = FALSE, @@ -409,13 +409,13 @@ if(length(length2.vec) > 0){ x.lim = c(0, 175), x.lab = "Read Length", x.tick.nb = 10, - x.inter.tick.nb = 0, + x.second.tick.nb = 4, x.left.extra.margin = 0, x.right.extra.margin = 0, y.lab = NULL, y.lim = NULL, y.tick.nb = 10, - y.inter.tick.nb = 2, + y.second.tick.nb = 1, 
y.top.extra.margin = 0, y.bottom.extra.margin = 0, grid = FALSE, @@ -455,13 +455,13 @@ set.seed(NULL) save(list = ls(), file = "all_objects.RData") -fun_report(data = paste0("\n\n################################ RUNNING END"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################ RUNNING END"), output = "report.txt", path = "./", overwrite = FALSE) end.date <- Sys.time() end.time <- as.numeric(end.date) total.lapse <- round(lubridate::seconds_to_period(end.time - ini.time)) -fun_report(data = paste0("\n\nEND TIME: ", end.date), output = "report.txt", path = "./", no.overwrite = TRUE) -fun_report(data = paste0("\n\nTOTAL TIME LAPSE: ", total.lapse), output = "report.txt", path = "./", no.overwrite = TRUE) -fun_report(data = paste0("\n\nALL DATA SAVED IN all_objects.RData"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\nEND TIME: ", end.date), output = "report.txt", path = "./", overwrite = FALSE) +fun_report(data = paste0("\n\nTOTAL TIME LAPSE: ", total.lapse), output = "report.txt", path = "./", overwrite = FALSE) +fun_report(data = paste0("\n\nALL DATA SAVED IN all_objects.RData"), output = "report.txt", path = "./", overwrite = FALSE) ################ end Environment saving @@ -470,11 +470,11 @@ fun_report(data = paste0("\n\nALL DATA SAVED IN all_objects.RData"), output = "r ################ Warning messages -fun_report(data = paste0("\n\n################################ RECAPITULATION OF WARNING MESSAGES"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################ RECAPITULATION OF WARNING MESSAGES"), output = "report.txt", path = "./", overwrite = FALSE) if( ! 
is.null(warn)){ - fun_report(data = paste0("\n\n", warn), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("\n\n", warn), output = "report.txt", path = "./", overwrite = FALSE) }else{ - fun_report(data = paste0("\n\nNO WARNING MESSAGE TO REPORT"), output = "report.txt", path = "./", no.overwrite = TRUE) + fun_report(data = paste0("\n\nNO WARNING MESSAGE TO REPORT"), output = "report.txt", path = "./", overwrite = FALSE) } @@ -484,14 +484,14 @@ if( ! is.null(warn)){ ################ Parameter printing -fun_report(data = paste0("\n\n################################ INITIAL SETTINGS OF PARAMETERS"), output = "report.txt", path = "./", no.overwrite = TRUE) -fun_report(data = param.ini.settings, output = "report.txt", path = "./", no.overwrite = TRUE, , vector.cat = TRUE) -fun_report(data = paste0("\n\n################################ R SYSTEM AND PACKAGES"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = paste0("\n\n################################ INITIAL SETTINGS OF PARAMETERS"), output = "report.txt", path = "./", overwrite = FALSE) +fun_report(data = param.ini.settings, output = "report.txt", path = "./", overwrite = FALSE, , vector.cat = TRUE) +fun_report(data = paste0("\n\n################################ R SYSTEM AND PACKAGES"), output = "report.txt", path = "./", overwrite = FALSE) tempo <- sessionInfo() tempo$otherPkgs <- tempo$otherPkgs[order(names(tempo$otherPkgs))] # sort the packages tempo$loadedOnly <- tempo$loadedOnly[order(names(tempo$loadedOnly))] # sort the packages -fun_report(data = tempo, output = "report.txt", path = "./", no.overwrite = TRUE, , vector.cat = TRUE) -fun_report(data = paste0("\n\n################################ JOB END\n\nTIME: ", end.date, "\n\nTOTAL TIME LAPSE: ", total.lapse, "\n"), output = "report.txt", path = "./", no.overwrite = TRUE) +fun_report(data = tempo, output = "report.txt", path = "./", overwrite = FALSE, , vector.cat = TRUE) +fun_report(data = 
paste0("\n\n################################ JOB END\n\nTIME: ", end.date, "\n\nTOTAL TIME LAPSE: ", total.lapse, "\n"), output = "report.txt", path = "./", overwrite = FALSE) ################ end Parameter printing diff --git a/bin/trim.sh b/bin/trim.sh index 22c82ddbf36d3082b429b245a4d198efa3d3cefd..dcd9ce103fa36771325b22c4bedce6d3e8bb6f01 100755 --- a/bin/trim.sh +++ b/bin/trim.sh @@ -28,7 +28,7 @@ AlienTrimmer -i input_file2 -c ${primer_fasta} -o ${output_file} -l ${alientrimm # rm tempo.txt # not removed to be able to use -resume cat tempo.txt >> ${log} -line_nb_before=$(zcat ${input_file2} | wc -l) +line_nb_before=$(zcat ${input_file} | wc -l) line_nb_after=$(cat ${output_file} | wc -l) #not compressed anymore echo -e "NUMBER OF SEQUENCE BEFORE N REMOVING: $((${line_nb_before} / 4))\n" >> ${log} diff --git a/dataset/test.fastq.gz b/dataset/test.fastq.gz index e15500a9f1489653325207efdd27d54109b597b4..21992efcc8659d97be47ff9d53fbd3216717e45e 100644 Binary files a/dataset/test.fastq.gz and b/dataset/test.fastq.gz differ diff --git a/dev/test.config b/dev/test.config index c319eeae00e66d7088d20bb94a754b2e91d30bdb..0ff132e6a98f7679fce253c30d627022a41289b3 100644 --- a/dev/test.config +++ b/dev/test.config @@ -43,7 +43,7 @@ env { // ^[CN][AN][AN][TN][TN][CN][AN][TN][TN][CN][AN][AN][GN][CN][CN][GN][AN][CN][GN][CN][CN][GN][CN][TN][TN][CN][GN][CN][GN]GCGCGGCTTAATTCAAGCG.+$ fivep_seq_nb=48 // must be the exact number of nuc positions indicated in fivep_seq_filtering //// end fivep_filtering - cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/479ce36b7b7fe47eef2c602c1ea66dbadbeee4b7/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. 
With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R" + cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With internet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R" } //////// end variables that will be used only in the main.nf diff --git a/nextflow.config b/nextflow.config index 09e7ab8e1f1cf6b099ef5b46e75208bc23b8a76b..7713d01befa65b920cbe1170d255822d44834cc0 100755 --- a/nextflow.config +++ b/nextflow.config @@ -20,11 +20,11 @@ env { git_path="https://gitlab.pasteur.fr/gmillot/14985_loot/" //in_path="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset" //in_path="/mnt/share/14985_loot/dataset/B2699/00_Rawdata" - in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/1" // where initial fastq file is + in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B4985/2" // where initial fastq file is //in_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/dataset/B2699/00_Rawdata" // where initial fastq file is //fastq_file="test.fastq.gz" // fastq file name //fastq_file="Pool-B2699_S1_L001_R1_001.fastq.gz" - fastq_file="1-2_S1_L001_R1_001.fastq.gz" + fastq_file="2-4_S1_L001_R1_001.fastq.gz" //primer_fasta="/mnt/c/Users/Gael/Documents/Git_projects/14985_loot/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" 
primer_fasta="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads //primer_fasta="/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta" @@ -43,7 +43,7 @@ env { // ^[CN][AN][AN][TN][TN][CN][AN][TN][TN][CN][AN][AN][GN][CN][CN][GN][AN][CN][GN][CN][CN][GN][CN][TN][TN][CN][GN][CN][GN]GCGCGGCTTAATTCAAGCG.+$ fivep_seq_nb=48 // must be the exact number of nuc positions indicated in fivep_seq_filtering //// end fivep_filtering - cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/479ce36b7b7fe47eef2c602c1ea66dbadbeee4b7/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R" + cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. 
With internet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R" } //////// end variables that will be used only in the main.nf @@ -60,7 +60,7 @@ out_path="/pasteur/zeus/projets/p01/BioIT/gmillot/14985_loot/results" // where t //// end must be also exported //// general variables -result_folder_name="20211201_res_CL14985_1_fivepFiltering" +result_folder_name="20211201_res_CL14985_2_fivepFiltering" //// end general variables //// slurm variables