Commit c20c104d authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

tempo: last version ok for Coli except in global_logo

parent 1d6ffb17
......@@ -146,7 +146,7 @@ Then run:
```bash
# distant main.nf file
HOME="$ZEUSHOME/14985_loot/" ; nextflow run --modules ${MODULES} -hub pasteur gmillot/14985_loot -r v7.9.0 -c $HOME/nextflow.config ; HOME="/pasteur/appa/homes/gmillot/"
HOME="$ZEUSHOME/14985_loot/" ; nextflow run --modules ${MODULES} -hub pasteur gmillot/14985_loot -r v7.10.0 -c $HOME/nextflow.config ; HOME="/pasteur/appa/homes/gmillot/"
# local main.nf file ($HOME changed to allow the creation of .nextflow into /$ZEUSHOME/14985_loot/. See NFX_HOME in the nextflow soft script)
HOME="$ZEUSHOME/14985_loot/" ; nextflow run --modules ${MODULES} main.nf ; HOME="/pasteur/appa/homes/gmillot/"
......
This diff is collapsed.
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -74,6 +74,7 @@ if(interactive() == FALSE){ # if(grepl(x = commandArgs(trailingOnly = FALSE), pa
"pos",
"ori_coord",
"ter_coord",
"nb_max_insertion_sites",
"file_name",
"cute",
"log"
......@@ -100,11 +101,16 @@ rm(tempo.cat)
################################ Test
# cat("\n\n!!!!!!!!!!!!!!!!!!! WARNING: test values are activated\n\n")
# stat <- "C:/Users/Gael/Documents/Git_projects/14985_loot/dataset/test.fastq_Nremove_trim_5pAttc_1-51.stat"
# attc_seq <- "CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG"
# cute <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R"
# pos <- "C:/Users/Gael/Documents/Git_projects/14985_loot/dataset/test.fastq2_q20.pos"
# ori_coord <- "2320711 2320942"
# ter_coord <- "4627368 4627400"
# nb_max_insertion_sites <- "6"
# file_name <- "caca"
# cute <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v11.2.0/cute_little_R_functions.R"
# log <- "report.txt"
################################ end Test
################################ Recording of the initial parameters
......@@ -119,6 +125,7 @@ param.list <- c(
"pos",
"ori_coord",
"ter_coord",
"nb_max_insertion_sites",
"file_name",
"cute",
"log"
......@@ -229,6 +236,7 @@ ee <- expression(arg.check <- c(arg.check, tempo$problem) , text.check <- c(text
tempo <- fun_check(data = pos, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = ori_coord, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = ter_coord, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = nb_max_insertion_sites, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = file_name, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = cute, class = "vector", typeof = "character", length = 1) ; eval(ee)
tempo <- fun_check(data = log, class = "vector", typeof = "character", length = 1) ; eval(ee)
......@@ -244,6 +252,7 @@ tempo.arg <-c(
"pos",
"ori_coord",
"ter_coord",
"nb_max_insertion_sites",
"file_name",
"cute",
"log"
......@@ -270,6 +279,10 @@ if(length(ori_coord) != 2 & any(grepl(ori_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ori_coord <- as.integer(ori_coord)
if(any(is.na(ori_coord))){
tempo.cat <- paste0("ERROR IN final_insertion_files.R:\nTHE CONVERSION OF THE ori_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ori_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
ter_coord <- strsplit(ter_coord, split = " ")[[1]]
if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally no NA with is.null()
......@@ -277,9 +290,21 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ter_coord <- as.integer(ter_coord)
if(any(is.na(ter_coord))){
tempo.cat <- paste0("ERROR IN final_insertion_files.R:\nTHE CONVERSION OF THE ter_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ter_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
if(length(nb_max_insertion_sites) != 1 & any(grepl(nb_max_insertion_sites, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN final_insertion_files.R:\nTHE nb_max_insertion_sites PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(nb_max_insertion_sites, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
nb_max_insertion_sites <- as.integer(nb_max_insertion_sites)
if(any(is.na(nb_max_insertion_sites))){
tempo.cat <- paste0("ERROR IN final_insertion_files.R:\nTHE CONVERSION OF THE nb_max_insertion_sites PARAMETER INTO INTEGER RETURNS NA: \n", paste0(nb_max_insertion_sites, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
# end other checkings
# reserved word checking
......@@ -298,7 +323,7 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
################ Ignition
fun_report(data = paste0("\n\n################################################################ final_insertion_files PROCESS\n\n"), output = log, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n################################################################ final_insertion_files PROCESS WITH FILE ", pos, "\n\n"), output = log, path = "./", overwrite = FALSE)
ini.date <- Sys.time()
ini.time <- as.numeric(ini.date) # time of process begin, converted into seconds
fun_report(data = paste0("\n\n################################ RUNNING DATE AND STARTING TIME\n\n"), output = log, path = "./", overwrite = FALSE)
......@@ -318,7 +343,7 @@ fun_report(data = paste0("\n\n################################ RUNNING\n\n"), ou
################ Data import
obs <- read.table(pos, stringsAsFactors = FALSE) # does not take the header
obs.ini <- read.table(pos, stringsAsFactors = FALSE) # does not take the header
################ end Data import
......@@ -326,7 +351,10 @@ obs <- read.table(pos, stringsAsFactors = FALSE) # does not take the header
############ modifications of imported tables
fun_report(data = paste0("\nHEAD OF THE INITAL FILE ", pos), output = log, path = "./", overwrite = FALSE)
fun_report(data = head(obs.ini), output = log, path = "./", overwrite = FALSE)
obs <- obs.ini # obs.ini will be only filtered for highest most frequent position and then returned for the seq_around_insertion process
names(obs) <- c("orient", "pos")
obs <- obs[2:1]
......@@ -344,23 +372,44 @@ obs$fork<- factor(obs$fork, levels = c(0, 16), labels = c("Leading", "Lagging"))
obs <- data.frame(Sequence = "obs", Position = obs$pos, names = paste(obs$fork, obs$orient, sep = "_"), fork = obs$fork, orient = obs$orient)
obs$orient[obs$orient == 0] <- "Forward"
obs$orient[obs$orient == 16] <- "Reverse"
fun_report(data = paste0("\nHEAD OF THE MODIFIED FILE ", pos), output = log, path = "./", overwrite = FALSE)
fun_report(data = head(obs), output = log, path = "./", overwrite = FALSE)
fun_report(data = paste0("\nNUMBER OF OBS POSITIONS:\n", format(nrow(obs), big.mark=",")), output = log, path = "./", overwrite = FALSE)
# saving position file
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(obs, file = paste0("./", file_name, "_annot.pos"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
# freq file
res <- aggregate(x = obs$Position, by = list(Sequence = obs$Sequence, Position = obs$Position, names = obs$names, fork = obs$fork, orient = obs$orient), FUN = length)
names(res)[names(res) == "x"] <- "freq"
if( ! grepl(x = file_name, pattern = "^.*nodup.*$")){
if(nb_max_insertion_sites > nrow(res)){
res <- res[order(res$freq, decreasing = TRUE)[1:nrow(res)], ]
tempo.cat <- paste0("\nWARNING: nb_max_insertion_sites PARAMETER IS GREATER THAN THE NUMBER OF DIFFERENT SITES: \n", format(nrow(res), big.mark=","))
fun_report(data = tempo.cat, output = log, path = "./", overwrite = FALSE)
cat(tempo.cat)
}else{
res <- res[order(res$freq, decreasing = TRUE)[1:nb_max_insertion_sites], ]
}
}
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(res, file = paste0("./", file_name, "_annot.freq"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
if( ! grepl(x = file_name, pattern = "^.*nodup.*$")){
obs <- obs[obs$pos %in% res$Position, ]
obs.ini <- obs.ini[obs.ini[ , 2] %in% res$Position, ] # obs.ini[ , 2] because no column names
}
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(res, file = paste0("./", file_name, "_annot_insertion.freq"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
write.table(obs, file = paste0("./", file_name, "_annot.pos"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
options(scipen = 1000)
write.table(obs.ini, file = paste0("./", file_name, ".pos"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
############ end modifications of imported tables
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -265,6 +265,7 @@ if(length(tempo) != 4){
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
freq <- tempo
freq_name_test <- tempo[1]
}
if(length(insertion_dist) != 1 & any(grepl(insertion_dist, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN global_logo.R:\nTHE insertion_dist PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(insertion_dist, collapse = " "))
......@@ -368,13 +369,16 @@ for(i0 in 1:length(freq)){
png(filename = paste0("global_logo_", file_name, ".png"), width = 5000, height = 1800, units = "px", res = 300)
if( ! grepl(x = freq_name_test, pattern = "^.*nodup.*$")){
png(filename = paste0("global_logo_dup_", file_name, ".png"), width = 5000, height = 1800, units = "px", res = 300)
}else{
png(filename = paste0("global_logo_nodup_", file_name, ".png"), width = 5000, height = 1800, units = "px", res = 300)
}
width <- 7
height <- 2.5
text.size <- 5
title.text.size <- 4
text.size <- 6
title.text.size <- 5
angle <- 90
tempo.just <- fun_gg_just(angle = angle, pos = "bottom")
......@@ -389,7 +393,8 @@ if(ncol(tempo) > 0){
gg4 <- ggplot2::scale_x_discrete(labels = c((-insertion_dist):(-1), 1:insertion_dist)) #remove the x initial numbers
gg5 <- ggplot2::annotate("text", x = 1:(insertion_dist * 2 - abs(decal)), y = -0.01, hjust = 1, vjust = 0.5, label = c((-insertion_dist + abs(decal)):(-1), 1:(insertion_dist - abs(decal) + 1)), size = text.size, angle = angle)
gg6 <- ggplot2::coord_cartesian(clip = "off")
suppressMessages(print(ggplot2::ggplot() + gg1 + gg2 + gg3 + gg4 + gg5 + gg6))
gg7 <- ggplot2::theme(text = ggplot2::element_text(size = text.size * 4))
suppressMessages(print(ggplot2::ggplot() + gg1 + gg2 + gg3 + gg4 + gg5 + gg6 + gg7))
}
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -260,6 +260,10 @@ if(length(insertion_dist) != 1 & any(grepl(insertion_dist, pattern = "\\D"))){#
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
insertion_dist <- as.integer(insertion_dist)
if(any(is.na(insertion_dist))){
tempo.cat <- paste0("ERROR IN logo.R:\nTHE CONVERSION OF THE insertion_dist PARAMETER INTO INTEGER RETURNS NA: \n", paste0(insertion_dist, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
# end other checkings
# reserved word checking
......@@ -338,8 +342,8 @@ png(filename = paste0("logo_", sub(x = freq, pattern = "\\.stat$", replacement =
width <- 7
height <- 2.5
text.size <- 5
title.text.size <- 4
text.size <- 6
title.text.size <- 5
angle <- 90
decal <- -1 # indicate the position occupied by the +1 position of the read (that correspond to first base of coli part of read) after rev complementation. Before rev-comp, it is 201 (as mentioned above). After, it is 400-201+1 = 200, thus -1. Allow to overlay consensus that are not centered on the +1 position
tempo.just <- fun_gg_just(angle = angle, pos = "bottom")
......@@ -355,7 +359,8 @@ if(ncol(tempo) > 0){
gg4 <- ggplot2::scale_x_discrete(labels = c((-insertion_dist):(-1), 1:insertion_dist)) #remove the x initial numbers
gg5 <- ggplot2::annotate("text", x = 1:(insertion_dist * 2), y = -0.01, hjust = 1, vjust = 0.5, label = c((-insertion_dist):(-1), 1:insertion_dist), size = text.size, angle = angle)
gg6 <- ggplot2::coord_cartesian(clip = "off")
suppressMessages(print(ggplot2::ggplot() + gg1 + gg2 + gg3 + gg4 + gg5 + gg6)) #
gg7 <- ggplot2::theme(text = ggplot2::element_text(size = text.size * 4))
suppressMessages(print(ggplot2::ggplot() + gg1 + gg2 + gg3 + gg4 + gg5 + gg6 + gg7))
}
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -293,6 +293,10 @@ if(length(ori_coord) != 2 & any(grepl(ori_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ori_coord <- as.integer(ori_coord)
if(any(is.na(ori_coord))){
tempo.cat <- paste0("ERROR IN motif.R:\nTHE CONVERSION OF THE ori_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ori_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
ter_coord <- strsplit(ter_coord, split = " ")[[1]]
if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally no NA with is.null()
......@@ -300,12 +304,20 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ter_coord <- as.integer(ter_coord)
if(any(is.na(ter_coord))){
tempo.cat <- paste0("ERROR IN motif.R:\nTHE CONVERSION OF THE ter_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ter_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
if(length(genome_size) != 1 & any(grepl(genome_size, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN random_insertion.R:\nTHE genome_size PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(genome_size, collapse = " "))
tempo.cat <- paste0("ERROR IN motif.R:\nTHE genome_size PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(genome_size, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
genome_size <- as.integer(genome_size)
if(any(is.na(genome_size))){
tempo.cat <- paste0("ERROR IN motif.R:\nTHE CONVERSION OF THE genome_size PARAMETER INTO INTEGER RETURNS NA: \n", paste0(genome_size, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -282,6 +282,10 @@ if(length(ori_coord) != 2 & any(grepl(ori_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ori_coord <- as.integer(ori_coord)
if(any(is.na(ori_coord))){
tempo.cat <- paste0("ERROR IN plot_coverage.R:\nTHE CONVERSION OF THE ori_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ori_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
ter_coord <- strsplit(ter_coord, split = " ")[[1]]
if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally no NA with is.null()
......@@ -289,6 +293,10 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ter_coord <- as.integer(ter_coord)
if(any(is.na(ter_coord))){
tempo.cat <- paste0("ERROR IN plot_coverage.R:\nTHE CONVERSION OF THE ter_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ter_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
if(length(color_coverage) != 1 & any(grepl(color_coverage, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN plot_coverage.R:\nTHE color_coverage PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(color_coverage, collapse = " "))
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......
This diff is collapsed.
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......
......@@ -37,8 +37,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN random_insertion.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN random_insertion.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -297,6 +297,10 @@ if(length(ori_coord) != 2 & any(grepl(ori_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ori_coord <- as.integer(ori_coord)
if(any(is.na(ori_coord))){
tempo.cat <- paste0("ERROR IN random_insertion.R:\nTHE CONVERSION OF THE ori_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ori_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
ter_coord <- strsplit(ter_coord, split = " ")[[1]]
if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally no NA with is.null()
......@@ -304,12 +308,20 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ter_coord <- as.integer(ter_coord)
if(any(is.na(ter_coord))){
tempo.cat <- paste0("ERROR IN random_insertion.R:\nTHE CONVERSION OF THE ter_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ter_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
if(length(genome_size) != 1 & any(grepl(genome_size, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN random_insertion.R:\nTHE genome_size PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(genome_size, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
genome_size <- as.integer(genome_size)
if(any(is.na(genome_size))){
tempo.cat <- paste0("ERROR IN random_insertion.R:\nTHE CONVERSION OF THE genome_size PARAMETER INTO INTEGER RETURNS NA: \n", paste0(genome_size, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
......
......@@ -36,8 +36,8 @@
# R version checking
if(version$version.string != "R version 4.0.5 (2021-03-31)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.0.5 RECOMMANDED\n\n================\n\n"))
if(version$version.string != "R version 4.1.2 (2021-11-01)"){
stop(paste0("\n\n================\n\nERROR IN plot_read_length.R\n", version$version.string, " IS NOT THE 4.1.2 RECOMMANDED\n\n================\n\n"))
}
# other initializations
erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
......@@ -274,6 +274,10 @@ if(length(ori_coord) != 2 & any(grepl(ori_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ori_coord <- as.integer(ori_coord)
if(any(is.na(ori_coord))){
tempo.cat <- paste0("ERROR IN seq_around_insertion.R:\nTHE CONVERSION OF THE ori_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ori_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
ter_coord <- strsplit(ter_coord, split = " ")[[1]]
if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally no NA with is.null()
......@@ -281,12 +285,20 @@ if(length(ter_coord) != 2 & any(grepl(ter_coord, pattern = "\\D"))){# normally n
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
ter_coord <- as.integer(ter_coord)
if(any(is.na(ter_coord))){
tempo.cat <- paste0("ERROR IN seq_around_insertion.R:\nTHE CONVERSION OF THE ter_coord PARAMETER INTO INTEGER RETURNS NA: \n", paste0(ter_coord, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
if(length(insertion_dist) != 1 & any(grepl(insertion_dist, pattern = "\\D"))){# normally no NA with is.null()
tempo.cat <- paste0("ERROR IN seq_around_insertion.R:\nTHE insertion_dist PARAMETER MUST BE A SINGLE INTEGER\nHERE IT IS: \n", paste0(insertion_dist, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE) # == in stop() to be able to add several messages between ==
}else{
insertion_dist <- as.integer(insertion_dist)
if(any(is.na(insertion_dist))){
tempo.cat <- paste0("ERROR IN seq_around_insertion.R:\nTHE CONVERSION OF THE insertion_dist PARAMETER INTO INTEGER RETURNS NA: \n", paste0(insertion_dist, collapse = " "))
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
}
......@@ -328,6 +340,7 @@ fun_report(data = paste0("\n\n################################ RUNNING\n\n"), ou
obs <- read.table(pos, stringsAsFactors = TRUE) # does not take the header
print(head(obs))
################ end Data import
......@@ -338,7 +351,10 @@ obs <- read.table(pos, stringsAsFactors = TRUE) # does not take the header
names(obs) <- c("orient", "pos")
# ori and dif
if( ! any(obs$orient %in% c(0, 16))){cat("\n\nERROR: OTHER THAN 0 OR 16 FOR FLAG\n\n")}
if( ! any(obs$orient %in% c(0, 16))){
tempo.cat <- "ERROR IN seq_around_insertion.R: OTHER THAN 0 OR 16 FOR FLAG"
stop(paste0("\n\n================\n\n", tempo.cat, "\n\n================\n\n"), call. = FALSE)
}
ori.center <- mean(ori_coord, na.rm = TRUE)
ter_center <- mean(ter_coord, na.rm = TRUE)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -33,7 +33,6 @@ env {
primer_fasta="$baseDir/dataset/20200520_adapters_TruSeq_B2699_14985_CL.fasta" // list of primers used for the library and used by Alien trimmer to trim the raw reads
//primer_fasta="/mnt/share/14985_loot/results/20200520_res_CL14985_newtrim_align/20200520_adapters_TruSeq_B2699_14985_CL.fasta"
//// end path and files
//// alientrimmer
alientrimmer_l_param=30 // L parameter of alienTrimmer
//// end alientrimmer
......@@ -52,17 +51,24 @@ env {
ref_path="$baseDir/dataset/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome
//ref_path="/pasteur/zeus/projets/p01/BioIT/gmillot/reference_genomes/coli_K12_MG1655_NC_000913.3_ORI_CENTERED/" // path of the reference genome
ref_file="Ecoli-K12-MG1655_ORI_CENTERED.fasta" // name of the reference genome fasta file
tss_file="TSS_compatible_essential.txt" // Ecoli list of Transcription starting sites. Same as Z:\reference_genomes\coli_K12_MG1655_NC_000913.3\tss.txt but genome centered on the OriC. Write "NULL" if ref_file is not Ecoli
ess_file="Essential_genes_MG1655.tsv" // Ecoli list of essential genes same as Z:\reference_genomes\coli_K12_MG1655_NC_000913.3. Just used here to know which are essential genes. Coordinates not used. Thus, no need to center according to Ori. Write "NULL" if ref_file is not Ecoli
cds_file="cds_ORI_CENTERED.txt" // Ecoli list of cds same as Z:\reference_genomes\coli_K12_MG1655_NC_000913.3_ORI_CENTERED. Write "NULL" if ref_file is not Ecoli
ori_coord="2320711 2320942" // [2320711, 2320942] // Ecoli centered coordinates
ter_coord="4627368 4627400" //[4627368, 4627400] // Ecoli centered coordinates
color_coverage="5" // three integers for the color of the three coverage plots[1, 2, 5]
xlab="Ecoli Genome (bp)" // name of the reference genome for graphics
genome_size="4641652" // in bp
prop_coding_genome = "0.88" // proportion of genome covered by CDS. See 20200922 section 30.5.1
prop_ess_coding_genome = "0.08" // proportion of the genome covered by essential CDS. See 20200922 section 30.5.1
insertion_dist="20" // nb of bases upstream and downstream of insertions sites on the ref genome to define a consensus sequence
motif_fw='G[AT]T' // regex indicating the sequence motif to use for random insertions. "NULL" means completely random. Must be 5' -> 3'
motif_rev='A[AT]C' // the antiparallel of motif_fw
window_size = "50000" // written as R vector, width of the sliding window on the reference genome in bp
step=100 // steps of the sliding window on the reference genome in bp
cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v11.0.0/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
window_size = "50000" // width of the sliding window on the reference genome in bp. Several values can be given between the double quotes, separated by a single space. Example: "50000 200000"
step = 100 // steps of the sliding window on the reference genome in bp
nb_max_insertion_sites = "6" // nb of highest-frequency sites used for insertion in data without duplicates removal
cute_path="https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v11.2.0/cute_little_R_functions.R" // single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
}
//////// end variables that will be used only in the main.nf
......@@ -221,7 +227,7 @@ process {
}
withLabel: r_ext {
container='gmillot/r_v4.0.5_extended_v2.0:gitlab_v6.4'
container='gmillot/r_v4.1.2_extended_v2.0:gitlab_v8.2'
cpus=1 // only used when name = "local" in the executor part above
memory='64G' // only used when name = "local" in the executor part above
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment