Commit 67a09a44 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

v7.6.0 release: Everything's fine, except the multiQC

parent 22b2b5eb
......@@ -198,6 +198,11 @@ Gitlab developers
## WHAT'S NEW IN
### v7.6.0
1) Everything's fine, except the multiQC
### v7.5.0
1) Ok up to the plot_insertion process, tested using the test file
......
......@@ -340,10 +340,10 @@ if(ter_center > ori_center){
}else{
obs$fork[obs$pos <= ori_center & obs$pos >= ter_center] <- abs(obs$fork[obs$pos <= ori_center & obs$pos >= ter_center] - 16) # right of the Ori and left of the dif (dif < ori) is switch 0 -> 16 and 16 -> 0 to have the leading and lagging
}
obs$fork<- factor(obs$fork, levels = c(0, 16), labels = c("LEADING", "LAGGING"))
obs <- data.frame(seq = "obs", pos = obs$pos, names = paste(obs$fork, obs$orient, sep = "_"), fork = obs$fork, orient = obs$orient)
obs$orient[obs$orient == 0] <- "FORWARD"
obs$orient[obs$orient == 16] <- "REVERSE"
obs$fork<- factor(obs$fork, levels = c(0, 16), labels = c("Leading", "Lagging"))
obs <- data.frame(Sequence = "obs", Position = obs$pos, names = paste(obs$fork, obs$orient, sep = "_"), fork = obs$fork, orient = obs$orient)
obs$orient[obs$orient == 0] <- "Forward"
obs$orient[obs$orient == 16] <- "Reverse"
fun_report(data = head(obs), output = log, path = "./", overwrite = FALSE)
fun_report(data = paste0("\nNUMBER OF OBS POSITIONS:\n", format(nrow(obs), big.mark=",")), output = log, path = "./", overwrite = FALSE)
......@@ -355,7 +355,7 @@ options(scipen = 0)
# freq file
res <- aggregate(x = obs$pos, by = list(seq = obs$seq, pos = obs$pos, names = obs$names, fork = obs$fork, orient = obs$orient), FUN = length)
res <- aggregate(x = obs$Position, by = list(Sequence = obs$Sequence, Position = obs$Position, names = obs$names, fork = obs$fork, orient = obs$orient), FUN = length)
names(res)[names(res) == "x"] <- "freq"
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(res, file = paste0("./", file_name, "_annot_insertion.freq"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
......
......@@ -103,7 +103,8 @@ gawk -v var1=$header 'BEGIN{
# printing the result
echo -e "Length of the expected attC sequence at the 5' part of reads: ${fivep_seq_nb}\n\n" >> ${log}
echo -e "<br /><br />\n\n### Preparation of the graph of base frequencies\n\n" >> ${log}
echo -e "\n\nLength of the expected attC sequence at the 5' part of reads: ${fivep_seq_nb}\n\n" >> ${log}
echo -e "Number of bases added after the expected attC sequence at the 5' part of reads, for graphical purpose: ${added_nb}\n\n" >> ${log}
# cat ${output_file}_5pAttc_1-${fivep_seq_nb}.stat >> ${log}
echo -e "\n\n" >> ${log}
......
......@@ -104,10 +104,17 @@ rm(tempo.cat)
################################ Test
# cat("\n\n!!!!!!!!!!!!!!!!!!! WARNING: test values are activated\n\n")
# stat <- "C:/Users/Gael/Documents/Git_projects/14985_loot/dataset/test.fastq_Nremove_trim_5pAttc_1-51.stat"
# attc_seq <- "CAATTCATTCAAGCCGACGCCGCTTCGCGGCGCGGCTTAATTCAAGCG"
# cute <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v10.9.0/cute_little_R_functions.R"
# log <- "report.txt"
# fw <- "C:/Users/gael/Documents/Git_projects/14985_loot/work/46/ff75af3b4ab98e07e0a9150985e9a2/motif_fw.pos"
# rv <- "C:/Users/gael/Documents/Git_projects/14985_loot/work/46/ff75af3b4ab98e07e0a9150985e9a2/motif_rev.pos"
# ori_coord <- "2320711 2320942"
# ter_coord <- "4627368 4627400"
# genome_size <- "4641652"
# motif_fw <- "G[AT]T"
# motif_rev <- "A[AT]C"
# cute <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/v11.0.0/cute_little_R_functions.R"
# log <- "motif_report.txt"
# report.rmd <- "report.rmd"
################################ end Test
......@@ -348,50 +355,111 @@ rv <- read.table(rv, stringsAsFactors = FALSE, sep = ":")
############ modifications of imported tables
names(fw) <- c("pos", "seq")
names(rv) <- c("pos", "seq")
fun_report(data = paste0("\n\n<br /><br />Beginning of the motif positions in the forward strand:\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = head(fw), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />Beginning of the motif positions in the reverse strand:\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = head(rv), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />NUMBER OF FORWARD 5\'", motif_fw, "3\' SITES:\n", format(nrow(fw), big.mark=",")), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />NUMBER OF REVERSE 5\'", motif_rev, "3\' SITES:\n", format(nrow(rv), big.mark=",")), output = report.rmd, path = "./", overwrite = FALSE)
names(fw) <- c("Position", "Sequence")
names(rv) <- c("Position", "Sequence")
fun_report(data = paste0("\n\nBeginning of the motif positions in the forward strand:\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(head(fw), file = paste0("./head.fw.txt"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./head.fw.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = FALSE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nBeginning of the motif positions in the reverse strand:\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(head(fw), file = paste0("./head.rv.txt"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./head.rv.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = FALSE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nNumber of sites on the forward strand (detected as 5\'", motif_fw, "3\' motifs on this strand): ", format(nrow(fw), big.mark=",")), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nNumber of sites on the reverse strand (detected as 5\'", motif_rev, "3\' motifs on the forward strand): ", format(nrow(rv), big.mark=",")), output = report.rmd, path = "./", overwrite = FALSE)
fw <- data.frame(fw, orient = 0) # forward strand
if(any( ! grepl(fw$seq, pattern = motif_fw))){stop("\n\nERROR\n\n")}
if(any( ! grepl(fw$Sequence, pattern = motif_fw))){stop("\n\nERROR\n\n")}
rv <- data.frame(rv, orient = 16) # reverse strand
if(any( ! grepl(rv$seq, pattern = motif_rev))){stop("\n\nERROR\n\n")}
if(any( ! grepl(rv$Sequence, pattern = motif_rev))){stop("\n\nERROR\n\n")}
a <- rbind(fw, rv, stringsAsFactors = FALSE)
a$pos <- a$pos + 2 # because pos START AT ZERO AND IS FOR THE FIRST LEFT NUC
a$Position <- a$Position + 2 # because Position START AT ZERO AND IS FOR THE FIRST LEFT NUC
a <- data.frame(a, fork = a$orient, stringsAsFactors = TRUE)
ori.center <- mean(ori_coord, na.rm = TRUE)
ter.center <- mean(ter_coord, na.rm = TRUE)
if(ter.center > ori.center){
a$fork[a$pos < ori.center | a$pos > ter.center] <- abs(a$fork[a$pos < ori.center | a$pos > ter.center] - 16) # left of the Ori and right of the dif (dif > ori) is switch 0 -> 16 and 16 -> 0 to have the leading and lagging
a$fork[a$Position < ori.center | a$Position > ter.center] <- abs(a$fork[a$Position < ori.center | a$Position > ter.center] - 16) # left of the Ori and right of the dif (dif > ori) is switch 0 -> 16 and 16 -> 0 to have the leading and lagging
}else{
a$fork[a$pos <= ori.center & a$pos >= ter.center] <- abs(a$fork[a$pos <= ori.center & a$pos >= ter.center] - 16) # right of the Ori and left of the dif (dif < ori) is switch 0 -> 16 and 16 -> 0 to have the leading and lagging
a$fork[a$Position <= ori.center & a$Position >= ter.center] <- abs(a$fork[a$Position <= ori.center & a$Position >= ter.center] - 16) # right of the Ori and left of the dif (dif < ori) is switch 0 -> 16 and 16 -> 0 to have the leading and lagging
}
a$fork<- factor(a$fork, levels = c(0, 16), labels = c("LEADING", "LAGGING"))
a$fork<- factor(a$fork, levels = c(0, 16), labels = c("Leading", "Lagging"))
# ori and dif
if( ! any(a$orient %in% c(0, 16))){stop("\n\nERROR: OTHER THAN 0 OR 16 FOR FLAG\n\n")}
b <- data.frame(seq = a$seq, pos = a$pos, names = paste(a$fork, a$orient, sep = "_"), fork = a$fork, orient = a$orient)
b$orient[b$orient == 0] <- "FORWARD"
b$orient[b$orient == 16] <- "REVERSE"
b <- data.frame(Sequence = a$Sequence, Position = a$Position, names = paste(a$fork, a$orient, sep = "_"), fork = a$fork, orient = a$orient)
b$orient[b$orient == 0] <- "Forward"
b$orient[b$orient == 16] <- "Reverse"
# verifications right of Ori
fun_report(data = "\n\n<br /><br />\n\nON THE RIGHT OF THE ORI AND LEFT OF THE DIFF\nFORWARD INSERSION = LEADING\nREVERSE INSERSION = LAGGING", output = report.rmd, path = "./", overwrite = FALSE)
d <- b[b$pos > ori.center & b$p < ter.center,]
fun_report(data = addmargins(table(d[, c("fork", "orient")])), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(d$names)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\nOn the right of the Ori and the left of the ter:\n<br />Forward insertion = Leading (aka 0)\n<br />Reverse insertion = Lagging (aka 16)", output = report.rmd, path = "./", overwrite = FALSE)
d <- b[b$Position > ori.center & b$Position < ter.center,]
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(as.data.frame.matrix(addmargins(table(d[, c("fork", "orient")]))), file = paste0("./table1.txt"), row.names = TRUE, col.names = NA, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./table1.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = TRUE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(as.data.frame(addmargins(table(d$names))), file = paste0("./table2.txt"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./table2.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = FALSE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
# verifications left of Ori
fun_report(data = "\n\n<br /><br />\n\nON THE LEFT OF THE ORI OR RIGHT OF THE DIFF\nFORWARD INSERSION = LAGGING\nREVERSE INSERSION = LEADING", output = report.rmd, path = "./", overwrite = FALSE)
d <- b[b$pos < ori.center | b$p > ter.center,]
fun_report(data = addmargins(table(d[, c("fork", "orient")])), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(d$names)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\nOn the left of the Ori and the right of the ter:\n<br />Forward insertion = Lagging (aka 16)\n<br />Reverse insertion = Leading (aka 0)", output = report.rmd, path = "./", overwrite = FALSE)
e<- b[b$Position < ori.center | b$Position > ter.center,]
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(as.data.frame.matrix(addmargins(table(e[, c("fork", "orient")]))), file = paste0("./table3.txt"), row.names = TRUE, col.names = NA, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./table3.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = TRUE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(as.data.frame(addmargins(table(e$names))), file = paste0("./table4.txt"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./table4.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = FALSE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 0, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
# bed like file
......@@ -407,10 +475,10 @@ p.fw.lead <- (ter.center - ori.center) / genome_size
p.fw.lag <- 1 - p.fw.lead
p.rv.lead <- p.fw.lag
p.rv.lag <- p.fw.lead
fun_report(data = paste0("\n\n<br /><br />\n\nPROPORTION OF THE FORWARD LEADING PART OF THE GENOME [(ter.center - ori.center) / genome_size]\n", round(p.fw.lead, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />\n\nPROPORTION OF THE FORWARD LAGGING PART OF THE GENOME (1 - p.fw.lead)\n", round(p.fw.lag, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />\n\nPROPORTION OF THE REVERSE LEADING PART OF THE GENOME (= p.fw.lag)\n", round(p.rv.lead, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\n<br /><br />\n\nPROPORTION OF THE REVERSE LAGGING PART OF THE GENOME (= p.fw.lead)\n", round(p.rv.lag, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nProportion of the forward leading part of the genome (p.fw.lead = (ter.center - ori.center) / genome_size): ", round(p.fw.lead, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nProportion of the forward lagging part of the genome (p.fw.lag = 1 - p.fw.lead): ", round(p.fw.lag, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nProportion of the reverse leading part of the genome (p.rv.lead = p.fw.lag): ", round(p.rv.lead, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nProportion of the reverse lagging part of the genome (p.rv.lag = p.fw.lead): ", round(p.rv.lag, 3), "\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
......@@ -420,24 +488,48 @@ theo.p <- c(p.fw.lag = p.fw.lag / 2, p.rv.lag = p.rv.lag / 2, p.fw.lead = p.fw.l
final <- b
# nb of insertion must be the same
fun_report(data = paste0("\n\n<br /><br />\n\nnb of insertions of the 5 following printings must be the same\n\n"), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\nfile", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = nrow(final), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\norient", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(final$orient)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\nfork", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(final$fork)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\nfork and orient", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(final$orient, final$fork)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\njust to check that idem addmargins(table(final$seq, final$orient, final$fork)) -> OK", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nNb of insertions in the five following printings must be the same\n\n"), output = log, path = "./", overwrite = FALSE)
fun_report(data = paste0("\n\nfile: ", nrow(final)), output = log, path = "./", overwrite = FALSE)
fun_report(data = "\n\norient", output = log, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
fun_report(data = addmargins(table(final$orient)), output = log, path = "./", overwrite = FALSE)
options(scipen = 0)
fun_report(data = "\n\nfork", output = log, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
fun_report(data = addmargins(table(final$fork)), output = log, path = "./", overwrite = FALSE)
options(scipen = 0)
fun_report(data = "\n\nfork and orient", output = log, path = "./", overwrite = FALSE)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
fun_report(data = addmargins(table(final$orient, final$fork)), output = log, path = "./", overwrite = FALSE)
options(scipen = 0)
fun_report(data = "\n\nAnalysis of biais, using the theo prop based on the genome distances between Ori and ter (theo proportions above divided by 2):", output = report.rmd, path = "./", overwrite = FALSE)
tempo1 <- as.data.frame(addmargins(table(final$names)) / nrow(final))
tempo2 <- as.data.frame(c(theo.p, 1))
names(tempo2) <- "Theoretical_freq"
tempo <- cbind(tempo1, tempo2)
options(scipen = 1000) # to avoid writing of scientific numbers in tables, see https://stackoverflow.com/questions/3978266/number-format-writing-1e-5-instead-of-0-00001
write.table(tempo, file = paste0("./table8.txt"), row.names = FALSE, col.names = TRUE, append = FALSE, quote = FALSE, sep = "\t")
options(scipen = 0)
tempo.cat <- "
\`\`\`{r, echo = FALSE}
tempo <- read.table('./table8.txt', header = TRUE, sep = '\\t', check.names = FALSE) ;
kableExtra::kable_styling(knitr::kable(head(tempo), row.names = FALSE, caption = NULL, format='html', format.args = list(decimal.mark = '.', big.mark = ',', digits = 3, scientific = 1000)), bootstrap_options = c('striped', 'bordered', 'responsive', 'condensed'), font_size = 10, full_width = FALSE, position = 'left')
\`\`\`
\n\n
"
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE)
real <- table(final$names)
fun_report(data = real, output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\nAnalysis of biais (compare with the theoretical values below)", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = addmargins(table(final$names) / nrow(final)), output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\nTheo prop (based on the genome distances between Ori and ter)", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = theo.p, output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = "\n\n<br /><br />\n\ntest", output = report.rmd, path = "./", overwrite = FALSE)
fun_report(data = chisq.test(real, p = theo.p), output = report.rmd, path = "./", overwrite = FALSE) # theo.p does not come from obs
tempo <- chisq.test(real, p = theo.p)
tempo.cat <- paste0(tempo$method, "\n<br />\nX-squared = ", round(tempo$statistic, 2), ", df = ", tempo$parameter, ", p-value = ", format(tempo$p.value, digits = 3, scientific = -7))
fun_report(data = tempo.cat, output = report.rmd, path = "./", overwrite = FALSE) # theo.p does not come from obs
############ end modifications of imported tables
......
......@@ -104,8 +104,8 @@ rm(tempo.cat)
################################ Test
# pos <- "C:\\Users\\gael\\Documents\\Git_projects\\14985_loot\\work\\ec\\0c01dd6e92fc4bfa0f504ab1a42412\\obs_rd_insertions.pos"
# freq <- "C:\\Users\\gael\\Documents\\Git_projects\\14985_loot\\work\\ec\\0c01dd6e92fc4bfa0f504ab1a42412\\obs_rd_insertions.freq"
# pos <- "C:\\Users\\gael\\Documents\\Git_projects\\14985_loot\\work\\a3/86699ab6517b4040f39a7fb45cbf27\\obs_rd_insertions.pos"
# freq <- "C:\\Users\\gael\\Documents\\Git_projects\\14985_loot\\work\\a3/86699ab6517b4040f39a7fb45cbf27\\obs_rd_insertions.freq"
# ori_coord <- "2320711 2320942"
# ter_coord <- "4627368 4627400"
# xlab <- "Ecoli Genome (bp)"
......@@ -118,8 +118,6 @@ rm(tempo.cat)
################################ end Test
################################ Recording of the initial parameters
......@@ -388,7 +386,7 @@ obs.rd.pos <- read.table(pos, stringsAsFactors = FALSE, header = TRUE, sep = "\t
############ modifications of imported tables
names(obs.rd.freq)[names(obs.rd.freq) == "seq"] <- "KIND"
names(obs.rd.freq)[names(obs.rd.freq) == "Sequence"] <- "KIND"
tempo <- max(obs.rd.freq$freq) + 20
ori.freq <- data.frame(x= c(ori_coord[1], ori_coord[1], ori_coord[2], ori_coord[2], ori_coord[1]), y = c(-tempo, tempo, tempo, -tempo ,-tempo), ZONE = "Ori", stringsAsFactors = TRUE)
dif.freq <- data.frame(x= c(ter_coord[1], ter_coord[1], ter_coord[2], ter_coord[2], ter_coord[1]), y = c(-tempo, tempo, tempo, -tempo ,-tempo), ZONE = "Ter", stringsAsFactors = TRUE)
......@@ -396,7 +394,7 @@ structure.freq <- rbind(ori.freq, dif.freq, stringsAsFactors = TRUE)
obs.freq <- obs.rd.freq[obs.rd.freq$KIND == "obs", ]
names(obs.rd.pos)[names(obs.rd.pos) == "seq"] <- "KIND"
names(obs.rd.pos)[names(obs.rd.pos) == "Sequence"] <- "KIND"
obs.pos <- obs.rd.pos[obs.rd.pos$KIND == "obs", ]
......@@ -472,7 +470,7 @@ if(ncol(obs.freq) > 0){
png(filename = paste0("plot_", file_name, "_insertion_hist_forward.png"), width = 5000, height = 1800, units = "px", res = 300)
tempo1 <- table(obs.freq$freq[obs.freq$orient == "FORWARD"])
tempo1 <- table(obs.freq$freq[obs.freq$orient == "Forward"])
tempo2 <- names(tempo1)
names(tempo1) <- NULL
tempo3 <- data.frame(insertion_number = as.integer(tempo2), site_number = as.vector(tempo1))
......@@ -505,7 +503,7 @@ if(ncol(obs.freq) > 0){
}
png(filename = paste0("plot_", file_name, "_insertion_hist_reverse.png"), width = 5000, height = 1800, units = "px", res = 300)
tempo1 <- table(obs.freq$freq[obs.freq$orient == "REVERSE"])
tempo1 <- table(obs.freq$freq[obs.freq$orient == "Reverse"])
tempo2 <- names(tempo1)
names(tempo1) <- NULL
tempo3 <- data.frame(insertion_number = as.integer(tempo2), site_number = as.vector(tempo1))
......@@ -543,10 +541,10 @@ if(ncol(obs.freq) > 0){
png(filename = paste0("plot_", file_name, "_insertion_raw.png"), width = 5000, height = 1800, units = "px", res = 300)
if(ncol(obs.rd.freq) > 0){
tempo <- obs.rd.freq
tempo$freq[tempo$orient == "REVERSE"] <- tempo$freq[tempo$orient == "REVERSE"] * -1
tempo$freq[tempo$orient == "Reverse"] <- tempo$freq[tempo$orient == "Reverse"] * -1
fun_gg_scatter(
data1 = list(structure.freq, tempo), # res # res[res$KIND == "obs", ]
x = list("x", "pos"),
x = list("x", "Position"),
y = list("y", "freq"),
categ = list("ZONE", "KIND"),
geom = list("geom_step", "geom_stick"),
......@@ -573,19 +571,19 @@ if(ncol(obs.rd.freq) > 0){
for(i0 in window_size){
# leading and lagging
lead.obs <- fun_slide(data = obs.rd.pos$pos[obs.rd.pos$fork== "LEADING" & obs.rd.pos$KIND == "obs"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$pos), to = max(obs.rd.pos$pos), parall = FALSE, cute.path = cute)
lag.obs <- fun_slide(data = obs.rd.pos$pos[obs.rd.pos$fork== "LAGGING" & obs.rd.pos$KIND == "obs"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$pos), to = max(obs.rd.pos$pos), parall = FALSE, cute.path = cute)
lead.rd <- fun_slide(data = obs.rd.pos$pos[obs.rd.pos$fork== "LEADING" & obs.rd.pos$KIND == "random"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$pos), to = max(obs.rd.pos$pos), parall = FALSE, cute.path = cute)
lag.rd <- fun_slide(data = obs.rd.pos$pos[obs.rd.pos$fork== "LAGGING" & obs.rd.pos$KIND == "random"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$pos), to = max(obs.rd.pos$pos), parall = FALSE, cute.path = cute)
lead.obs <- fun_slide(data = obs.rd.pos$Position[obs.rd.pos$fork== "Leading" & obs.rd.pos$KIND == "obs"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$Position), to = max(obs.rd.pos$Position), parall = FALSE, cute.path = cute)
lag.obs <- fun_slide(data = obs.rd.pos$Position[obs.rd.pos$fork== "Lagging" & obs.rd.pos$KIND == "obs"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$Position), to = max(obs.rd.pos$Position), parall = FALSE, cute.path = cute)
lead.rd <- fun_slide(data = obs.rd.pos$Position[obs.rd.pos$fork== "Leading" & obs.rd.pos$KIND == "random"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$Position), to = max(obs.rd.pos$Position), parall = FALSE, cute.path = cute)
lag.rd <- fun_slide(data = obs.rd.pos$Position[obs.rd.pos$fork== "Lagging" & obs.rd.pos$KIND == "random"], window.size = i0, step = step, fun = length, from = min(obs.rd.pos$Position), to = max(obs.rd.pos$Position), parall = FALSE, cute.path = cute)
lead.obs <- data.frame(lead.obs, STRAND = "LEADING", KIND = "OBS", stringsAsFactors = TRUE)
lag.obs <- data.frame(lag.obs, STRAND = "LAGGING", KIND = "OBS", stringsAsFactors = TRUE)
lead.obs <- data.frame(lead.obs, STRAND = "Leading", KIND = "OBS", stringsAsFactors = TRUE)
lag.obs <- data.frame(lag.obs, STRAND = "Lagging", KIND = "OBS", stringsAsFactors = TRUE)
total.obs <- lead.obs[, ! names(lead.obs) %in% "STRAND"]
total.obs$value <- lead.obs$value + lag.obs$value
lead.rd <- data.frame(lead.rd, STRAND = "LEADING", KIND = "RANDOM", stringsAsFactors = TRUE)
lag.rd <- data.frame(lag.rd, STRAND = "LAGGING", KIND = "RANDOM", stringsAsFactors = TRUE)
lead.rd <- data.frame(lead.rd, STRAND = "Leading", KIND = "RANDOM", stringsAsFactors = TRUE)
lag.rd <- data.frame(lag.rd, STRAND = "Lagging", KIND = "RANDOM", stringsAsFactors = TRUE)
total.rd <- lead.rd[, ! names(lead.rd) %in% "STRAND"]
total.rd$value <- lead.rd$value + lag.rd$value
total <- rbind(total.obs, total.rd)
......@@ -636,8 +634,65 @@ for(i0 in window_size){
ggplot2::ggsave(paste0("plot_", file_name, "_insertion_bin_", i0, ".png"), plot = res$gtable, device = "png", width = 16.6, height = 6, units = "in", dpi = 300) # width = 5000, height = 1800, units = "px" normally but "px" not accepted in the r_ext container
}else{
png(filename = paste0("plot_", file_name, "_insertion_bin_", i0, ".png"), width = 5000, height = 1800, units = "px", res = 300)
fun_gg_empty_graph(text = "EMPTY pos FILE: NO PLOT DRAWN")
fun_gg_empty_graph(text = "EMPTY .pos FILE: NO PLOT DRAWN")
}
lead.obs.prop <- lead.obs
lead.obs.prop$value <- lead.obs$value / (lead.obs$value + lag.obs$value)
lag.obs.prop <- lag.obs
lag.obs.prop$value <- lag.obs$value / (lead.obs$value + lag.obs$value)
obs.prop <- rbind(lead.obs.prop, lag.obs.prop)
lead.rd.prop <- lead.rd
lead.rd.prop$value <- lead.rd$value / (lead.rd$value + lag.rd$value)
lag.rd.prop <- lag.rd
lag.rd.prop$value <- lag.rd$value / (lead.rd$value + lag.rd$value)
rd.prop <- rbind(lead.rd.prop, lag.rd.prop)
if(ncol(total) > 0){
# graph of total frequency
pdf(file = NULL) # send plots into a NULL file, no pdf file created. It is to draw the plot but then to save it using ggsave()
res <- fun_gg_scatter(data1 = list(structure, obs.prop, rd.prop),
x = list("x", "center", "center"),
y = list("y", "value", "value"),
geom = list("geom_step", "geom_line", "geom_line"),
categ = list("ZONE", "STRAND", "STRAND"),
alpha = 1,
color = list(c("black", "brown"), fun_gg_palette(n = 4)[1:2], fun_gg_palette(n = 4)[3:4]),
dot.size = 0.7,
line.size = 1,
x.lab = xlab,
x.lim = c(0, genome_size),
x.tick.nb = 6,
x.second.tick.nb = 3,
x.left.extra.margin = 0,
x.right.extra.margin = 0,
y.lab = "Proportion of insertion sites",
y.lim = c(0.3, 0.7),
y.tick.nb = 6,
y.second.tick.nb = 3,
y.top.extra.margin = 0,
y.bottom.extra.margin = 0,
grid = FALSE,
article = TRUE,
legend.width = 0.2,
legend.name = list("", "OBS", "RANDOM"),
text.size = 16,
title = paste0("PROPORTION OF INSERTION SITES (WARNING: MAX ~ 100 INTEG PER POSITION, SEE 24.7.2) | WINDOW SIZE: ", i0, " | STEP: ", step, " | GRAPH "),
title.text.size = 7,
return = TRUE
)
ggplot2::ggsave(paste0("plot_", file_name, "_lead_lag_insertion_bin_", i0, ".png"), plot = res$gtable, device = "png", width = 16.6, height = 6, units = "in", dpi = 300) # width = 5000, height = 1800, units = "px" normally but "px" not accepted in the r_ext container
}else{
png(filename = paste0("plot_", file_name, "_lead_lag_insertion_bin_", i0, ".png"), width = 5000, height = 1800, units = "px", res = 300)
fun_gg_empty_graph(text = "EMPTY .pos FILE: NO PLOT DRAWN")
}
}
......
This diff is collapsed.
task_id hash native_id name status exit submit duration realtime %cpu peak_rss peak_vmem rchar wchar
2 fd/253874 1266 Nremove (1) CACHED 0 2022-01-28 13:35:48.765 1.7s 419ms 49.6% 11.9 MB 70.6 MB 16.8 MB 14.5 MB
4 83/852866 2073 trim (1) CACHED 0 2022-01-28 13:35:50.548 2.9s 2s 45.8% 40.1 MB 5.6 GB 16.4 MB 12 MB
6 3d/bdd1b9 15400 fivep_filtering (1) CACHED 0 2022-01-30 01:14:25.657 3.2s 1.7s 25.1% 9.8 MB 61.4 MB 26 MB 14.3 MB
5 d4/0dac5b 15456 fastqc1 (1) CACHED 0 2022-01-30 01:14:25.705 7.5s 6.1s 54.2% 162 MB 3.1 GB 13.7 MB 1.2 MB
11 0e/1a77a6 1981 plot_read_length_fivep_filtering (1) CACHED 0 2022-01-30 01:27:08.145 13s 12.3s 65.7% 202.7 MB 2.4 GB 19.1 MB 383 KB
8 6a/ddfcf2 421 plot_read_length_ini (1) CACHED 0 2022-01-30 01:26:53.912 14.2s 13s 61.4% 200.1 MB 2.4 GB 19.1 MB 386.8 KB
7 c2/d12542 16559 cutoff (1) CACHED 0 2022-01-30 01:14:28.951 1.9s 742ms 18.6% 9.6 MB 61.3 MB 7 MB 3.9 MB
10 39/605ace 19021 plot_fivep_filtering_stat (1) CACHED 0 2022-01-30 01:14:56.916 13.8s 13.1s 64.4% 203.4 MB 2.4 GB 19.1 MB 815.4 KB
14 90/6d8ce9 2704 plot_read_length_cutoff (1) CACHED 0 2022-01-30 01:27:21.165 13s 12.3s 65.6% 202.6 MB 2.4 GB 19.1 MB 375.6 KB
15 e7/3d41cd 20466 bowtie2 (1) CACHED 0 2022-01-30 01:15:24.166 4.3s 3.3s 56.2% 112.9 MB 239.5 MB 35 MB 16.2 MB
9 7f/97f499 16520 fastqc2 (1) CACHED 0 2022-01-30 01:14:28.909 5.9s 4.9s 62.1% 149.6 MB 3.1 GB 12.1 MB 1.2 MB
16 6f/aa6a8a 20913 Q20 (1) CACHED 0 2022-01-30 01:15:28.547 1.5s 345ms 20.0% 5.7 MB 42.8 MB 3.2 MB 2.2 MB
17 21/74ade8 20884 coverage (1) CACHED 0 2022-01-30 01:15:28.529 1.9s 706ms 17.4% 5.1 MB 44.6 MB 479.7 KB 91 KB
19 25/0d680f 21422 no_soft_clipping (1) CACHED 0 2022-01-30 01:15:30.487 1.4s 255ms 12.9% 3.4 MB 38.4 MB 2.1 MB 1.5 MB
20 cc/9865db 21391 duplicate_removal (1) CACHED 0 2022-01-30 01:15:30.466 2.8s 1.7s 26.5% 10.2 MB 68.1 MB 12.9 MB 6.6 MB
21 67/ee6b1b 21373 coverage (2) CACHED 0 2022-01-30 01:15:30.446 1.7s 676ms 18.1% 5 MB 44.6 MB 335.1 KB 82.4 KB
22 80/568e06 22043 plot_coverage (1) CACHED 0 2022-01-30 01:15:32.186 13.4s 12.6s 65.2% 201.2 MB 2.4 GB 19.1 MB 463.2 KB
23 80/428c10 22408 insertion (1) CACHED 0 2022-01-30 01:15:34.285 1.3s 430ms 20.7% 9.1 MB 65.7 MB 2.5 MB 1.7 MB
24 8a/2740ec 23840 coverage (3) CACHED 0 2022-01-30 01:16:00.395 1.4s 555ms 19.1% 5.1 MB 44.6 MB 310.3 KB 82.1 KB
27 96/e2950b 24770 seq_around_insertion (1) CACHED 0 2022-01-30 01:16:18.276 7.9s 7.3s 55.2% 152.9 MB 2.3 GB 18 MB 229.7 KB
25 c6/0aee4d 23120 plot_coverage (2) CACHED 0 2022-01-30 01:15:45.575 14.8s 14.1s 58.2% 201.2 MB 2.4 GB 19.1 MB 454.8 KB
28 86/a57288 25287 plot_coverage (3) CACHED 0 2022-01-30 01:16:26.175 17.1s 16.4s 50.6% 252.9 MB 2.5 GB 19.1 MB 458 KB
29 6a/71ea48 26063 extract_seq (1) CACHED 0 2022-01-30 01:16:43.296 1.7s 828ms 19.0% 5.7 MB 50.2 MB 9.1 MB 4.5 MB
26 f0/b5af2b 3426 plot_insertion (1) CACHED 0 2022-01-30 01:27:34.177 15.4s 14.7s 72.5% 206.8 MB 2.4 GB 19.8 MB 864.2 KB
31 da/e6a4cf 26307 base_freq (2) CACHED 0 2022-01-30 01:16:45.043 1.4s 81ms 13.4% 0 0 281.7 KB 18.9 KB
30 3a/0c42c6 26268 base_freq (1) CACHED 0 2022-01-30 01:16:45.027 1.3s 81ms 11.0% 0 0 279.6 KB 16.8 KB
32 20/eb7cac 26252 base_freq (3) CACHED 0 2022-01-30 01:16:45.011 1.2s 85ms 14.5% 0 0 282.5 KB 19.7 KB
33 f8/e0ef18 26344 base_freq (4) CACHED 0 2022-01-30 01:16:45.059 1.4s 83ms 11.2% 0 0 281.8 KB 19 KB
34 f0/862ba5 28046 logo (3) CACHED 0 2022-01-30 01:17:03.705 10.7s 10s 46.6% 207.2 MB 2.4 GB 14.6 MB 851.6 KB
35 da/c3ac83 27463 logo (2) CACHED 0 2022-01-30 01:16:55.075 8.6s 7.9s 58.8% 208.9 MB 2.4 GB 14.6 MB 1001.5 KB
36 e2/bc51e9 26769 logo (1) CACHED 0 2022-01-30 01:16:46.254 8.8s 8s 57.8% 205.3 MB 2.4 GB 14.6 MB 873 KB
37 e9/63ebcc 28714 logo (4) CACHED 0 2022-01-30 01:17:14.395 8.8s 8.1s 58.3% 205.9 MB 2.4 GB 14.6 MB 881.9 KB
38 cb/181c64 4148 global_logo CACHED 0 2022-01-30 01:27:49.555 9s 8.3s 60.0% 200.4 MB 2.4 GB 14.6 MB 850 KB
1 37/636e49 5675 init COMPLETED 0 2022-01-30 01:32:02.438 1.6s 11ms 5.6% 0 0 104.2 KB 672 B
3 da/ad6e2d 5704 report1 COMPLETED 0 2022-01-30 01:32:02.486 1.6s 27ms 5.4% 0 0 104.6 KB 750 B
12 5a/b6224e 5787 backup COMPLETED 0 2022-01-30 01:32:02.610 1.6s 13ms 5.6% 0 0 104.4 KB 521 B
39 95/207a1f 6009 report2 COMPLETED 0 2022-01-30 01:32:02.953 1.4s 40ms 8.5% 0 0 130.9 KB 1.4 KB
13 03/b99a01 5827 workflowVersion COMPLETED 0 2022-01-30 01:32:02.651 2.1s 600ms 11.8% 4.9 MB 38.4 MB 135.2 KB 1.7 KB
18 4f/e9bd1f 5912 multiQC FAILED 1 2022-01-30 01:32:02.759 2.7s 2.7s - - - - -
40 4f/b28fa0 6633 print_report (1) COMPLETED 0 2022-01-30 01:32:04.906 13s 12.2s 40.2% 200.8 MB 1 TB 23.8 MB 6 MB