diff --git a/README.md b/README.md index efa1252e61c831f9b912221a8505c71ae54c8890..e58846dfd7ff4bfb8a2b88352928c41acbbb5da0 100644 --- a/README.md +++ b/README.md @@ -43,11 +43,9 @@ Slitherine finds significant coverage differences when comparing two contact mat ## SLITHERINE CONTENT -**slitherine.R** file that can be sourced in R or RStudio, after opening slitherine.R and modifying the parameter section +**slitherine.R** file that can be executed using a CLI (command line interface) or sourced in R or RStudio -**slitherine.config** parameter settings for the slitherine_script.R file - -**slitherine_script.R** file that can be executed using a CLI (command line interface) +**slitherine.config** parameter settings for the slitherin.R file **dataset** folder containing the datasets used in the publication @@ -68,7 +66,7 @@ Slitherine finds significant coverage differences when comparing two contact mat |mouse_zygotene_chr2_region_27700_29160.txt | mouse meoisis zygotene stage (Fig6)| -**example_of_result** folder containing an example of result obtained with slitherine, using the slitherine_script file as well as the AT_Dndj1_D_T0.filt.2500.rebin_1162-1388.txt and AT200_Aurel_T6.filt.2500.rebin_1162-1388.txt matrices (Figure 3 of the publication) +**example_of_result** folder containing an example of result obtained with slitherine, using the slitherine_R file as well as the AT_Dndj1_D_T0.filt.2500.rebin_1162-1388.txt and AT200_Aurel_T6.filt.2500.rebin_1162-1388.txt matrices (Figure 3 of the publication) **other** folder containing gitlab web interface files as well as other non essential documents. Note that the slitherine_config.doc file can facilitate the reading of the parameter setting description @@ -77,7 +75,7 @@ Slitherine finds significant coverage differences when comparing two contact mat ### Using a R GUI (graphic user interface, i.e., R or RStudio windows) -1) Open the slitherine.R file and set the parameter in the "Parameters that need to be set by the user" section +1) Open the slitherine.config file and set the parameters. The file must be present in the same directory as slitherine.R 2) Open R or RStudio @@ -90,19 +88,19 @@ source("C:/Users/Gael/Desktop/slitherine.R") ### Using a R CLI (command line interface) -1) Open the slitherine.config file and set the variables +1) Open the slitherine.config file and set the parameters. The file must be present in the same directory as slitherine.R 2) Open a shell windows -3) run slitherine_script.R, for instance using the following instruction: +3) run slitherine.R, for instance using the following instruction: ``` -Rscript slitherine_script.R slitherine.config +Rscript slitherine.R slitherine.config ``` For cygwin, use something like: ``` -/cygdrive/c/Program\ Files/R/R-3.5.3/bin/Rscript slitherine_script.R slitherine.config +/cygdrive/c/Program\ Files/R/R-3.5.3/bin/Rscript slitherine.R slitherine.config ``` diff --git a/other/slitherine.doc b/other/slitherine.doc index 0b0fc8905c03bf20e566aae423d2c40052c505da..35d8db95061f4e91bc1adbfac41142be8dfc9d71 100644 Binary files a/other/slitherine.doc and b/other/slitherine.doc differ diff --git a/other/slitherine_config.doc b/other/slitherine_config.doc index ace35ef8dcd11a71e25d9cba986842eb6308491d..3f06748b28075c82ae8a36a3015d321c624e8ed9 100644 Binary files a/other/slitherine_config.doc and b/other/slitherine_config.doc differ diff --git a/other/slitherine_script.doc b/other/slitherine_script.doc deleted file mode 100644 index 1045612e3a4b6b729a3f576ae19b86b0510f4c81..0000000000000000000000000000000000000000 Binary files a/other/slitherine_script.doc and /dev/null differ diff --git a/todo.txt b/other/todo.txt similarity index 100% rename from todo.txt rename to other/todo.txt diff --git a/slitherine.R b/slitherine.R index c64a0c34209ba60fbba59bbf630d7b7ad27b6c82..669c04596644f416fce125478d022babcd365849 100644 --- a/slitherine.R +++ b/slitherine.R @@ -1,6 +1,6 @@ ######################################################################### ## ## -## SLITHERINE v1.0.0 ## +## SLITHERINE ## ## ## ## Gael A. Millot ## ## Vittore F. Scolari ## @@ -17,7 +17,8 @@ # check that for all path, a / is added because R does not care if // is present # why alternative adj.mean.fun function does not gives the same result? # solve the problem of mean sorting to generate the theo matrices. Rigth now, sorting is performed. Code to modified is in red -# update the other slitherine files +# Take the new dev from anova contrast: 2) check no var in argument +# fun_mat_fill does not recognize half matrix anymore ################################ Aim @@ -32,7 +33,6 @@ ################################ Introduction -# Compatible with R v6.3.1 # Increase the R console window width if columns of tables are subjected to carriage return in the ...report.txt file @@ -61,7 +61,7 @@ rm(list=ls()) erase.objects = TRUE } erase.graphs = TRUE # write TRUE to erase all the graphic windows in R before starting the algorithm and FALSE otherwise -script <- "slitherine v1.0.0" +script <- "slitherine v2.0.0" ################################ End Initialization @@ -70,115 +70,53 @@ script <- "slitherine v1.0.0" ################################ Parameters that need to be set by the user -################ Mandatory settings +# see the slitherine.config file -######## File names and locations - - -project.name <- "slitherine" # name of the output folder -file.name1 <- "AT_Dndj1_D_T0.filt.2500.rebin_1162-1388.txt" # name of the first matrix file. Must be made of integer, except if theo.import parameter is set to TRUE (see below). Indeed, integers are required to generate the theoretical matrice (use of discrete distributions) -file.name2 <- "AT200_Aurel_T6.filt.2500.rebin_1162-1388.txt" # name of the second matrix file. Must be made of integer, except if theo.import parameter is set to TRUE (see below), meaning that this matrix is already serpentine binned -path.in <- "C:/Users/Gael/Documents/Hub projects/20190611 Scolari 13341/dataset/Meio/" # absolute pathway of the folder containing the input data files (file.name1 and file.name2) -path.out <- "C:/Users/Gael/Desktop/" # absolute pathway of the destination folder that will contain the results (exported data) - - -######## R packages and cute_little_R_functions file locations - - -lib.path <- NULL # vector of character that define the absolute pathway of the folder containing the R packages. Write NULL for the default path. BEWARE: default path is dependent on the system and interface used. For instance, using cygwin64 on windows 7, the path is "C:/Program Files/R/R-3.5.3/library". On the same cmputer using the R classical interface, the paths are [1] "C:/Users/Gael/Documents/R/win-library/3.5" [2] "C:/Program Files/R/R-3.5.3/library" -path.function1 <- "C:/Users/Gael/Documents/Git_projects/cute_little_R_functions/cute_little_R_functions.R" # file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" - - -######## Matrix structure - - -empty.cell.string <- 0 # if the imported matrix are half filled, put here the character string ("-" for instance) or number (0 for instance) or reserved R word (NA for instance) used to fill the empty part of the matrix. Slitherine will automatically complete the imported matrix. Write NULL if the imported matrix are not half empty - - -######## Number of Threads available - - -thread.nb <- NULL # Integer specifying the number of threads available. BEWARE: it is possible to have several threads per cpu (https://en.wikipedia.org/wiki/Thread_(computing)). Slitherine can parallelized some part of its job (including serpentine job) using thread.nb to speed up the run. If NULL, slitherine will take the number of threads available - 1. If non null, will use thread.nb if thread.nb <= number of threads available and number of threads available otherwise - - -################ End Mandatory settings - - -################ Optional settings - - -######## Serpentine - - -serp.binning <- TRUE # logical. Serpentine binning required? See https://github.com/koszullab/serpentine -python <- "C:/ProgramData/Anaconda3/python.exe" # python executable file (and pathway) to use for serpentine -path.python.lib <- "c:/programdata/anaconda3/lib/site-packages/" # vector of character that define the absolute pathway of the folder containing the serpentine package. Write NULL for the default path -serp.threshold <- 70 # serpentine threshold parameter -serp.minthreshold <- 7 # serpentine min threshold parameter -serp.iter.nb <- 128 # total serpentine iteration number parameter (this number will be split by thread.nb parameter, by progressively decreasing thread.nb til serp.iter.nb %% thread.nb != 0 (to have the same serp.iter.nb per thread) -serp.symmet.input <- TRUE # are input matrices symmetric? - - -######## HICcompare - - -hiccomp <- TRUE # logical. HICcompare required? See https://bioconductor.org/packages/release/bioc/manuals/HiCcompare/man/HiCcompare.pdf. If TRUE, the binning parameter must be non null -binning <- 2500 # integer that specifies the binning size of the imported matrices (in bp). If NULL, Hic Compare cannot be used - - -######## Theoretical matrix computation - - -# THEO1 and THEO2 matrices are synthetic matrices that are computed by slitherine using data from the 2 oberved matrices. These theoretical matrices are used to define significant difference between the two observed matrices (i.e., pixel difference far from random fluctuations). Each THEO1 and THEO2 matrix are made of n colums, each column of THEO1 representing the diagonales of OBS1, and each column of THEO2 representing the diagonales of OBS2. Thus, the number of colums of column in THEO1 and THEO2 is defined by the dimension of the observed matrices. Now the number of rows of THEO1 and THEO2, set by the n.row parameter (see below), is a variable associated to power of the significance. Greater n.row is, greater is the puissance but longer is slitherine to achieve a run, in particularly during the permutation process. Of note, In observed matrices, if the dimension is 4, 4 (square matrix) the first diagonal (main diagonal) is made of 4 values, and the last diagonal (corner diagonal) of 1 value. In theoretical matrices, such number of value is constant among the different columns representing the diagonals of the observed matrices - -theo.import <- FALSE # logical. Import theoretical matrices already obtained using slitherine? If TRUE, matrix comparison is preformed without serpentine binning (serp.binning automatically set to FALSE) -theo.file.name1 <- "mat1.theo.serp.txt" # name of the first matrix file -theo.file.name2 <- "mat2.theo.serp.txt" # name of the second matrix file -theo.path.in <- "C:/Users/Gael/Documents/Hub projects/20190611 Scolari 13341/dataset/Meio/" # absolute pathway of the folder containing the input data files (file.name1 and file.name2) -n.row <- 300 # integer value setting the number of rows of the theoretical matrices. If NULL, n.row will take the number of column of the theoretical matrices -win.size <- 20 # nb of cv values taken in the sliding windows on the CV / MEAN plot to define an average cv at the rupture slope on the CV / MEAN plot (must be less than matrix dimension - 2). Increase this value if warning messages appears saying: "PARAMETER MUST BE SUCH THAT cv^2 > 1/mu" -cv.rho.obtained <- FALSE # coefficient of variation (cv) of observed matrices 1 and 2, as well as correlation between observed matrices 1 and 2 already obtained ? If TRUE, will use the path.cv.rho parameter to load the data -path.cv.rho <- "C:/Users/Gael/Desktop/cv1_cv2_rho1_rho2_backup.RData" # file and absolute pathway to download the cv1, cv2, as well as rho1 and rho2 (which are identical) of observed matrices 1 and 2 already obtained. Write NULL if not required. Not considered if cv.rho.obtained is FALSE -correl.mat.obtained <- FALSE # theoretical matrix with permutation already obtained ? If TRUE, will use the path.theo1.theo2 parameter to load the matrices -path.theo1.theo2 <- "C:/Users/Gael/Desktop/permut_mat1_mat2_backup.RData" # file and absolute pathway to download the theoretical matrix with permutation already obtained. Write NULL if not required. Not considered if correl.mat.obtained is FALSE -single.corr <- "MAX" # either "VALUE", "MAX", "DEC1", "QUART1", "MED", "MIN" or "NO" # use MAX by default. if VALUE, a unique arbitrary value, defined in the abs.corr.limit parameter below, is used as reference to generate the correlation between the related diagonals of the theoretical theo1 and theo2 matrices (all the correlations between theo1 and theo2 diagonals will be close to abs.corr.limit). If MAX, the maximal correlation value between the observed mat1 and mat2 matrix diagonals will be used to generate the correlation between the related diagonals of theo1 and theo2 matrices (all the correlations between theo1 and theo2 diagonals will be close to max(rho1)). If DEC1, QUART1 MED or MIN, the same as MAX but using the first decile, first quartile, median or minimal correlation value between the observed mat1 and mat2 matrix diagonals, respectively. If NO, each of the observed correlations between the related diagonals of the mat1 and mat2 matrices will be used to generate the correlation of the corresponding theo1 and theo2 diagonal. In the case of NO, any observed correlation below the abs.corr.limit parameter will be set to abs.corr.limit (to avoid very long computing needed for very weak correlations) -abs.corr.limit <- 0.2 # parameter used when single.corr <- "VALUE" or single.corr <- "NO". See the single.corr parameter description -print.count <- 1e6 # during the correlation adjustment process, print a message every print.count loops ? -keep <- TRUE # keep the intermediate matrices and big objects in the working environment til the end? If TRUE, everything is saved in the final all_objects.RData. If FALSE, intermediate matrices are saved in different .RData files and then removed all along the script execution - - -######## Significant regions between the two compared matrices - - -ratio.limit.sig <- 2 # ratio value between the two matrice pixel, below which ratio is not significant? From 1 to +Inf (2 means coverage ratio less than 2 is not significant, 1 means no cutoffs in ratio values) -error <- 0 # from 0 to 1. Proportion of false positives (i.e., theo dots considered as observed dots). 0.05 means 5%, 0 means that the significant observed dot are outside of the theo cloud -range.split <- 25 # for the significant dots. If x.range is the range of the dots on the x-axis, then abs(diff(x.range) / range.split) gives the window size. Window size decreases when range.split increases -step.factor <- 10 # for the significant dots. x.win.size / step.factor gives the shift step of the window. When step.factor = 1, no overlap during the sliding. If step.factor = 2, 50% of overlap during 1 slide -ratio.normalization <- TRUE # logical. Divide the cell ratio matrix mat2 / mat1 (differential matrix) by the ratio factor mean(mat2) / mean(mat1)? If TRUE, this means that the mean of the normalized cell ratio matrix is 1, and log (parameter transfo <- TRUE)is 0 - - -######## Graphical and display parameters +################################ End Parameters that need to be set by the user -# all plots -activate.pdf <- TRUE # write TRUE for pdf display and FALSE for R display (main graphs) -optional.text <- "" # write here an optional text to include in results and graphs -width.wind <- 7 # window width (in inches) -height.wind <- 7 # window height (in inches) -dot.size <- 2 # increase or decrease the value to increase or decrease the size of the dots -line.size <- 0.75 # increase or decrease the value to increase or decrease the size of the lines -heatmap.text.size <- 8 # increase or decrease the value to increase or decrease the size of the heatmap scale text -text.size <- 12 # increase or decrease the value to increase or decrease the size of the axis text and legend text -title.text.size <- 4.5 # increase or decrease the value to increase or decrease the size of the title text -raster <- TRUE # raster mode for dot plots ? -transfo <- "log2" # Either "log2" (matrix values will be log2 converted, and sometimes log2(x +1) converted, +1 to deal with zero) or "log10" ((matrix values will be log10 converted and sometimes log10(x +1) converted, +1 to deal with zero). BEWARE: observed matrices must remain integers for serpentine. Log is only applied for display, the reason why the option "no" is not proposed (which would mean data already log converted) +################################ Config import -################ End Optional settings +if(interactive() == FALSE){ # if(grepl(x = commandArgs(trailingOnly = FALSE), pattern = "R\\.exe$|\\/R$|Rcmd\\.exe$|Rcmd$|Rgui\\.exe$|Rgui$|Rscript\\.exe$|Rscript$|Rterm\\.exe$|Rterm$")){ # detection of script usage +run.way <- "SCRIPT RUNNING DETECTION" +cat(paste0("\n\n", run.way, "\n")) +command <- paste0(commandArgs(trailingOnly = FALSE), collapse = ",") # recover the full command +args <- commandArgs(trailingOnly = TRUE) # recover arguments written after the call of the R script +if(any(is.na(args))){ +stop(paste0("\n\n================\n\nERROR IN SLITHERINE\nTHE args OBJECT HAS NA\n\n================\n\n"), call. = FALSE) +} +tempo.arg.names <- c("config.path") # objects names exactly in the same order as in the bash code and recovered in args. Here only one, because only the path of the config file to indicate after the SLITHERINE script execution +if(length(args) != length(tempo.arg.names)){ +stop(paste0("\n\n================\n\nERROR IN SLITHERINE\nTHE NUMBER OF ELEMENTS IN args (", length(args),") IS DIFFERENT FROM THE NUMBER OF ELEMENTS IN tempo.arg.names (", length(tempo.arg.names),")\nargs:", paste0(args, collapse = ","), "\ntempo.arg.names:", paste0(tempo.arg.names, collapse = ","), "\n\n================\n\n"), call. = FALSE) +} +for(i1 in 1:length(tempo.arg.names)){ +assign(tempo.arg.names[i1], args[i1]) +} +rm(tempo.arg.names, args, i1) +if( ! file.exists(config.path)){ +stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nCONFIG FILE NAME AND PATH INDICATED IN EXECUTION COMMAND DOES NOT EXISTS: ", config.path, "\n\n============\n\n"), call. = FALSE) +}else{ +source(config.path) # source the config parameters +rm(config.path) +} +}else if(sys.nframe() == 0L){ # detection of copy-paste/direct execution (for debugging). With script it is also 0, with source, it is 4 +run.way <- "DIRECT RUNNING DETECTION" +cat(paste0("\n\n", run.way, "\n")) +source("C:/Users/Gael/Documents/Git_projects/SLITHERINE/slitherine.config", local = .GlobalEnv) +}else{ +run.way <- "SOURCE RUNNING DETECTION" +cat(paste0("\n\n", run.way, "\n")) +if( ! file.exists(paste0(dirname(parent.frame(2)$ofile), "/slitherine.config"))){ +stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nslitherine.config FILE NOT PRESENT WHERE THE anova.contrast_source FILE IS LOCATED:\n", dirname(parent.frame(2)$ofile), "\nPLEASE, DO NOT MODIFY THE NAME OF THE slitherine.config FILE (OR MODIFY THE CODE IN THE anova.contrast_source FILE, CONFIG IMPORT SECTION)\n\n============\n\n"), call. = FALSE) +}else{ +source(paste0(dirname(parent.frame(2)$ofile), "/slitherine.config"), local = .GlobalEnv) # source the parameters used below +} +} -################################ End Parameters that need to be set by the user +################################ End Config import ################################ Recording of the initial parameters @@ -188,6 +126,8 @@ param.list <- c( "erase.objects", "erase.graphs", "script", +"run.way", +if(run.way == "SCRIPT RUNNING DETECTION"){"command"}, "project.name", "file.name1", "file.name2", @@ -235,7 +175,8 @@ param.list <- c( "text.size", "title.text.size", "raster", -"transfo" +"transfo", +"warn.secu" ) if(any(duplicated(param.list))){ stop(paste0("\n\n================\n\nINTERNAL CODE ERROR IN SLITHERINE\nTHE param.list OBJECT CONTAINS DUPLICATED ELEMENTS:\n", paste(param.list[duplicated(param.list)], collapse = " "), "\n\n================\n\n")) # message for developers @@ -932,7 +873,8 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size_5fun, title.text.size = title.text.size_5fun, -classic = TRUE, +article = TRUE, +legend.width = NULL, raster = raster_5fun, x.left.extra.margin = 0, x.right.extra.margin = 0.05, @@ -1004,7 +946,8 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size_5fun, title.text.size = title.text.size_5fun, -classic = TRUE, +article = TRUE, +legend.width = NULL, raster = raster_5fun, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, @@ -1016,7 +959,7 @@ y.bottom.extra.margin = 0 # y.range <- range(volcano$P_VALUE, na.rm = TRUE, finite = TRUE) # already computed above # if(transfo_5fun != "no"){fold.y.range.limit.inf <- ratio.cutoff.inf_5fun - mean.for.fold ; fold.y.range.limit.sup <- ratio.cutoff.sup_5fun - mean.for.fold }else{ fold.y.range.limit.inf <- ratio.cutoff.inf_5fun / mean.for.fold ; fold.y.range.limit.sup <- ratio.cutoff.sup_5fun / mean.for.fold} # tempo.title <- paste0("OBS MATRICES\nVOLCANO PLOT (USING FOLD CHANGE AS RATIO / MEAN(RATIO))\n", if(transfo_5fun == "log2"){"LOG2(x) FOLD CHANGE AND LOG10(y) P VALUE"}else if(transfo_5fun == "log10"){"LOG10(x) FOLD AND LOG10(y) P VALUE"}else{"NO LOG TRANSFORMATION FOR FOLD AND LOG10(y) P VALUE"}, "\nX SCALE RANGE: ", paste(fun_round(x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(y.range, 2), collapse = " , "), "\nHIC COMPARE USING ", pvalue.text_5fun, " P VALUES\nCUT-OFFS (ADJUSTED ON THE MEAN OF THE OBS RATIOS): ", fun_round(fold.y.range.limit.inf), " AND ", fun_round(fold.y.range.limit.sup)) -# fun_gg_scatter(data1 = list(L1 = volcano, L2 = data.frame(y = c(fold.y.range.limit.inf, fold.y.range.limit.sup), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "FOLD", L2 = "y"), y = list(L1 = "P_VALUE", L2 = NULL), categ = list(L1 = "SIGNIF", L2 = "CUTOFFS"), color = list(L1 = c(grey(0.40), "red"), L2 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_vline"), legend.name = list(L1 = "P VALUE", L2 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), alpha = list(0.2, 1), dot.size = dot.size_5fun, line.size = line.size, x.log = transfo_5fun, x.lab = "FOLD CHANGE", x.tick.nb = 8, y.log = "log10", y.lab = "P VALUE", y.lim = rev(y.range), y.tick.nb = 8, title = tempo.title, text.size = text.size_5fun, title.text.size = title.text.size_5fun, classic = TRUE, raster = raster_5fun, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0) # +# fun_gg_scatter(data1 = list(L1 = volcano, L2 = data.frame(y = c(fold.y.range.limit.inf, fold.y.range.limit.sup), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "FOLD", L2 = "y"), y = list(L1 = "P_VALUE", L2 = NULL), categ = list(L1 = "SIGNIF", L2 = "CUTOFFS"), color = list(L1 = c(grey(0.40), "red"), L2 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_vline"), legend.name = list(L1 = "P VALUE", L2 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), alpha = list(0.2, 1), dot.size = dot.size_5fun, line.size = line.size, x.log = transfo_5fun, x.lab = "FOLD CHANGE", x.tick.nb = 8, y.log = "log10", y.lab = "P VALUE", y.lim = rev(y.range), y.tick.nb = 8, title = tempo.title, text.size = text.size_5fun, title.text.size = title.text.size_5fun, article = FALSE, raster = raster_5fun, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0) # # end volcano plot # mask computation log.cutoff <- df_5fun$RATIO > ratio.cutoff.inf_5fun & df_5fun$RATIO < ratio.cutoff.sup_5fun # dots that are NOT outside of the 2 cutoffs. obs obtained above. 1D Vector of position @@ -1101,13 +1044,13 @@ tempo <- fun_check(data = file.name1, class = "vector", mode = "character", fun. if(tempo$problem == FALSE & substr(file.name1, nchar(file.name1) - 3, nchar(file.name1)) != ".txt"){ tempo.warning <- paste0("THE file.name1 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } tempo <- fun_check(data = file.name2, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) if(tempo$problem == FALSE & substr(file.name2, nchar(file.name2) - 3, nchar(file.name2)) != ".txt"){ tempo.warning <- paste0("THE file.name2 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } tempo <- fun_check(data = path.in, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) if(tempo$problem == FALSE & ! dir.exists(path.in)){ @@ -1172,13 +1115,13 @@ tempo <- fun_check(data = theo.file.name1, class = "vector", mode = "character", if(tempo$problem == FALSE & substr(theo.file.name1, nchar(theo.file.name1) - 3, nchar(theo.file.name1)) != ".txt"){ tempo.warning <- paste0("THE theo.file.name1 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } tempo <- fun_check(data = theo.file.name2, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) if(tempo$problem == FALSE & substr(theo.file.name2, nchar(theo.file.name2) - 3, nchar(theo.file.name2)) != ".txt"){ tempo.warning <- paste0("THE theo.file.name2 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } tempo <- fun_check(data = theo.path.in, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) if(tempo$problem == FALSE & ! dir.exists(theo.path.in)){ @@ -1265,6 +1208,7 @@ path.python.lib_2fun = path.python.lib ################ Ignition +function.name <- "slitherine.R EXECUTION" set.seed(1014) options(scipen = 7) analysis.nb <- trunc(as.numeric(Sys.time())) # to provide a specific number ot each analysis @@ -1309,7 +1253,7 @@ graphics.off() }else{ tempo.warning <- paste0("GRAPHICS HAVE NOT BEEN ERASED. GRAPHICAL PARAMETERS MAY HAVE NOT BEEN REINITIALIZED") fun_report(data = tempo.warning, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } @@ -1326,7 +1270,7 @@ mat1.mix <- as.matrix(read.table(paste0(theo.path.in, "/", theo.file.name1))) mat2.mix <- as.matrix(read.table(paste0(theo.path.in, "/", theo.file.name2))) tempo.warning <- paste0("IMPORT OF THEORETICAL MATRICES, WHICH WOULD MEAN THAT THE OBSERVED MATRICES IMPORTED HAVE ALREADY BEEN BINNED USING SERPENTINE") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } @@ -1389,7 +1333,7 @@ cat(paste0("\n\n============\n\nERROR IN SLITHERINE: MATRIX 1 AND 2 MUST BE SQUA tempo.cat <- paste0("MATRIX 1 AND 2 HAVE IDENTICAL CONTENT") cat(paste0("\nWARNING: ", tempo.cat, "\n")) fun_report(data = tempo.cat, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } if(theo.import == TRUE){ # check the variable names. Still mix ? for(i0 in 1:2){ @@ -1408,7 +1352,7 @@ cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIMENSIONS OF THEORETICAL M tempo.cat <- paste0("THEORETICAL MATRIX 1 AND 2 HAVE IDENTICAL CONTENT") cat(paste0("\nWARNING: ", tempo.cat, "\n")) fun_report(data = tempo.cat, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } if(ncol(mat1.mix) != ncol(mat1.obs.ini)){ param.check <- c(param.check, TRUE) @@ -1443,10 +1387,10 @@ if( ! any(mat1.obs.ini %in% empty.cell.string)){ # works for NA, Inf, etc. stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nPARAMETER empty.cell.string SET TO ", empty.cell.string, ", WHICH IS NOT PRESENT IN THE IMPORTED MATRIX 1\n\n============\n\n")) } tempo.mat1.obs <- fun_mat_fill(mat = mat1.obs.ini, empty.cell.string = empty.cell.string) -if( ! is.null(tempo.mat1.obs$warnings)){ +if( ! is.null(tempo.mat1.obs$warn)){ mat1.modif <- TRUE -fun_report(data = tempo.mat1.obs$warnings, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat1.obs$warnings) +fun_report(data = tempo.mat1.obs$warn, path = path.out, output = log.file) +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat1.obs$warn) }else{ fun_report(data = paste0("MATRIX 1 DETECTED AS SYMMETRIC"), path = path.out, output = log.file) } @@ -1455,10 +1399,10 @@ if( ! any(mat2.obs.ini %in% empty.cell.string)){ # works for NA, Inf, etc. stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nPARAMETER empty.cell.string SET TO ", empty.cell.string, ", WHICH IS NOT PRESENT IN THE IMPORTED MATRIX 2\n\n============\n\n")) } tempo.mat2.obs <- fun_mat_fill(mat = mat2.obs.ini, empty.cell.string = empty.cell.string) -if( ! is.null(tempo.mat2.obs$warnings)){ +if( ! is.null(tempo.mat2.obs$warn)){ mat2.modif <- TRUE -fun_report(data = tempo.mat2.obs$warnings, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat2.obs$warnings) # in fact, abs(tempo.cor) is systematically used +fun_report(data = tempo.mat2.obs$warn, path = path.out, output = log.file) +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat2.obs$warn) # }else{ fun_report(data = paste0("MATRIX 2 DETECTED AS SYMMETRIC"), path = path.out, output = log.file) } @@ -1486,7 +1430,7 @@ mat2.obs[as.matrix(as.data.frame(list(1:nrow(mat2.obs), 1:ncol(mat2.obs))))] <- mat2.modif <- TRUE tempo.warning <- paste0("MAIN DIAGONAL OF MATRIX 2 HAS BEEN REPLACED BY 0") fun_report(data = tempo.warning, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } # end diagonal removal @@ -1805,7 +1749,7 @@ tempo.data.plot2$MEAN.MINUS.SD[is.na(tempo.data.plot2$MEAN.MINUS.SD)] <- tempo.d } tempo.data.plot2 <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot2), tempo.data.plot2) tempo.title <-paste0("MAT1 OBS\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nDIAGONAL NB FROM MAIN TO CORNER\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot # Mean Deviation (MD) plot of the observed matrix if(activate.pdf == TRUE){ @@ -1814,7 +1758,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT1 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end Mean Deviation (MD) plot of the observed matrix # mean / cor of the observed matrix @@ -1824,7 +1768,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT1 OBS\nMEAN VERSUS MAT1 OBS / MAT2 OBS SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end mean / cor of the observed matrix # mean / cv of the observed matrix @@ -1834,7 +1778,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT1 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND CV LOG2(x) "}else if(transfo == "log10"){"MEAN AND CV LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cv.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cv.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs # end mean / cv of the observed matrix # cv selection using increasing windows to detect the average constant cv @@ -1846,7 +1790,7 @@ win.size.ini <- win.size if(win.size >= length(sort.cv1)){ tempo.warning <- paste0("THE win.size PARAMETER SETTING (", win.size, ") IS OVER OR EQUAL TO THE NUMBER OF NON NA DIAGONAL CV OF THE OBSERVED MATRIX 1 (", length(sort.cv1), ")\nTHE win.size PARAMETER HAS BEEN RESET TO VALUE: ", length(sort.cv1) - 1) cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # win.size <- length(sort.cv1) - 1 fun_report(data = tempo.warning, output = log.file, path = path.out) } @@ -1899,7 +1843,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT1 OBS\nSLIDING CV COMPUTATION\nBLUE DOTS: MEAN+/-SD OF CV IN INCREASING WINDOWS OF CV VALUES\nGREEN LINE: SELECTED CV VALUE IS ", round(cv.select.mat1.obs, 2), ", BASED ON MEDIAN OF ", cv.select.nb, " BLUE DOTS\nRED LINE: POISSON DISTRIB\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat1.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat1.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) # cv selection using increasing windows to detect the average constant cv # END MATRIX 1 @@ -1949,7 +1893,7 @@ tempo.data.plot2$MEAN.MINUS.SD[is.na(tempo.data.plot2$MEAN.MINUS.SD)] <- tempo.d } tempo.data.plot2 <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot2), tempo.data.plot2) tempo.title <-paste0("MAT2 OBS\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nDIAGONAL NB FROM MAIN TO CORNER\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot # Mean Deviation (MD) plot of the observed matrix if(activate.pdf == TRUE){ @@ -1958,7 +1902,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT2 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end Mean Deviation (MD) plot of the observed matrix # mean / cor of the observed matrix @@ -1968,7 +1912,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT2 OBS\nMEAN VERSUS MAT1 OBS / MAT2 OBS SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end mean / cor of the observed matrix # mean / cv of the observed matrix @@ -1978,7 +1922,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT2 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND CV LOG2(x) "}else if(transfo == "log10"){"MEAN AND CV LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cv.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cv.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs # end mean / cv of the observed matrix # cv selection using increasing windows to detect the average constant cv @@ -1990,7 +1934,7 @@ win.size <- win.size.ini # in case modified during first MAT1 analysis if(win.size >= length(sort.cv2)){ tempo.warning <- paste0("THE win.size PARAMETER SETTING (", win.size, ") IS OVER OR EQUAL TO THE NUMBER OF NON NA DIAGONAL CV OF THE OBSERVED MATRIX 2 (", length(sort.cv2), ")\nTHE win.size PARAMETER HAS BEEN RESET TO VALUE: ", length(sort.cv2) - 1) cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # win.size <- length(sort.cv2) - 1 fun_report(data = tempo.warning, output = log.file, path = path.out) } @@ -2044,7 +1988,7 @@ invisible(dev.set(pdf.nb)) fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) } tempo.title <- paste0("MAT2 OBS\nSLIDING CV COMPUTATION\nBLUE DOTS: MEAN+/-SD OF CV IN INCREASING WINDOWS OF CV VALUES\nGREEN LINE: SELECTED CV VALUE IS ", round(cv.select.mat2.obs, 2), ", BASED ON MEDIAN OF ", cv.select.nb, " BLUE DOTS\nRED LINE: POISSON DISTRIB\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat2.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, x.lim = m_cv.x.range, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cv.y.range, y.log = transfo, y.lab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat2.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) # cv selection using increasing windows to detect the average constant cv # END MATRIX 2 @@ -2270,7 +2214,7 @@ tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.d tempo.data.negbinom <- data.frame(x.green = mean.nb1, y.green = cv.nb1 * mean.nb1) # neg binomiale distrib } tempo.title <- paste0("MAT1 THEO\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end Mean Deviation (MD) plot of the observed matrix # mean versus index plot if(activate.pdf == TRUE){ @@ -2287,7 +2231,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT1 THEO\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot @@ -2328,7 +2272,7 @@ tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.d tempo.data.negbinom <- data.frame(x.green = mean.nb2, y.green = cv.nb2 * mean.nb2) # neg binomiale distrib } tempo.title <- paste0("MAT2 THEO\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end Mean Deviation (MD) plot # mean versus index plot if(activate.pdf == TRUE){ @@ -2345,7 +2289,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT2 THEO\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot @@ -2374,7 +2318,7 @@ if(transfo != "no"){ tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap } tempo.title <- paste0("(MAT1 OBS / MAT2 OBS) MEAN VERSUS (MAT1 OBS / MAT2 OBS) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end mean / cor of the observed matrix # mean / cor of the theoretical matrices if(activate.pdf == TRUE){ @@ -2387,7 +2331,7 @@ if(transfo != "no"){ tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap } tempo.title <- paste0("(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # Permutation of the mat1.ini values to have the mat1.obs correlation on the diagonales between mat1 and mat2 if(correl.mat.obtained == TRUE){ tempo.cat <- paste0("BEWARE: mat1.ini, mat1.perm, mat2.ini and mat2.perm THERORETICAL MATRICES USED HAVE BEEN DOWNLOADED FROM:\n", path.theo1.theo2) @@ -2585,7 +2529,7 @@ for(i0 in 1:length(paral.output.list)){ # compartment relatives to each parallel for(i1 in 1:length(paral.output.list[[i0]])){ # compartment relatives to each diagonal inside parallelization permut.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- paral.output.list[[i0]][[i1]]$data names(permut.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] -warning.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- if(is.null(paral.output.list[[i0]][[i1]]$warnings)){"NO WARNING"}else{paral.output.list[[i0]][[i1]]$warnings} # no NULL assignation in list +warning.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- if(is.null(paral.output.list[[i0]][[i1]]$warn)){"NO WARNING"}else{paral.output.list[[i0]][[i1]]$warn} # no NULL assignation in list names(warning.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] cor.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- paral.output.list[[i0]][[i1]]$cor names(cor.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] @@ -2645,13 +2589,13 @@ tempo.title <- paste0("BEFORE PERMUTATION | (MAT1 THEO / MAT2 THEO) SPEARMAN C # par(xpd = TRUE) # text(x = tempo.coord$x.left.plot.region, y = tempo.coord$y.top.fig.region, labels = tempo.title, cex = 0.65, adj = c(0, 1)) # par(xpd = FALSE) -fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, x.lim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, y.lim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = FALSE) +fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, x.lim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, y.lim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = FALSE) tempo.data.plot <- data.frame(MAT1 = permut.list[[i0]], MAT2 = mat2.perm[, i0]) x.tempo.range <- range(tempo.data.plot[, "MAT1"], na.rm = TRUE, finite = TRUE) y.tempo.range <- range(tempo.data.plot[, "MAT2"], na.rm = TRUE, finite = TRUE) tempo.cor <- suppressWarnings(cor(x = tempo.data.plot[, "MAT1"], y = tempo.data.plot[, "MAT2"], use = "pairwise.complete.obs", method = "spearman")) tempo.title <- paste0("AFTER PERMUTATION | (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\nOBS MATRIX CORRESPONDING DIAGONAL NUMBER: ", i0, "\nCORRELATION VALUE: ", if(is.na(tempo.cor)){NA}else{fun_round(tempo.cor)}, "\nCORRELATION INDICATED BY THE fun_permut() FUNCTION: ", if(is.na(cor.list[[i0]])){NA}else{fun_round(cor.list[[i0]])}, "\nX SCALE RANGE: ", paste(fun_round(x.tempo.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(y.tempo.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = fun_gg_palette(1), geom = "geom_point", alpha = 0.5, dot.size = dot.size, x.lim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, y.lim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = FALSE) +fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = fun_gg_palette(1), geom = "geom_point", alpha = 0.5, dot.size = dot.size, x.lim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, y.lim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = FALSE) # fun_prior_plot(param.reinitial = FALSE, xlog.scale = FALSE, ylog.scale = FALSE, remove.label = TRUE, remove.x.axis = TRUE, remove.y.axis = TRUE, std.x.range = FALSE, std.y.range = FALSE, down.space = height.wind / 7 * 1, left.space = width.wind / 7 * 1, up.space = height.wind / 7 * 1, right.space = width.wind / 7 * 1, orient = 1, dist.legend = 5, tick.length = 0.5, box.type = "n", amplif.label = 1, amplif.axis = 1, display.extend = FALSE, return.par = FALSE) # plot(tempo.data.plot[, "MAT1"], tempo.data.plot[, "MAT2"], col = hsv(1, 0.5, 1, 0.5), pch = 16, x.lim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, y.lim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}) # tempo.coord <- fun_post_plot(x.side = 1, x.lab = "MAT1", y.side = 2, y.lab = "MAT2", x.axis.magnific = 1, x.label.magnific = 1, y.axis.magnific = 1, y.label.magnific = 1) @@ -2688,7 +2632,7 @@ if(transfo != "no"){ tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap } tempo.title <- paste0("AFTER PERMUTATION\n(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end mean / cor of the theoretical matrices # correlation versus index before and after permutation if(activate.pdf == TRUE){ @@ -2702,7 +2646,7 @@ stop(tempo.cat) } tempo.data.plot <- data.frame(CORRELATION = c(rho2, unlist(cor.list)), COLUMN_NB = c(1:length(rho2), 1:length(rho2)), KIND = rep(c("OBS", "THEO"), each = length(rho2))) tempo.title <- paste0("SPEARMAN CORRELATION BETWEEN MAT1 OBS / MAT2 OBS\nCOMPARISON WITH MAT1 THEO / MAT2 THEO AFTER PERMUTATION\nX SCALE RANGE: ", paste(range(1:length(rho2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: -1, 1") -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL INDEX", x.tick.nb = 8, y.log = "no", y.lab = "CORRELATION", y.lim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL INDEX", x.tick.nb = 8, y.log = "no", y.lab = "CORRELATION", y.lim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster) # end correlation versus index before and after permutation @@ -2777,7 +2721,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND BEFORE SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) }else{ @@ -2792,7 +2736,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND BEFORE SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot before sub sampling if(keep == FALSE){ tempo.list <- c("mat1.perm", "mat2.perm") @@ -2814,7 +2758,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND AFTER SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) }else{ @@ -2829,7 +2773,7 @@ tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.dat } tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND AFTER SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "COLUMN NB", x.tick.nb = 8, y.log = transfo, y.lab = "MEAN", y.lim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) # end mean versus index plot after sub sampling @@ -2851,7 +2795,7 @@ tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.d tempo.data.negbinom <- data.frame(x.green = mean.nb1, y.green = cv.nb1 * mean.nb1) # neg binomiale distrib } tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND SUB SAMPLING\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end MD plot for the theoretical matrix 1 after sub sampling and permutation # MD plot for the theoretical matrix 2 after sub sampling and permutation @@ -2867,7 +2811,7 @@ tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.d tempo.data.negbinom <- data.frame(x.green = mean.nb2, y.green = cv.nb2 * mean.nb2) # neg binomiale distrib } tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND SUB SAMPLING\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, x.lim = m_sd.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_sd.y.range2, y.log = transfo, y.lab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # end MD plot for the theoretical matrix 2 after sub sampling and permutation # mean / cor of the theoretical matrices @@ -2881,7 +2825,7 @@ if(transfo != "no"){ tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap } tempo.title <- paste0("AFTER PERMUTATION AND SUB SAMPLING\n(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs +fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, x.lim = m_cor.x.range2, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.lim = m_cor.y.range2, y.log = "no", y.lab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs # correlation versus index before and after subsampling tempo.cor <- suppressWarnings(mapply(FUN = "cor", c(data.frame(mat1.mix)), c(data.frame(mat2.mix)), use = "pairwise.complete.obs", method = "spearman")) if(activate.pdf == TRUE){ @@ -2895,7 +2839,7 @@ stop(tempo.cat) } tempo.data.plot <- data.frame(CORRELATION = c(rho2, unlist(cor.list), tempo.cor), COLUMN_NB = c(1:length(rho2), 1:length(rho2), 1:length(rho2)), KIND = rep(c("OBS", "THEO_AFTER_PERMUTATION", "THEO_AFTER_DOWNSAMPLING"), each = length(rho2))) tempo.title <- paste0("SPEARMAN CORRELATION BETWEEN MAT1 OBS / MAT2 OBS\nCOMPARISON WITH MAT1 THEO / MAT2 THEO 1) AFTER PERMUTATION AND 2) AFTER PERMUTATION AND DOWNSAMPLING\nX SCALE RANGE: ", paste(range(1:length(rho2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: -1, 1") -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL INDEX", x.tick.nb = 8, y.log = "no", y.lab = "CORRELATION", y.lim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster) +fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, x.log = "no", x.lab = "DIAGONAL INDEX", x.tick.nb = 8, y.log = "no", y.lab = "CORRELATION", y.lim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, article = TRUE, legend.width = NULL, raster = raster) # end correlation versus index before and after subsampling @@ -2910,7 +2854,7 @@ if(serp.binning == TRUE){ serp.binning <- FALSE tempo.warning <- paste0("THE serp.binning PARAMETER SETTING HAS BEEN SET TO TRUE, BUT THEORETICAL MATRICES HAVE BEEN IMPORTED\n-> PRE SERPENTINE ANALYSIS IS SUFFICIENT (IF YOU NEED MORE SERPENTINE BINNING, INCREASE THE VALUE OF THE serp.iter.nb PARAMETER)\n-> serp.binning PARAMETER RESET TO FALSE") cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # } } @@ -3064,9 +3008,9 @@ fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES PRE SERPENTINE\n(SLITHERINE)" fun_report(data = paste0("SEGMENTATION OF THE THEORETICAL DOT CLOUD (MEAN CELL / RATIO CELL PLOT) PERFORMED ON NON NORMALIZED DATA\nBECAUSE OF DISCRETE RATIO VALUES THAT ARE PRESENTS FOR LOW MEANS\nINDEED, NORMALIZATION SLIGHTLY SHIFTS THE RATIO VALUES BETWEEN THEO AND OBS CLOUDS, WHICH INDUCES LOW MEAN CELL DOTS (I.E., NOISE) TO BE SIGNIFICANT AFTER SEGMENTATION. THEN, DEPENDING ON USER SETTINGS (SEE ABOVE), RATIO NORMALIZATION ARE PERFORMED ON SEGMENTATION RESULT DATA, BEFORE APPLYING THE CUTOFFS FILTERING"), output = log.file, path = path.out) segment.pre.serp <- fun_segmentation(data1 = theo.df.pre.for.segm, x1 = "MEAN", y1 = "RATIO", x.range.split = range.split, x.step.factor = step.factor, error = error, data2 = obs.df.pre.for.segm, x2 = "MEAN", y2 = "RATIO", data2.pb.dot = "signif", plot = FALSE, graph.in.file = FALSE) fun_report(data = "UNKNOWN DOTS HAVE BEEN CONSIDERED AS SIGNIFICANTS (ARGUMENT data2.pb.dot OF fun_segmentation() SET TO \"signif\")", output = log.file, path = path.out) -# cat(paste0("\n", segment.pre.serp$warnings, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.pre.serp$warnings) # in fact, abs(tempo.cor) is systematically used -fun_report(data = segment.pre.serp$warnings, output = log.file, path = path.out) +# cat(paste0("\n", segment.pre.serp$warn, "\n")) +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.pre.serp$warn) # +fun_report(data = segment.pre.serp$warn, output = log.file, path = path.out) if( ! is.null(segment.pre.serp$hframe)){ names(segment.pre.serp$hframe)[names(segment.pre.serp$hframe) == "x"] <- "MEAN" # names(segment.pre.serp$hframe)[names(segment.pre.serp$hframe) == "y"] <- "RATIO" # @@ -3177,10 +3121,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.theo.dot.pre)){ # signif dots in theo matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE + THEO SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = theo.df.pre, L2 = signif.theo.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = theo.df.pre, L2 = signif.theo.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE (NO THEO SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = theo.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = theo.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) @@ -3189,10 +3133,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.obs.dot.pre)){ # signif dots in obs matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE + OBS SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = obs.df.pre, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = obs.df.pre, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE (NO OBS SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = obs.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = obs.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) @@ -3201,10 +3145,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.obs.dot.pre)){ # signif dots in obs matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT + OBS SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = final.df.pre, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = final.df.pre, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT (NO OBS SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.pre, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.pre, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.pre), " AND ", fun_round(ratio.cutoff.sup.pre)) -fun_gg_scatter(data1 = list(L1 = final.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = final.df.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.pre, ratio.cutoff.sup.pre), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.pre, y.lim = segm.y.range.pre, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } # end plot verif obs dots outside # end MD overlay plot before serpentine @@ -3454,7 +3398,7 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, -classic = TRUE, +article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, @@ -3507,7 +3451,7 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, -classic = TRUE, +article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, @@ -3980,9 +3924,9 @@ fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES POST SERPENTINE\n(SLITHERINE) fun_report(data = paste0("SEGMENTATION OF THE THEORETICAL DOT CLOUD (MEAN CELL / RATIO CELL PLOT) PERFORMED ON NON NORMALIZED DATA\nBECAUSE OF DISCRETE RATIO VALUES THAT ARE PRESENTS FOR LOW MEANS\nINDEED, NORMALIZATION SLIGHTLY SHIFTS THE RATIO VALUES BETWEEN THEO AND OBS CLOUDS, WHICH INDUCES LOW MEAN CELL DOTS (I.E., NOISE) TO BE SIGNIFICANT AFTER SEGMENTATION. THEN, DEPENDING ON USER SETTINGS (SEE ABOVE), RATIO NORMALIZATION ARE PERFORMED ON SEGMENTATION RESULT DATA, BEFORE APPLYING THE CUTOFFS FILTERING"), output = log.file, path = path.out) segment.post.serp <- fun_segmentation(data1 = theo.df.post.for.segm, x1 = "MEAN", y1 = "RATIO", x.range.split = range.split, x.step.factor = step.factor, error = error, data2 = obs.df.post.for.segm, x2 = "MEAN", y2 = "RATIO", data2.pb.dot = "signif", plot = FALSE, graph.in.file = FALSE) fun_report(data = "UNKNOWN DOTS HAVE BEEN CONSIDERED AS SIGNIFICANTS (ARGUMENT data2.pb.dot OF fun_segmentation() SET TO \"signif\")", output = log.file, path = path.out) -# cat(paste0("\n", segment.post.serp$warnings, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.post.serp$warnings) # in fact, abs(tempo.cor) is systematically used -fun_report(data = segment.post.serp$warnings, output = log.file, path = path.out) +# cat(paste0("\n", segment.post.serp$warn, "\n")) +warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.post.serp$warn) # +fun_report(data = segment.post.serp$warn, output = log.file, path = path.out) if( ! is.null(segment.post.serp$hframe)){ names(segment.post.serp$hframe)[names(segment.post.serp$hframe) == "x"] <- "MEAN" # names(segment.post.serp$hframe)[names(segment.post.serp$hframe) == "y"] <- "RATIO" # @@ -4093,10 +4037,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.theo.dot.post)){ # signif dots in theo matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE + THEO SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = theo.df.post, L2 = signif.theo.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = theo.df.post, L2 = signif.theo.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE (NO THEO SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = theo.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = theo.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) @@ -4105,10 +4049,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.obs.dot.post)){ # signif dots in obs matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE + OBS SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = obs.df.post, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = obs.df.post, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE (NO OBS SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = obs.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = obs.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } if(activate.pdf == TRUE){ invisible(dev.set(pdf.nb)) @@ -4117,10 +4061,10 @@ fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wi } if( ! is.null(signif.obs.dot.post)){ # signif dots in obs matrices tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT + OBS SIGNIFICANT DOTS\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = final.df.post, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = final.df.post, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) }else{ tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT (NO OBS SIGNIFICANT DOTS)\n", ifelse(ratio.normalization == TRUE, "THEO AND OBS RATIOS HAVE BEEN NORMALIZED (EACH CENTERED ON RATIO 1)\n", ""), if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range.post, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range.post, 2), collapse = " , "), "\nCUT-OFFS DEFINED BY THE USER: ", fun_round(ratio.cutoff.inf.post), " AND ", fun_round(ratio.cutoff.sup.post)) -fun_gg_scatter(data1 = list(L1 = final.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) +fun_gg_scatter(data1 = list(L1 = final.df.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(ratio.cutoff.inf.post, ratio.cutoff.sup.post), CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "MEAN", L4 = NULL), y = list(L1 = "RATIO", L3 = "RATIO", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, x.log = transfo, x.lab = "MEAN", x.tick.nb = 8, y.log = transfo, x.lim = segm.x.range.post, y.lim = segm.y.range.post, y.lab = "RATIO", y.tick.nb = 8, title = tempo.title, article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) } # end plot verif obs dots outside # end MD overlay plot after serpentine @@ -4366,7 +4310,7 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, -classic = TRUE, +article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, @@ -4419,7 +4363,7 @@ y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, -classic = TRUE, +article = TRUE, legend.width = NULL, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, @@ -4518,6 +4462,12 @@ rm(list = tempo.list) # not saved because initial matrices fun_close() +################ Seeding inactivation + + +set.seed(NULL) + + ################ Environment saving @@ -4534,9 +4484,22 @@ save(list = ls(), file = paste0(path.out, "/all_objects.RData")) ################ Warning messages +fun_report(data = "\n\n################################ RECAPITULATION OF WARNING MESSAGES", path = path.out, output = log.file, sep = 4) +if(warn.secu == TRUE){ +tempo <- fun_secu(pos = 1, name = function.name) # check variables of anova_contrasts script added in the Global ENV. SCRIPT remains to be tested +if( ! is.null(tempo)){ +tempo.warn <- paste0(if(erase.objects == FALSE){"IT COULD BE POSSIBLE THAT "}, tempo, if(erase.objects == FALSE){paste0("\nHOWEVER, SINCE erase.objects == FALSE, THE MATCH COULD COME FROM PRE-EXISTING VARIABLES\nPLEASE, SET erase.objects TO TRUE AT THE BEGINNING OF THE ", script, " SCRIPT AND RERUN TO CONFIRM THAT")}) +cat(paste0("\n\nWARNING:\n", tempo.warn, "\n\n")) +warning.message <- paste0(ifelse(is.null(warning.message), tempo.warning.message, paste0(warning.message, "\n\n", tempo.warn))) +} +}else{ +tempo.warn <- paste0("warn.secu PARAMETER SET TO FALSE -> NO CHECK OF VARIABLE CONFLICTS") +warning.message <- paste0(ifelse(is.null(warning.message), tempo.warn, paste0(warning.message, "\n\n", tempo.warn))) +} if( ! is.null(warning.message)){ -fun_report(data = "\n\n################################ WARNING MESSAGES", path = path.out, output = log.file, sep = 4) fun_report(data = warning.message, path = path.out, output = log.file) +}else{ +fun_report(data = "NO WARNING MESSAGE TO REPORT", path = path.out, output = log.file) } @@ -4546,7 +4509,10 @@ fun_report(data = warning.message, path = path.out, output = log.file) fun_report("\n\n################################ INITIAL SETTINGS OF PARAMETERS", output = log.file, path = path.out) fun_report(data = param.ini.settings, path = path.out, output = log.file, vector.cat = TRUE) fun_report("\n\n################################ R SYSTEM AND PACKAGES", output = log.file, path = path.out) -fun_report(data = sessionInfo(), path = path.out, output = log.file, vector.cat = TRUE) +tempo <- sessionInfo() +tempo$otherPkgs <- tempo$otherPkgs[order(names(tempo$otherPkgs))] # sort the packages +tempo$loadedOnly <- tempo$loadedOnly[order(names(tempo$loadedOnly))] # sort the packages +fun_report(data = tempo, path = path.out, output = log.file, vector.cat = TRUE) if(serp.binning == TRUE){ fun_report("\n\n################################ PYTHON SYSTEM AND PACKAGES", output = log.file, path = path.out) fun_report(data = reticulate::py_config(), path = path.out, output = log.file, vector.cat = TRUE) # to get the version of python, or use a python chunk ```{python} import IPython; python_session = IPython.sys_info();``` then reticulate::py$python_session diff --git a/slitherine.config b/slitherine.config index a60a96dbbe9af5f492ba9283de07fddad5d1eea0..0f7a0f55c1bc7b9f8794cf261c7717602306ca16 100644 --- a/slitherine.config +++ b/slitherine.config @@ -31,8 +31,8 @@ path.out <- "C:/Users/Gael/Desktop/" # absolute pathway of the destination folde ######## R packages and cute_little_R_functions file locations -path.lib <- NULL # vector of character that define the absolute pathway of the folder containing the R packages. Write NULL for the default path. BEWARE: default path is dependent on the system and interface used. For instance, using cygwin64 on windows 7, the path is "C:/Program Files/R/R-3.5.3/library". On the same cmputer using the R classical interface, the paths are [1] "C:/Users/Gael/Documents/R/win-library/3.5" [2] "C:/Program Files/R/R-3.5.3/library" -path.function1 <- "C:/Users/Gael/Documents/Git_projects/cute_little_R_functions/cute_little_R_functions.R" # file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" +lib.path <- NULL # vector of character that define the absolute pathway of the folder containing the R packages. Write NULL for the default path. BEWARE: default path is dependent on the system and interface used. For instance, using cygwin64 on windows 7, the path is "C:/Program Files/R/R-3.5.3/library". On the same cmputer using the R classical interface, the paths are [1] "C:/Users/Gael/Documents/R/win-library/3.5" [2] "C:/Program Files/R/R-3.5.3/library" +path.function1 <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/c1dd8832f14d76c1901a60ca2a7c87af01a82b61/cute_little_R_functions.R" # file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" ######## Matrix structure @@ -50,6 +50,11 @@ thread.nb <- NULL # Integer specifying the number of threads available. BEWARE: ################ End Mandatory settings + + + + + ################ Optional settings @@ -81,7 +86,7 @@ theo.import <- FALSE # logical. Import theoretical matrices already obtained usi theo.file.name1 <- "mat1.theo.serp.txt" # name of the first matrix file theo.file.name2 <- "mat2.theo.serp.txt" # name of the second matrix file theo.path.in <- "C:/Users/Gael/Documents/Hub projects/20190611 Scolari 13341/dataset/Meio/" # absolute pathway of the folder containing the input data files (file.name1 and file.name2) -n.row <- NULL # integer value setting the number of rows of the theoretical matrices. If NULL, n.row will take the number of column of the theoretical matrices +n.row <- 300 # integer value setting the number of rows of the theoretical matrices. If NULL, n.row will take the number of column of the theoretical matrices win.size <- 20 # nb of cv values taken in the sliding windows on the CV / MEAN plot to define an average cv at the rupture slope on the CV / MEAN plot (must be less than matrix dimension - 2). Increase this value if warning messages appears saying: "PARAMETER MUST BE SUCH THAT cv^2 > 1/mu" cv.rho.obtained <- FALSE # coefficient of variation (cv) of observed matrices 1 and 2, as well as correlation between observed matrices 1 and 2 already obtained ? If TRUE, will use the path.cv.rho parameter to load the data path.cv.rho <- "C:/Users/Gael/Desktop/cv1_cv2_rho1_rho2_backup.RData" # file and absolute pathway to download the cv1, cv2, as well as rho1 and rho2 (which are identical) of observed matrices 1 and 2 already obtained. Write NULL if not required. Not considered if cv.rho.obtained is FALSE @@ -89,17 +94,18 @@ correl.mat.obtained <- FALSE # theoretical matrix with permutation already obtai path.theo1.theo2 <- "C:/Users/Gael/Desktop/permut_mat1_mat2_backup.RData" # file and absolute pathway to download the theoretical matrix with permutation already obtained. Write NULL if not required. Not considered if correl.mat.obtained is FALSE single.corr <- "MAX" # either "VALUE", "MAX", "DEC1", "QUART1", "MED", "MIN" or "NO" # use MAX by default. if VALUE, a unique arbitrary value, defined in the abs.corr.limit parameter below, is used as reference to generate the correlation between the related diagonals of the theoretical theo1 and theo2 matrices (all the correlations between theo1 and theo2 diagonals will be close to abs.corr.limit). If MAX, the maximal correlation value between the observed mat1 and mat2 matrix diagonals will be used to generate the correlation between the related diagonals of theo1 and theo2 matrices (all the correlations between theo1 and theo2 diagonals will be close to max(rho1)). If DEC1, QUART1 MED or MIN, the same as MAX but using the first decile, first quartile, median or minimal correlation value between the observed mat1 and mat2 matrix diagonals, respectively. If NO, each of the observed correlations between the related diagonals of the mat1 and mat2 matrices will be used to generate the correlation of the corresponding theo1 and theo2 diagonal. In the case of NO, any observed correlation below the abs.corr.limit parameter will be set to abs.corr.limit (to avoid very long computing needed for very weak correlations) abs.corr.limit <- 0.2 # parameter used when single.corr <- "VALUE" or single.corr <- "NO". See the single.corr parameter description -count.print <- 1e6 # during the correlation adjustment process, print a message every count.print loops ? +print.count <- 1e6 # during the correlation adjustment process, print a message every print.count loops ? keep <- TRUE # keep the intermediate matrices and big objects in the working environment til the end? If TRUE, everything is saved in the final all_objects.RData. If FALSE, intermediate matrices are saved in different .RData files and then removed all along the script execution ######## Significant regions between the two compared matrices -ratio.limit.sig <- 2 # ratio value between the two matrice pixel, below which ratio is not significant? From 1 to +Inf (2 means coverage ratio less than 2 is not significant) +ratio.limit.sig <- 2 # ratio value between the two matrice pixel, below which ratio is not significant? From 1 to +Inf (2 means coverage ratio less than 2 is not significant, 1 means no cutoffs in ratio values) error <- 0 # from 0 to 1. Proportion of false positives (i.e., theo dots considered as observed dots). 0.05 means 5%, 0 means that the significant observed dot are outside of the theo cloud range.split <- 25 # for the significant dots. If x.range is the range of the dots on the x-axis, then abs(diff(x.range) / range.split) gives the window size. Window size decreases when range.split increases step.factor <- 10 # for the significant dots. x.win.size / step.factor gives the shift step of the window. When step.factor = 1, no overlap during the sliding. If step.factor = 2, 50% of overlap during 1 slide +ratio.normalization <- TRUE # logical. Divide the cell ratio matrix mat2 / mat1 (differential matrix) by the ratio factor mean(mat2) / mean(mat1)? If TRUE, this means that the mean of the normalized cell ratio matrix is 1, and log (parameter transfo <- TRUE)is 0 ######## Graphical and display parameters @@ -112,13 +118,19 @@ width.wind <- 7 # window width (in inches) height.wind <- 7 # window height (in inches) dot.size <- 2 # increase or decrease the value to increase or decrease the size of the dots line.size <- 0.75 # increase or decrease the value to increase or decrease the size of the lines -heatmap.text.size <- 8 # increase or decrease the value to increase or decrease the size of the heatmap scale text +heatmap.text.size <- 16 # increase or decrease the value to increase or decrease the size of the heatmap scale text text.size <- 12 # increase or decrease the value to increase or decrease the size of the axis text and legend text -title.text.size <- 4.5 # increase or decrease the value to increase or decrease the size of the title text +title.text.size <- 6 # increase or decrease the value to increase or decrease the size of the title text raster <- TRUE # raster mode for dot plots ? transfo <- "log2" # Either "log2" (matrix values will be log2 converted, and sometimes log2(x +1) converted, +1 to deal with zero) or "log10" ((matrix values will be log10 converted and sometimes log10(x +1) converted, +1 to deal with zero). BEWARE: observed matrices must remain integers for serpentine. Log is only applied for display, the reason why the option "no" is not proposed (which would mean data already log converted) +######## Others + + +warn.secu <- FALSE # logical. Display if internal homemade functions of anova_contrasts and anova_contrasts have variables that are present in other environments? + + ################ End Optional settings diff --git a/slitherine_script.R b/slitherine_script.R deleted file mode 100644 index 4139c6d72c9e58ffdf8fa94f8a777f8e3ac6b82d..0000000000000000000000000000000000000000 --- a/slitherine_script.R +++ /dev/null @@ -1,3125 +0,0 @@ -######################################################################### -## ## -## SLITHERINE v1.0.0 ## -## ## -## Gael A. Millot ## -## Vittore F. Scolari ## -## Lyam Baudry ## -## ## -######################################################################### - - - -################################ Aim - - -# Slitherine help to define significant coverage differences when comparing two contact matrices - - -################################ End Aim - - -################################ Introduction - - -# Compatible with R v6.3.1 -# Increase the R console window width if columns of tables are subjected to carriage return in the ...report.txt file - - -################################ End Introduction - - -################################ Acknowlegments - - - - -################################ End Acknowlegments - - -################################ Initialization - - -# R version checking -if(version$version.string != "R version 3.6.1 (2019-07-05)"){ -cat(paste0("\n\nWARNING: THE ", version$version.string, " IS NOT THE 6.3.1 RECOMMANDED\n\n")) -} -# other initializations -erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE -if(erase.objects == TRUE){ -rm(list=ls()) -erase.objects = TRUE -} -erase.graphs = TRUE # write TRUE to erase all the graphic windows in R before starting the algorithm and FALSE otherwise -script <- "slitherine v1.0.0" - - -################################ End Initialization - - -################################ Parameters that need to be set by the user - - -# see the slitherine.config file - - -################################ End Parameters that need to be set by the user - - -################################ Config import - - -slitherine.command <- paste0(commandArgs(trailingOnly = FALSE), collapse = ",") # recover the full command -args <- commandArgs(trailingOnly = TRUE) # recover arguments written after the call of the Rscript -if(any(is.na(args))){ -stop(paste0("\n\n================\n\nERROR: THE args OBJECT HAS NA\n\n================\n\n")) -} -tempo.arg.names <- c("config.path") # objects names exactly in the same order as in the bash code and recovered in args -if(length(args) != length(tempo.arg.names)){ -stop(paste0("\n\n================\n\nERROR: THE NUMBER OF ELEMENTS IN args (", length(args),") IS DIFFERENT FROM THE NUMBER OF ELEMENTS IN tempo.arg.names (", length(tempo.arg.names),")\nargs:", paste0(args, collapse = ","), "\ntempo.arg.names:", paste0(tempo.arg.names, collapse = ","), "\n\n================\n\n")) -} -for(i0 in 1:length(tempo.arg.names)){ -assign(tempo.arg.names[i0], args[i0]) -} -rm(tempo.arg.names, args, i0) -if( ! file.exists(config.path)){ -stop(paste0("\n\n============\n\nERROR: CONFIG FILE NAME AND PATH INDICATED IN SLITHERINE EXECUTION COMMAND DOES NOT EXISTS: ", config.path, "\n\n============\n\n")) -}else{ -source(config.path) # source the config parameters -} - - -################################ End Config import - - -################################ Recording of the initial parameters - - -param.list <- c( -"erase.objects", -"erase.graphs", -"script", -"slitherine.command", -"config.path", -"project.name", -"file.name1", -"file.name2", -"path.in", -"path.out", -"path.lib", -"path.function1", -"empty.cell.string", -"thread.nb", -"serp.binning", -"python", -"path.python.lib", -"serp.threshold", -"serp.minthreshold", -"serp.iter.nb", -"serp.symmet.input", -"theo.import", -"theo.file.name1", -"theo.file.name2", -"theo.path.in", -"n.row", -"win.size", -"cv.rho.obtained", -"path.cv.rho", -"correl.mat.obtained", -"path.theo1.theo2", -"single.corr", -"abs.corr.limit", -"count.print", -"keep", -"ratio.limit.sig", -"error", -"range.split", -"step.factor", -"activate.pdf", -"optional.text", -"width.wind", -"height.wind", -"dot.size", -"line.size", -"heatmap.text.size", -"text.size", -"title.text.size", -"raster", -"transfo" -) -if(any(duplicated(param.list))){ -stop(paste0("\n\n================\n\nERROR: THE param.list OBJECT CONTAINS DUPLICATED ELEMENTS:\n", paste(param.list[duplicated(param.list)], collapse = " "), "\n\n================\n\n")) # message for developers -} -if(erase.objects == TRUE){ -created.object.control <- ls()[ ! ls() %in% "param.list"] -if( ! (all(created.object.control %in% param.list) & all(param.list %in% created.object.control))){ -stop(paste0("\n\n================\n\nERROR: INCONSISTENCIES BETWEEN THE param.list ELEMENTS AND THE CREATED OBJECTS\nTHE CREATED OBJECTS (created.object.control) NOT PRESENT IN THE param.list ARE: ", paste(created.object.control[ ! created.object.control %in% param.list], collapse = " "), "\nTHE param.list ELEMENTS NOT PRESENT IN THE CREATED OBJECTS (created.object.control) ARE: ", paste(param.list[ ! param.list %in% created.object.control], collapse = " "), "\n\n================\n\n")) # message for developers -} -} -char.length <- nchar(param.list) -space.add <- max(char.length) - char.length + 5 -param.ini.settings <- character(length = length(param.list)) -for(i in 1:length(param.list)){ -param.ini.settings[i] <- paste0("\n", param.list[i], paste0(rep(" ", space.add[i]), collapse = ""), paste0(get(param.list[i]), collapse = ",")) -} - - -################################ End Recording of the initial parameters - - -################################ Functions - - -################ fun_ functions import - - -if(length(path.function1) != 1){ -stop(paste0("\n\n============\n\nERROR IN SLITHERINE\npath.function1 PARAMETER MUST BE LENGTH 1: ", paste(path.function1, collapse = " "), "\n\n============\n\n")) -}else if(grepl(x = path.function1, pattern = "^http")){ -tempo.try <- try(suppressWarnings(source(path.function1)), silent = TRUE) -if(any(grepl(x = tempo.try, pattern = "[Ee]rror"))){ -stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nHTTP INDICATED IN THE path.function1 PARAMETER DOES NOT EXISTS: ", path.function1, "\n\n============\n\n")) -}else{ -source(path.function1) # source the fun_ functions used below -} -}else if( ! grepl(x = path.function1, pattern = "^http")){ -if( ! file.exists(path.function1)){ -stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nFILE INDICATED IN THE path.function1 PARAMETER DOES NOT EXISTS: ", path.function1, "\n\n============\n\n")) -}else{ -source(path.function1) # source the fun_ functions used below -} -} - - -################ package import - - -# R Packages required -req.package.list <- c( -"reticulate", -"parallel", -"lubridate", -"reshape2", -"ggplot2", -"Cairo" -) - - -# Python Packages required -req.python.package.list <- c( -"serpentine" -) - - -# A function is created in order to import packages after parameter checkings -package.import.fun <- function(){ -# R Packages verification and import -fun_pack(req.package = req.package.list, path.lib = path.lib) # packages are imported even if inside functions are written as package.name::function() in the present code -# Python Packages verification and import -if(serp.binning == TRUE){ -fun_python_pack(req.package = req.python.package.list, path.python.exec = python, path.lib = path.python.lib, R.path.lib = path.lib) -} -} - - -################ local functions - - -adj.mean.fun <- function(x_fun, text_fun = ""){ -# compute a weighted mean, according to the number of cells per diagonal, such as the main diagonal has the max weight and the corner the min -# x_fun: a matrix -# text: text for error messages -# argument checking -arg.check <- NULL # for function debbuging -checked.arg.names <- NULL # for function debbuging: used by r_debugging_tools -ee <- expression(arg.check <- c(arg.check, tempo$problem) , checked.arg.names <- c(checked.arg.names, tempo$param.name)) -tempo <- fun_check(data = x_fun, class = "matrix", mode = "numeric", na.contain = TRUE, fun.name = "adj.mean.fun INTERNAL FUNCTION OF SLITHERINE") -tempo <- fun_check(data = text_fun, class = "vector", typeof = "character", length = 1, fun.name = "adj.mean.fun INTERNAL FUNCTION OF SLITHERINE") -if(any(arg.check) == TRUE){ -stop() # nothing else because print = TRUE by default in fun_check() -} -# end argument checking -cell.rm <- is.na(x_fun) | ! is.finite(x_fun) # matrix indicating the positions of the NA and Inf in x_fun -x_fun[cell.rm] <- 0 # replacing NA and Inf by 0, because after, we are doing sum -if(all(cell.rm) == TRUE){ -tempo.cat <- paste0("\n\n============\n\n", ifelse(text_fun == "", "", paste0(text_fun, "\n")), "ERROR IN adj.mean.fun INTERNAL FUNCTION OF SLITHERINE\nTHE MATRIX HAS ONLY NA, NaN AND Inf\nNOT POSSIBLE TO COMPUTE THE MEAN\n\n============\n\n") -stop(tempo.cat) -} -mean.output <- sum(apply(x_fun, 2, sum, na.rm = TRUE) * ncol(x_fun):1) / (sum(ncol(x_fun):1) * nrow(x_fun)) # here, instead of doing mean(theo$RATIO[is.finite(theo$RATIO)], na.rm = TRUE), we wheighted the cell ratio using sum(r.ij * (c - j - 1)) / sum(c - j - 1), with r.ij the ratio in cell ij, c the number of column in theo matrices (which is the number of diag in the obs matrices) and j the column index. Thus we weight each value inside theo column by the number of values in diagonals -if(all(is.na(mean.output)) | all( ! is.finite(mean.output))){ -tempo.cat <- paste0("\n\n============\n\n", ifelse(text_fun == "", "", paste0(text, "\n")), "ERROR IN adj.mean.fun INTERNAL FUNCTION OF SLITHERINE: THE ADJUSTED MEAN IS NA, NaN OR Inf: ", paste(mean.output, collapse = " "), "\n\n============\n\n") -stop(tempo.cat) -}else{ -return(mean.output) -} -} - -mask.plot.fun <- function(mask_fun, mat1_fun, mat2_fun, serp_kind, mask_kind, text_fun = ""){ -# plot the sup, inf and full masks -# x_fun: a matrix -# text_fun: text for plotting messages -# argument checking -arg.check <- NULL # for function debbuging -checked.arg.names <- NULL # for function debbuging: used by r_debugging_tools -ee <- expression(arg.check <- c(arg.check, tempo$problem) , checked.arg.names <- c(checked.arg.names, tempo$param.name)) -tempo <- fun_check(data = mask_fun, class = "matrix", mode = "numeric", na.contain = TRUE, fun.name = "mask.plot.fun INTERNAL FUNCTION OF SLITHERINE") ; eval(ee) -tempo <- fun_check(data = mat1_fun, class = "matrix", mode = "numeric", na.contain = TRUE, fun.name = "mask.plot.fun INTERNAL FUNCTION OF SLITHERINE") ; eval(ee) -tempo <- fun_check(data = mat2_fun, class = "matrix", mode = "numeric", na.contain = TRUE, fun.name = "mask.plot.fun INTERNAL FUNCTION OF SLITHERINE") ; eval(ee) -tempo <- fun_check(data = serp_kind, options = c("BEFORE", "AFTER"), length = 1) ; eval(ee) -tempo <- fun_check(data = mask_kind, options = c("POSITIVE (MAT2 > MAT1)", "NEGATIVE (MAT2 < MAT1)", "FULL"), length = 1, fun.name = "mask.plot.fun INTERNAL FUNCTION OF SLITHERINE") ; eval(ee) -tempo <- fun_check(data = text_fun, class = "vector", typeof = "character", length = 1, fun.name = "mask.plot.fun INTERNAL FUNCTION OF SLITHERINE") ; eval(ee) -if(any(arg.check) == TRUE){ -stop() # nothing else because print = TRUE by default in fun_check() -} -# end argument checking -# mask -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0(serp_kind," SERPENTINE ", mask_kind, " MASK (INVERTED)", text_fun) -fun_gg_heatmap(data1 = mask_fun, low.color1 = "white", mid.color1 = NULL, high.color1 = "black", title = tempo.title, title.text.size = title.text.size, show.scale = FALSE) # I tried without meltedbefore. Add flip in fun_heatmap to reverse x and y if required -# end mask -# Heatmap -loop.mat.names <- c("mat1_fun", "mat2_fun") -loop.heatmap.title <- c(paste0("OBS MAT1 ", serp_kind, " SERPENTINE"), paste0("OBS MAT2 ", serp_kind, " SERPENTINE")) -for(i0 in 1:length(loop.mat.names)){ -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0])[is.finite(get(loop.mat.names[i0]))], na.rm = TRUE) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0(loop.heatmap.title[i0], "\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION, WHICH WOULD EXPLAIN INTENSITY VARIATION)", text_fun) -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range, na.rm = TRUE), max(heatmap.range, na.rm = TRUE)), midpoint1 = mean(heatmap.range, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -tempo.title <- paste0(loop.heatmap.title[i0], " + ", mask_kind, " MASK\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION, WHICH WOULD EXPLAIN INTENSITY VARIATION)", text_fun) -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range, na.rm = TRUE), max(heatmap.range, na.rm = TRUE)), midpoint1 = mean(heatmap.range, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size, data2 = mask_fun, color2 = "black", alpha2 = 0.5, invert2 = TRUE) -} -# end Heatmap -# differential heatmap -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- (mat2_fun / mat1_fun) # BEWARE: convention in serptentine is this one -tempo.data.plot[tempo.data.plot == 0] <- NA # replacement of zero by NA -tempo.data.plot <- tempo.data.plot / mean(tempo.data.plot[is.finite(tempo.data.plot)], na.rm = TRUE) # mean normalization -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) # log transfo -} -tempo.range <- max(abs(tempo.data.plot[is.finite(tempo.data.plot)]), na.rm = TRUE) -tempo.range <- c(-tempo.range, tempo.range) -tempo.title <- paste0("OBS DIFF MAT ", serp_kind, " SERPENTINE (OBS2 / OBS1)\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"RAW VALUES"}, " TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(tempo.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)\n0 VALUES REPLACED BY NA\nGREY COLOR WHEN NaN (DIVISION BY ZERO) OR NA", text_fun) -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = tempo.range, midpoint1 = 0, title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -tempo.title <- paste0("OBS DIFF MAT ", serp_kind, " SERPENTINE (OBS2 / OBS1) + ", mask_kind, " MASK\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(tempo.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)\n0 VALUES REPLACED BY NA\nGREY COLOR WHEN NaN (DIVISION BY ZERO)", text_fun) -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = tempo.range, midpoint1 = 0, title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size, data2 = mask_fun, color2 = "black", alpha2 = 0.5, invert2 = TRUE) -# end differential heatmap -} - - -################################ End Functions - - -################################ Main code - - -################ Pre-ignition checking - -warning.message <- NULL -param.check <- NULL # -checked.param.names <- NULL # -ee <- expression(param.check <- c(param.check, tempo$problem) , checked.param.names <- c(checked.param.names, tempo$param.name)) -tempo <- fun_check(data = erase.objects, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = erase.graphs, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = script, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = project.name, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = file.name1, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & substr(file.name1, nchar(file.name1) - 3, nchar(file.name1)) != ".txt"){ -tempo.warning <- paste0("THE file.name1 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -tempo <- fun_check(data = file.name2, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & substr(file.name2, nchar(file.name2) - 3, nchar(file.name2)) != ".txt"){ -tempo.warning <- paste0("THE file.name2 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -tempo <- fun_check(data = path.in, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! dir.exists(path.in)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIRECTORY PATH INDICATED IN THE path.in PARAMETER DOES NOT EXISTS: ", path.in, "\n\n============\n\n")) -} -if(tempo$problem == FALSE & ! file.exists(paste0(path.in, file.name1))){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: file.name1 PARAMETER\n", file.name1, "\nDOES NOT EXIST AT path.in:\n", path.in, "\n\n============\n\n")) -} -if(tempo$problem == FALSE & ! file.exists(paste0(path.in, file.name2))){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: file.name2 PARAMETER\n", file.name2, "\nDOES NOT EXIST AT path.in:\n", path.in, "\n\n============\n\n")) -} -tempo <- fun_check(data = path.out, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! dir.exists(path.out)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIRECTORY PATH INDICATED IN THE path.out PARAMETER DOES NOT EXISTS: ", path.out, "\n\n============\n\n")) -} -# tempo <- fun_check(data = path.lib, class = "character") ; eval(ee) # path.lib already cheked above in the fun_pack() function. Not rechecked because can be NULL -tempo <- fun_check(data = path.function1, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) # full check above -if( ! is.null(empty.cell.string)){ -tempo <- fun_check(data = empty.cell.string, class = "vector", na.contain = TRUE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -} -if( ! is.null(thread.nb)){ -tempo <- fun_check(data = thread.nb, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & thread.nb < 1){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: thread.nb PARAMETER MUST EQUAL OR GREATER THAN 1: ", ratio.limit.sig, "\n\n============\n\n")) -} -} -tempo <- fun_check(data = serp.binning, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = python, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! file.exists(python)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: FILE AND PATH INDICATED IN THE python PARAMETER DOES NOT EXISTS: ", python, "\n\n============\n\n")) -} -if( ! is.null(path.python.lib)){ -tempo <- fun_check(data = path.python.lib, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! dir.exists(path.python.lib)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIRECTORY PATH INDICATED IN THE path.python.lib PARAMETER DOES NOT EXISTS: ", path.python.lib, "\n\n============\n\n")) -} -} -tempo <- fun_check(data = serp.threshold, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = serp.minthreshold, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = serp.iter.nb, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = serp.symmet.input, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = hiccomp, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & is.null(binning)){ -if(hiccomp == TRUE){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: binning PARAMETER CANNOT BE NULL IF hiccomp PARAMETER IS TRUE\n\n============\n\n")) -} -} -if( ! is.null(binning)){ -tempo <- fun_check(data = binning, class = "vector", typeof = "integer", double.as.integer.allowed = TRUE, na.contain = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -} -tempo <- fun_check(data = theo.import, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & theo.import == TRUE){ -tempo <- fun_check(data = theo.file.name1, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & substr(theo.file.name1, nchar(theo.file.name1) - 3, nchar(theo.file.name1)) != ".txt"){ -tempo.warning <- paste0("THE theo.file.name1 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -tempo <- fun_check(data = theo.file.name2, class = "vector", mode = "character", fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & substr(theo.file.name2, nchar(theo.file.name2) - 3, nchar(theo.file.name2)) != ".txt"){ -tempo.warning <- paste0("THE theo.file.name2 OBJECT SETTING SHOULD BE A TXT FILE BUT DOES NOT FINISH BY \".txt\" LOWERCASE WRITTEN") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -tempo <- fun_check(data = theo.path.in, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! dir.exists(theo.path.in)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIRECTORY PATH INDICATED IN THE theo.path.in PARAMETER DOES NOT EXISTS: ", theo.path.in, "\n\n============\n\n")) -} -} -if( ! is.null(n.row)){ -tempo <- fun_check(data = n.row, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -} -tempo <- fun_check(data = win.size, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = cv.rho.obtained, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & cv.rho.obtained == TRUE){ -if( ! is.null(path.cv.rho)){ -tempo <- fun_check(data = path.cv.rho, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! file.exists(path.cv.rho)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: FILE AND PATH INDICATED IN THE path.cv.rho PARAMETER DOES NOT EXISTS: ", path.cv.rho, "\n\n============\n\n")) -} -} -} -tempo <- fun_check(data = correl.mat.obtained, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & correl.mat.obtained == TRUE){ -if( ! is.null(path.theo1.theo2)){ -tempo <- fun_check(data = path.theo1.theo2, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ! file.exists(path.theo1.theo2)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: FILE AND PATH INDICATED IN THE path.theo1.theo2 PARAMETER DOES NOT EXISTS: ", path.theo1.theo2, "\n\n============\n\n")) -} -} -} -tempo <- fun_check(data = single.corr, options = c("VALUE", "MAX", "DEC1", "QUART1", "MED", "MIN", "NO"), length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = abs.corr.limit, prop = TRUE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = count.print, typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = keep, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = ratio.limit.sig, class = "vector", mode = "numeric", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & ratio.limit.sig < 1){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: ratio.limit.sig PARAMETER MUST EQUAL OR GREATER THAN 1: ", ratio.limit.sig, "\n\n============\n\n")) -} -tempo <- fun_check(data = error, prop = TRUE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = range.split, class = "vector", mode = "numeric", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = step.factor, class = "vector", mode = "numeric", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == FALSE & step.factor < 1){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: step.factor PARAMETER MUST EQUAL OR GREATER THAN 1: ", ratio.limit.sig, "\n\n============\n\n")) -} -tempo <- fun_check(data = activate.pdf, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = optional.text, class = "vector", typeof = "character", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = width.wind, class = "vector", mode = "numeric", length = 1, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = height.wind, class = "vector", mode = "numeric", length = 1, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = dot.size, class = "vector", mode = "numeric", length = 1, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = line.size, class = "vector", mode = "numeric", length = 1, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = heatmap.text.size, class = "vector", mode = "numeric", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = text.size, class = "vector", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = title.text.size, class = "vector", neg.values = FALSE, length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = raster, class = "vector", typeof = "logical", length = 1, fun.name = "SLITHERINE") ; eval(ee) -tempo <- fun_check(data = transfo, options = c("log2", "log10"), length = 1, fun.name = "SLITHERINE") ; eval(ee) -if(any(param.check) == TRUE){ -stop() # nothing else because print = TRUE by default in fun_check() -} - - -################ End pre-ignition checking - - -################ package import - - -package.import.fun() - - -################ End package import - - -################ Ignition - - -set.seed(1014) -options(scipen = 7) -analysis.nb <- trunc(as.numeric(Sys.time())) # to provide a specific number ot each analysis -log.file <- paste0(project.name, "_", analysis.nb,"_report.txt") -name.dir <- paste0(project.name, "_", analysis.nb) -path.out<-paste0(path.out, name.dir) -if(dir.exists(path.out)){ -tempo.cat <- paste0("\n\n============\n\nERROR IN SLITHERINE: DIRECTORY ALREADY EXISTS: ", path.out, "\n\n============\n\n") -stop(tempo.cat) -}else{ -suppressWarnings(dir.create(path.out)) -} -cat("\nSLITHERINE IGNITION\n") -fun_report(data = paste0("\n\n################################ ", log.file, " ################"), output = log.file, no.overwrite = FALSE, path = path.out, sep = 4) -ini.date <- Sys.time() -ini.time <- as.numeric(ini.date) # time of process begin, converted into seconds -fun_report("\n\n################################ RUNNING DATE AND STARTING TIME", output = log.file, path = path.out) -fun_report(data = ini.date, path = path.out, output = log.file, vector.cat = TRUE) -cat("\nINITIAL SETTINGS AND DATA MODIFICATIONS\n") -fun_report(data = "\n\n################################ INITIAL SETTINGS AND DATA MODIFICATIONS", path = path.out, output = log.file, sep = 4) -if(optional.text != ""){ -fun_report(data = "OPTIONAL TEXT: ", path = path.out, output = log.file) -fun_report(data = optional.text, path = path.out, output = log.file) -} - - -################ End ignition - - -################ Graphical parameter initialization - - -fun_open(pdf.disp = activate.pdf, path.fun = path.out, pdf.name.file = "initialization") -par.ini <- par(no.readonly = TRUE) # to recover the initial graphical parameters if required (reset) -invisible(dev.off()) # close the new window -if(activate.pdf == TRUE){ -invisible(file.remove(paste0(path.out, "/initialization.pdf"))) -} -zone.ini <- matrix(1, ncol=1) -if(erase.graphs == TRUE){ -graphics.off() -}else{ -tempo.warning <- paste0("GRAPHICS HAVE NOT BEEN ERASED. GRAPHICAL PARAMETERS MAY HAVE NOT BEEN REINITIALIZED") -fun_report(data = tempo.warning, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} - - -################ End graphical parameter initialization - - -################ Data import - - -mat1.obs.ini <- as.matrix(read.table(paste0(path.in, file.name1))) -mat2.obs.ini <- as.matrix(read.table(paste0(path.in, file.name2))) -if(theo.import == TRUE){ -mat1.mix <- as.matrix(read.table(paste0(theo.path.in, theo.file.name1))) -mat2.mix <- as.matrix(read.table(paste0(theo.path.in, theo.file.name2))) -tempo.warning <- paste0("IMPORT OF THEORETICAL MATRICES, WHICH WOULD MEAN THAT THE OBSERVED MATRICES IMPORTED HAVE ALREADY BEEN BINNED USING SERPENTINE") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used - -} - - -################ End data import - - -################ Checking - - -param.check <- NULL # -checked.param.names <- NULL # -ee <- expression(param.check <- c(param.check, tempo$problem) , checked.param.names <- c(checked.param.names, tempo$param.name)) -for(i0 in 1:2){ -if(theo.import == FALSE){ # obs matrix must be integer for serpentine binning or for theoretical matrix design -tempo <- fun_check(data = get(paste0("mat", i0, ".obs.ini")), data.name = paste0("mat", i0, ".obs.ini"), class = "matrix", typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == TRUE & all(typeof(get(paste0("mat", i0, ".obs.ini"))) %in% "character")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: IMPORTED MATRIX IS TYPE \"CHARACTER\". CHECK THAT IT HAS NO ROW OR COLUMN NAMES, WHICH WOULD HAMPER THE MATRIX IMPORT:\n\nMATRIX ", i0," (", get(paste0("file.name", i0)), "):\n\n")) -print(fun_head(get(paste0("mat", i0, ".obs.ini")))) -cat("\n\n============\n\n") -}else if(tempo$problem == TRUE & all(typeof(get(paste0("mat", i0, ".obs.ini"))) %in% "double")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: IMPORTED MATRIX ", get(paste0("file.name", i0)), " HAS DECIMAL VALUES (TYPE \"DOUBLE\"),\nWHILE THE GENERATION OF THEORETICAL MATRICES REQUIRES INTEGERS (USE OF DISCRETE DISTRIBUTIONS).\nARE YOU SURE THAT theo.import PARAMETER IS CORRECTLY SET TO FALSE?\n\n")) -print(fun_head(get(paste0("mat", i0, ".obs.ini")))) -cat("\n\n============\n\n") -}else if(tempo$problem == TRUE){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: ", get(paste0("file.name", i0)), " PARAMETER MUST BE INTEGER MATRIX (OR DECIMAL MATRIX IF ALREADY SERPENTINE BINNED): ", paste(typeof(get(paste0("mat", i0, ".obs.ini"))), collapse = " "), "\n\n============\n\n")) -} -}else{ # obs matrix can be decimal (serpentine binned matrix imported), but theo.import must be TRUE -tempo <- fun_check(data = get(paste0("mat", i0, ".obs.ini")), data.name = paste0("mat", i0, ".obs.ini"), class = "matrix", mode = "numeric", neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == TRUE & all(typeof(get(paste0("mat", i0, ".obs.ini"))) %in% "character")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: IMPORTED MATRIX IS TYPE \"CHARACTER\". CHECK THAT IT HAS NO ROW OR COLUMN NAMES, WHICH WOULD HAMPER THE MATRIX IMPORT:\n\nMATRIX ", i0," (", get(paste0("file.name", i0)), "):\n\n")) -print(fun_head(get(paste0("mat", i0, ".obs.ini")))) -cat("\n\n============\n\n") -} -tempo <- fun_check(data = get(paste0("mat", i0, ".obs.ini")), data.name = paste0("mat", i0, ".obs.ini"), class = "matrix", typeof = "integer", double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = "SLITHERINE", print = FALSE) -if(tempo$problem == FALSE & all(typeof(get(paste0("mat", i0, ".obs.ini"))) %in% "integer")){ -tempo.cat <- paste0("OBSERVED MATRIX ", i0, " MADE OF INTEGERS WHILE THEORETICAL MATRICES IMPORTED. ARE YOU SURE THAT OBSERVED MATRICES ARE ALREADY SERPENTINE BINNED?") -cat(paste0("\nWARNING: ", tempo.cat, "\n")) -fun_report(data = tempo.cat, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) -} -tempo <- fun_check(data = get(paste0("mat", i0, ".mix")), data.name = paste0("mat", i0, ".mix"), class = "matrix", mode = "numeric", double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) -if(tempo$problem == TRUE & all(typeof(get(paste0("mat", i0, ".mix"))) %in% "character")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: IMPORTED MATRIX IS TYPE \"CHARACTER\". CHECK THAT IT HAS NO ROW OR COLUMN NAMES, WHICH WOULD HAMPER THE MATRIX IMPORT:\n\nMATRIX ", i0," (", get(paste0("file.name", i0)), "):\n\n")) -print(fun_head(get(paste0("mat", i0, ".mix")))) -cat("\n\n============\n\n") -}else if(tempo$problem == TRUE & ! all(mode(get(paste0("mat", i0, ".mix"))) %in% "numeric")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: ", paste0("theo.file.name", i0), " PARAMETER MUST BE DECIMAL MATRIX: ", paste(mode(get(paste0("mat", i0, ".mix"))), collapse = " "), "\n\n============\n\n")) -} -} -} -tempo <- fun_comp_2d(mat1.obs.ini, mat2.obs.ini) -if(tempo$same.dim == FALSE){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIMENSIONS OF MATRIX 1 AND 2 MUST BE THE SAME.\n\nMATRIX 1: ", paste(dim(mat1.obs.ini), collapse = " "), "\n\nMATRIX 2: ", paste(dim(mat2.obs.ini), collapse = " "), "\n\n============\n\n")) -}else if(tempo$dim[1] != tempo$dim[2]){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: MATRIX 1 AND 2 MUST BE SQUARE MATRICES. HERE DIMENSIONS ARE: ", paste(tempo$dim, collapse = " "), "\n\n============\n\n")) -}else if(tempo$identical.content == TRUE){ -tempo.cat <- paste0("MATRIX 1 AND 2 HAVE IDENTICAL CONTENT") -cat(paste0("\nWARNING: ", tempo.cat, "\n")) -fun_report(data = tempo.cat, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -if(theo.import == TRUE){ -for(i0 in 1:2){ -tempo <- fun_check(data = get(paste0("mat", i0, ".mix")), data.name = paste0("mat", i0, ".mix"), class = "matrix", mode = "numeric", neg.values = FALSE, fun.name = "SLITHERINE") ; eval(ee) # not necessary integer because serpentine binned matrices are means of binning -if(tempo$problem == TRUE & all(typeof(get(paste0("mat", i0, ".mix"))) %in% "character")){ -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: IMPORTED MATRIX IS TYPE \"CHARACTER\". CHECK THAT IT HAS NO ROW OR COLUMN NAMES, WHICH WOULD HAMPER THE MATRIX IMPORT:\n\nMATRIX ", i0," (", get(paste0("mat", i0, ".mix")), "):\n\n")) -print(fun_head(get(paste0("mat", i0, ".mix")))) -cat("\n\n============\n\n") -} -} -tempo <- fun_comp_2d(mat1.mix, mat2.mix) -if(tempo$same.dim == FALSE){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: DIMENSIONS OF THEORETICAL MATRIX 1 AND 2 MUST BE THE SAME.\n\nMATRIX 1: ", paste(dim(mat1.mix), collapse = " "), "\n\nMATRIX 2: ", paste(dim(mat2.mix), collapse = " "), "\n\n============\n\n")) -}else if(tempo$identical.content == TRUE){ -tempo.cat <- paste0("THEORETICAL MATRIX 1 AND 2 HAVE IDENTICAL CONTENT") -cat(paste0("\nWARNING: ", tempo.cat, "\n")) -fun_report(data = tempo.cat, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -if(ncol(mat1.mix) != ncol(mat1.obs.ini)){ -param.check <- c(param.check, TRUE) -cat(paste0("\n\n============\n\nERROR IN SLITHERINE: NUMBER OF COLUMN MUST BE THE SAME BETWEEN THEORETICAL AND OBSERVED MATRICE.\n\nMATRIX 1: ", paste(ncol(mat1.mix), collapse = " "), "\n\nTHEO MATRIX 1: ", paste(ncol(mat1.obs.ini), collapse = " "), "\n\n============\n\n")) -} -} -if(any(param.check) == TRUE){ -stop() # nothing else because print = TRUE by default in fun_check() -} - - -################ Info - - -fun_report(data = "MATRIX 1 INFO:", path = path.out, output = log.file, sep = 1) -fun_report(data = fun_info(mat1.obs.ini)[c("CLASS", "TYPE", "DIMENSION", "SUM", "RANGE", "MEAN", "NA.NB", "INF.NB")], path = path.out, output = log.file, sep = 1) -fun_report(data = "MATRIX 2 INFO:", path = path.out, output = log.file, sep = 1) -fun_report(data = fun_info(mat2.obs.ini)[c("CLASS", "TYPE", "DIMENSION", "SUM", "RANGE", "MEAN", "NA.NB", "INF.NB")], path = path.out, output = log.file, sep = 1) - - -################ End Info - - -################ Modification of imported matrices - - -# detection of half matrix -mat1.modif <- FALSE -mat2.modif <- FALSE -if( ! is.null(empty.cell.string)){ -if( ! any(mat1.obs.ini %in% empty.cell.string)){ # works for NA, Inf, etc. -stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nPARAMETER empty.cell.string SET TO ", empty.cell.string, ", WHICH IS NOT PRESENT IN THE IMPORTED MATRIX 1\n\n============\n\n")) -} -tempo.mat1.obs <- fun_mat_fill(mat = mat1.obs.ini, empty.cell.string = empty.cell.string) -if( ! is.null(tempo.mat1.obs$warnings)){ -mat1.modif <- TRUE -fun_report(data = tempo.mat1.obs$warnings, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat1.obs$warnings) -}else{ -fun_report(data = paste0("MATRIX 1 DETECTED AS SYMMETRIC"), path = path.out, output = log.file) -} -mat1.obs <- tempo.mat1.obs$mat -if( ! any(mat2.obs.ini %in% empty.cell.string)){ # works for NA, Inf, etc. -stop(paste0("\n\n============\n\nERROR IN SLITHERINE\nPARAMETER empty.cell.string SET TO ", empty.cell.string, ", WHICH IS NOT PRESENT IN THE IMPORTED MATRIX 2\n\n============\n\n")) -} -tempo.mat2.obs <- fun_mat_fill(mat = mat2.obs.ini, empty.cell.string = empty.cell.string) -if( ! is.null(tempo.mat2.obs$warnings)){ -mat2.modif <- TRUE -fun_report(data = tempo.mat2.obs$warnings, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.mat2.obs$warnings) # in fact, abs(tempo.cor) is systematically used -}else{ -fun_report(data = paste0("MATRIX 1 DETECTED AS SYMMETRIC"), path = path.out, output = log.file) -} -mat2.obs <- tempo.mat2.obs$mat -}else{ -mat1.obs <- mat1.obs.ini -mat2.obs <- mat2.obs.ini -tempo.warning <- paste0("IMPORTED MATRICES NOT DECLARED HALF FILLED BY THE USER (empty.cell.string PARAMETER SET TO NULL)") -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) -fun_report(data = tempo.warning, path = path.out, output = log.file) -} -# detection of the matrix orientation, and t() potentially to always have the same orientation? -> done with the message provided by fun_mat_fill() and the rotate option of fun_heatmap() -# end detection of half matrix - -# diagonal removal -if( ! all(unique(as.matrix(as.data.frame(list(1:nrow(mat1.obs), 1:ncol(mat1.obs))))) == 0)){ -mat1.obs[as.matrix(as.data.frame(list(1:nrow(mat1.obs), 1:ncol(mat1.obs))))] <- 0 -mat1.modif <- TRUE -tempo.warning <- paste0("MAIN DIAGONAL OF MATRIX 1 HAS BEEN REPLACED BY 0") -fun_report(data = tempo.warning, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # -} -if( ! all(unique(as.matrix(as.data.frame(list(1:nrow(mat2.obs), 1:ncol(mat2.obs))))) == 0)){ -mat2.obs[as.matrix(as.data.frame(list(1:nrow(mat2.obs), 1:ncol(mat2.obs))))] <- 0 -mat2.modif <- TRUE -tempo.warning <- paste0("MAIN DIAGONAL OF MATRIX 2 HAS BEEN REPLACED BY 0") -fun_report(data = tempo.warning, path = path.out, output = log.file) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -# end diagonal removal - -# heatmap: matrix checking -if(theo.import == FALSE){ -loop.mat.names <- c("mat1.obs.ini", "mat1.obs", "mat2.obs.ini", "mat2.obs") -loop.heatmap.title <- c("MATRIX 1 IMPORTED", "MATRIX 1 AFTER MODIFICATIONS", "MATRIX 2 IMPORTED", "MATRIX 2 AFTER MODIFICATIONS") -}else{ -loop.mat.names <- c("mat1.obs.ini", "mat1.obs", "mat2.obs.ini", "mat2.obs", "mat1.mix", "mat2.mix") -loop.heatmap.title <- c("MATRIX 1 IMPORTED", "MATRIX 1 AFTER MODIFICATIONS", "MATRIX 2 IMPORTED", "MATRIX 2 AFTER MODIFICATIONS", "THEORETICAL MATRIX 1 IMPORTED", "THEORETICAL MATRIX 2 IMPORTED") -} -# graphic range (min and max value inside matrix) recovering: to have same matrix resolution -# BEWARE: this means that I take the log2(... + 1) of the matrix that I normalize by the mean of this. Thus, I plot log2(... + 1) on get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0]), na.rm = TRUE) -heatmap.range <- NULL -for(i0 in 1:length(loop.mat.names)){ -# data transformtation -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0])[is.finite(get(loop.mat.names[i0]))], na.rm = TRUE) # mean normalization -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) # log transfo -} -# end data transformtation -heatmap.range <- range(c(heatmap.range, tempo.data.plot), na.rm = TRUE, finite = TRUE) -} -# end graphic range (min and max value inside matrix) recovering: to have same matrix resolution -# heatmap -loop.heatmap.title <- paste0(loop.heatmap.title, "\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION, WHICH WOULD EXPLAIN INTENSITY VARIATION)") -for(i0 in 1:length(loop.mat.names)){ -# data transformtation -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0])[is.finite(get(loop.mat.names[i0]))], na.rm = TRUE) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -} -# end data transformation -if(activate.pdf == TRUE){ -if(i0 == 1){ -fun_open(pdf.disp = activate.pdf, path.fun = path.out, pdf.name.file = paste0("plots_", analysis.nb), width.fun = width.wind, height.fun = height.wind) -pdf.nb <- dev.cur() -}else{ -invisible(dev.set(pdf.nb)) -} -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if(i0 == 1){ -fun_gg_empty_graph(text = "MATRIX MODIFICATION\nCHECKING", text.size = 3) -} -if((loop.mat.names[i0] == "mat1.obs" & mat1.modif == FALSE) | (loop.mat.names[i0] == "mat2.obs" & mat2.modif == FALSE)){ -fun_gg_empty_graph(title = loop.heatmap.title[i0], text = "NO MODIFICATION OF THE IMPORTED MATRIX\n(HALF FILLING\n&\nMAIN DIAGONAL REPLACEMENT BY ZERO", text.size = 3, title.size = title.text.size) -}else{ -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range, na.rm = TRUE), max(heatmap.range, na.rm = TRUE)), midpoint1 = mean(heatmap.range, na.rm = TRUE), title = loop.heatmap.title[i0], text.size = heatmap.text.size, title.text.size = title.text.size) -} -} -if(keep == FALSE){ -rm(list = c("mat1.obs.ini", "mat2.obs.ini")) # not saved because initial matrices -} -# end heatmap -# end heatmap: matrix checking - - -################ end modification of imported matrices - - -################ detection of diagonales with mean zero - - -if(theo.import == FALSE){ - - -# means of matrix diagonales will then be modified such as when one is mean zero, the other one is also mean zero -tempo.cat <- "MEANS OF MATRIX DIAGONALES ARE MODIFIED SUCH AS WHEN ONE IS MEAN ZERO, THE OTHER ONE IS ALSO MEAN ZERO" -fun_report(data = tempo.cat, output = log.file, path = path.out) -# Means and sd are computed for each diagonal -tempo.coord <- row(mat1.obs) - col(mat1.obs) # BEWARE: diag 0 is first diagonal and diag n-1 is last diagonal !! row(pmat)-col(pmat) generate a matrix with same number on upleft/rightdown diag, with 0 for main diag and negative number for half right part -diag.mean.mat1.obs <- sapply(0:-(nrow(mat1.obs) - 1), FUN = function(x){mean(mat1.obs[tempo.coord == x], na.rm = TRUE)}) # work on the diagonales -diag.mean.mat2.obs <- sapply(0:-(nrow(mat2.obs) - 1), FUN = function(x){mean(mat2.obs[tempo.coord == x], na.rm = TRUE)}) # work on the diagonales -common.null.mean.pos <- diag.mean.mat1.obs == 0 | diag.mean.mat2.obs == 0 - - -################ end detection of diagonales with mean zero - - -################ Analysis of observed matrices and data extraction - - -# MATRIX 1 -cat("\nCOLLECTION OF DIAGONAL PARAMETERS FROM OBSERVED MATRICES\n") -fun_report(data = "\n\n################################ COLLECTION OF DIAGONAL PARAMETERS FROM OBSERVED MATRICES", path = path.out, output = log.file, sep = 4) -# Means and sd are computed for each diagonal -tempo.coord <- row(mat1.obs) - col(mat1.obs) # row(pmat)-col(pmat) generate a matrix with same number on upleft/rightdown diag, with 0 for main diag and negative number for half right part -# diag.mean.mat1.obs already obtained above -diag.sd.mat1.obs <- sapply(0:-(nrow(mat1.obs) - 1), FUN = function(x){sd(mat1.obs[tempo.coord == x], na.rm = TRUE)}) # BEWARE: the last is NA because a single value for this corner diag -diag.cor.mat1.obs <- suppressWarnings(sapply(0:-(nrow(mat1.obs) - 1), FUN = function(x){cor(x = mat1.obs[tempo.coord == x], y = mat2.obs[tempo.coord == x], use = "pairwise.complete.obs", method = "spearman")})) # BEWARE: the last is NA because a single value for this corner diag # to remove the sd null message -mean.sd.cv.cor.mat1.obs <- as.matrix(data.frame(MEAN = diag.mean.mat1.obs, SD = diag.sd.mat1.obs, CV = diag.sd.mat1.obs / diag.mean.mat1.obs, COR = diag.cor.mat1.obs)) - -# replacement of the NA SD CV and CORR on the last line by ZER0: no consequence because we do not use SD, we use CV for taking one of them among the max and CORR we also use high values -if( ! all(is.na(mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), c("SD", "CV", "COR")]))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat1.obs SHOULD BE NA FOR SD, CV AND CORR, BECAUSE A SINGLE VALUE IN THE CORNER DIAGONAL\n\n========\n\n") -cat(tempo.cat) -print(mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), ]) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), ], output = log.file, path = path.out, rownames.kept = TRUE) -}else{ -mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), c("SD", "CV", "COR")] <- 0 -tempo.cat <- paste0("FOR SD, CV AND CORR COLUMNS, REPLACEMENT OF the NA IN THE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat1.obs BY ZERO\nNO CONSEQUENCE BECAUSE WE DO NOT USE SD, WE USE CV FOR TAKING ONE OF THEM AMONG THE MAX AND FOR CORR WE ALSO USE HIGH VALUES\n") -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat1.obs[nrow(mean.sd.cv.cor.mat1.obs), ], output = log.file, path = path.out, rownames.kept = TRUE) -} -# replacement of c(-Inf, NA, Inf) by NA -# tempo.detect <- apply(apply(mean.sd.cv.cor.mat1.obs, 2, FUN = "%in%", c(0, -Inf, NA, Inf)), 1, "any") # per row -tempo.detect <- apply(mean.sd.cv.cor.mat1.obs, 2, FUN = "%in%", c(-Inf, NA, Inf)) # in the matrix -if(all(is.na(tempo.detect[nrow(tempo.detect), c("SD", "CV", "COR")])) == TRUE & (tempo.detect[nrow(tempo.detect), "MEAN"] == FALSE)){ -tempo.cat <- paste0("THE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat1.obs IS NA FOR SD, CV and CORR, BUT THIS IS EXPECTED BECAUSE A SINGLE VALUE IN THE CORNER DIAGONAL") -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat1.obs[tempo.detect, ], output = log.file, path = path.out) -}else if(all(is.na(tempo.detect[nrow(tempo.detect), c("SD", "CV", "COR")])) == TRUE & (tempo.detect[nrow(tempo.detect), "MEAN"] == TRUE)){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat1.obs CANNOT BE -Inf, NA OR Inf FOR THE MEAN COLUMN\n\n========\n\n") -cat(tempo.cat) -print(mean.sd.cv.cor.mat1.obs[nrow(tempo.detect), ]) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = mean.sd.cv.cor.mat1.obs[nrow(tempo.detect), ], output = log.file, path = path.out, rownames.kept = TRUE) -}else if(any(tempo.detect[-nrow(tempo.detect), ]) == TRUE | any(tempo.detect[nrow(tempo.detect), "MEAN"]) == TRUE){ -tempo.cat <- paste0("BEWARE: EXCEPT THE LAST ROW (I.E., CORNER DIAGONAL), ALSO PRESENCE OF -Inf, NA, OR Inf DETECTED IN mean.sd.cv.cor.mat1.obs, WHICH WILL PUT NA IN THEORETICAL MATRIX 1, WHICH IS NOT COMPATIBLE WITH SERPENTINE. REPLACEMENT OF NA WILL BE MADE THENAFTER") -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat1.obs[apply(tempo.detect, 1, FUN = any), ]) -# print(mean.sd.cv.cor.mat1.obs[tempo.detect, ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat1.obs[apply(tempo.detect, 1, FUN = any), ], output = log.file, path = path.out, rownames.kept = TRUE) -# fun_report(data = mean.sd.cv.cor.mat1.obs[tempo.detect, ], output = log.file, path = path.out, rownames.kept = TRUE) -mean.sd.cv.cor.mat1.obs[tempo.detect] <- NA # NA, -Inf and Inf replaced by NA -} -mean.sd.cv.cor.mat1.obs <- as.data.frame(mean.sd.cv.cor.mat1.obs) -# end replacement of c(-Inf, NA, Inf) by NA -# replacement of mean, sd, cor by zero if common null diag between mat1.obs and mat2.obs -if(any(common.null.mean.pos)){ -mean.sd.cv.cor.mat1.obs[common.null.mean.pos, c("MEAN", "SD", "COR")] <- 0 -tempo.cat <- paste0("BEWARE: IF AT LEAST ONE OF THE OBSERVED DIAG MEAN IS ZERO, THE CODE WILL CONSIDER THAT THE DIAG MEANS, SD, AND CORR ARE ZERO FOR THE TWO THEO CORRESPONDING DIAGONALES") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- "REPLACEMENT BY ZERO IN THE THEO MAT 1" -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat1.obs[apply(tempo.detect, 1, FUN = any), ]) -# print(mean.sd.cv.cor.mat1.obs[tempo.detect, ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat1.obs[common.null.mean.pos, ], output = log.file, path = path.out, rownames.kept = TRUE) -} -# end replacement of mean, sd, cor by zero if common null diag between mat1.obs and mat2.obs -# END MATRIX 1 - - - -# MATRIX 2 -# Means and sd are computed for each diagonal -tempo.coord <- row(mat2.obs) - col(mat2.obs) # row(pmat)-col(pmat) generate a matrix with same number on upleft/rightdown diag, with 0 for main diag and negative number for half right part -# diag.mean.mat2.obs already obtained above -diag.sd.mat2.obs <- sapply(0:-(nrow(mat2.obs) - 1), FUN = function(x){sd(mat2.obs[tempo.coord == x], na.rm = TRUE)}) # BEWARE: the last is NA because a single value for this corner diag -diag.cor.mat2.obs <- suppressWarnings(sapply(0:-(nrow(mat2.obs) - 1), FUN = function(x){cor(x = mat1.obs[tempo.coord == x], y = mat2.obs[tempo.coord == x], use = "pairwise.complete.obs", method = "spearman")})) # BEWARE: the last is NA because a single value for this corner diag # to remove the sd null message -mean.sd.cv.cor.mat2.obs <- as.matrix(data.frame(MEAN = diag.mean.mat2.obs, SD = diag.sd.mat2.obs, CV = diag.sd.mat2.obs / diag.mean.mat2.obs, COR = diag.cor.mat2.obs)) - -# replacement of the NA SD CV and CORR on the last line by ZER0: no consequence because we do not use SD, we use CV for taking one of them among the max and CORR we also use high values -if( ! all(is.na(mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), c("SD", "CV", "COR")]))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat2.obs SHOULD BE NA FOR SD, CV AND CORR, BECAUSE A SINGLE VALUE IN THE CORNER DIAGONAL\n\n========\n\n") -cat(tempo.cat) -print(mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), ]) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), ], output = log.file, path = path.out, rownames.kept = TRUE) -}else{ -mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), c("SD", "CV", "COR")] <- 0 -tempo.cat <- paste0("FOR SD, CV AND CORR COLUMNS, REPLACEMENT OF the NA IN THE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat2.obs BY ZERO\nNO CONSEQUENCE BECAUSE WE DO NOT USE SD, WE USE CV FOR TAKING ONE OF THEM AMONG THE MAX AND FOR CORR WE ALSO USE HIGH VALUES\n") -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat2.obs[nrow(mean.sd.cv.cor.mat2.obs), ], output = log.file, path = path.out, rownames.kept = TRUE) -} -# replacement of c(-Inf, NA, Inf) by NA -# tempo.detect <- apply(apply(mean.sd.cv.cor.mat2.obs, 2, FUN = "%in%", c(0, -Inf, NA, Inf)), 1, "any") # per row -tempo.detect <- apply(mean.sd.cv.cor.mat2.obs, 2, FUN = "%in%", c(-Inf, NA, Inf)) # in the matrix -if(all(is.na(tempo.detect[nrow(tempo.detect), c("SD", "CV", "COR")])) == TRUE & (tempo.detect[nrow(tempo.detect), "MEAN"] == FALSE)){ -tempo.cat <- paste0("THE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat2.obs IS NA FOR SD, CV and CORR, BUT THIS IS EXPECTED BECAUSE A SINGLE VALUE IN THE CORNER DIAGONAL") -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat2.obs[tempo.detect, ], output = log.file, path = path.out) -}else if(all(is.na(tempo.detect[nrow(tempo.detect), c("SD", "CV", "COR")])) == TRUE & (tempo.detect[nrow(tempo.detect), "MEAN"] == TRUE)){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE LAST LINE (I.E., CORNER DIAGONAL) OF mean.sd.cv.cor.mat2.obs CANNOT BE -Inf, NA OR Inf FOR THE MEAN COLUMN\n\n========\n\n") -cat(tempo.cat) -print(mean.sd.cv.cor.mat2.obs[nrow(tempo.detect), ]) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = mean.sd.cv.cor.mat2.obs[nrow(tempo.detect), ], output = log.file, path = path.out, rownames.kept = TRUE) -}else if(any(tempo.detect[-nrow(tempo.detect), ]) == TRUE | any(tempo.detect[nrow(tempo.detect), "MEAN"]) == TRUE){ -tempo.cat <- paste0("BEWARE: EXCEPT THE LAST ROW (I.E., CORNER DIAGONAL), ALSO PRESENCE OF -Inf, NA, OR Inf DETECTED IN mean.sd.cv.cor.mat2.obs, WHICH WILL PUT NA IN THEORETICAL MATRIX 2, WHICH IS NOT COMPATIBLE WITH SERPENTINE. REPLACEMENT OF NA WILL BE MADE THENAFTER") -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat2.obs[apply(tempo.detect, 1, FUN = any), ]) -# print(mean.sd.cv.cor.mat2.obs[tempo.detect, ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat2.obs[apply(tempo.detect, 1, FUN = any), ], output = log.file, path = path.out, rownames.kept = TRUE) -# fun_report(data = mean.sd.cv.cor.mat2.obs[tempo.detect, ], output = log.file, path = path.out, rownames.kept = TRUE) -mean.sd.cv.cor.mat2.obs[tempo.detect] <- NA # NA, -Inf and Inf replaced by NA -} -mean.sd.cv.cor.mat2.obs <- as.data.frame(mean.sd.cv.cor.mat2.obs) -# end replacement of c(-Inf, NA, Inf) by NA -# replacement of mean, sd, cor by zero if common null diag between mat2.obs and mat2.obs -if(any(common.null.mean.pos)){ -mean.sd.cv.cor.mat2.obs[common.null.mean.pos, c("MEAN", "SD", "COR")] <- 0 -tempo.cat <- paste0("BEWARE: IF AT LEAST ONE OF THE OBSERVED DIAG MEAN IS ZERO, THE CODE WILL CONSIDER THAT THE DIAG MEANS, SD, AND CORR ARE ZERO FOR THE TWO THEO CORRESPONDING DIAGONALES") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- "REPLACEMENT BY ZERO IN THE THEO MAT 2" -# cat(paste0("\n", tempo.cat, "\n")) -# print(mean.sd.cv.cor.mat2.obs[apply(tempo.detect, 1, FUN = any), ]) -# print(mean.sd.cv.cor.mat2.obs[tempo.detect, ]) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = mean.sd.cv.cor.mat2.obs[common.null.mean.pos, ], output = log.file, path = path.out, rownames.kept = TRUE) -} -# end replacement of mean, sd, cor by zero if common null diag between mat2.obs and mat2.obs -# END MATRIX 2 - - -################ End Analysis of observed matrices and data extraction - - -################ plot verification - - -# homogeneous scale -m_sd.coord.obs <- fun_gg_scatter(data1 = list(L1 = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat1.obs[, c("MEAN", "SD")])}else{mean.sd.cv.cor.mat1.obs[, c("MEAN", "SD")]}, L2 = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat2.obs[, c("MEAN", "SD")])}else{mean.sd.cv.cor.mat2.obs[, c("MEAN", "SD")]}), x = list(L1 = "MEAN", L2 = "MEAN"), y = list(L1 = "SD", L2 = "SD"), geom = list("geom_point", "geom_point"), alpha = list(0.5, 0.5), xlog = transfo, ylog = transfo, plot = FALSE, return = TRUE) -m_sd.x.range <- m_sd.coord.obs$axes$x.range -m_sd.y.range <- m_sd.coord.obs$axes$y.range -m_cor.coord.obs <- fun_gg_scatter(data1 = list(L1 = data.frame(MEAN = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat1.obs[, "MEAN"])}else{mean.sd.cv.cor.mat1.obs[, "MEAN"]}, mean.sd.cv.cor.mat1.obs["COR"]), L2 = data.frame(MEAN = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat2.obs[, "MEAN"])}else{mean.sd.cv.cor.mat2.obs[, "MEAN"]}, mean.sd.cv.cor.mat2.obs["COR"])), x = list(L1 = "MEAN", L2 = "MEAN"), y = list(L1 = "COR", L2 = "COR"), geom = list("geom_point", "geom_point"), alpha = list(0.5, 0.5), xlog = transfo, ylog = "no", plot = FALSE, return = TRUE, ylim = c(-1, 1)) -m_cor.x.range <- m_cor.coord.obs$axes$x.range -m_cor.y.range <- m_cor.coord.obs$axes$y.range -m_cv.coord.obs <- fun_gg_scatter(data1 = list(L1 = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat1.obs[, c("MEAN", "CV")])}else{mean.sd.cv.cor.mat1.obs[, c("MEAN", "CV")]}, L2 = if(transfo != "no"){get(transfo)(mean.sd.cv.cor.mat2.obs[, c("MEAN", "CV")])}else{mean.sd.cv.cor.mat2.obs[, c("MEAN", "CV")]}), x = list(L1 = "MEAN", L2 = "MEAN"), y = list(L1 = "CV", L2 = "CV"), geom = list("geom_point", "geom_point"), alpha = list(0.5, 0.5), xlog = transfo, ylog = transfo, plot = FALSE, return = TRUE) -m_cv.x.range <- m_cv.coord.obs$axes$x.range -m_cv.y.range <- m_cv.coord.obs$axes$y.range -# end homogeneous scale - -# MATRIX1 -# heatmap: matrix checking -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "OBSERVED MATRIX 1:\nCOLLECTION OF DIAGONAL PARAMETERS", text.size = 3) -# if(activate.pdf == TRUE){ -# invisible(dev.set(pdf.nb)) -# }else{ -# fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -# } -# tempo.title <- paste0("MAT1 OBS\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)") -# tempo.data.plot <- mat1.obs / mean(mat1.obs[is.finite(mat1.obs)], na.rm = TRUE) # -# if(transfo != "no"){ -# tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -# } -# fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range, na.rm = TRUE), max(heatmap.range, na.rm = TRUE)), midpoint1 = mean(heatmap.range, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -# end heatmap: matrix checking - -tempo.data.plot <- mean.sd.cv.cor.mat1.obs -tempo.data.plot2 <- data.frame(MEAN = tempo.data.plot$MEAN, MEAN.MINUS.SD = tempo.data.plot$MEAN - tempo.data.plot$SD, MEAN.PLUS.SD = tempo.data.plot$MEAN + tempo.data.plot$SD) # created here to be before log transformation -if(transfo != "no"){ -tempo.data.plot[, c("MEAN", "SD", "CV")] <- get(transfo)(tempo.data.plot[, c("MEAN", "SD", "CV")]) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -} - -# mean versus index plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if(transfo != "no"){ -tempo.data.plot2 <- suppressWarnings(get(transfo)(tempo.data.plot2)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot2$MEAN.MINUS.SD))){ -tempo.data.plot2$MEAN.MINUS.SD[is.na(tempo.data.plot2$MEAN.MINUS.SD)] <- tempo.data.plot2$MEAN[is.na(tempo.data.plot2$MEAN.MINUS.SD)] -} -} -tempo.data.plot2 <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot2), tempo.data.plot2) -tempo.title <-paste0("MAT1 OBS\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nDIAGONAL NB FROM MAIN TO CORNER\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "DIAGONAL NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot -# Mean Deviation (MD) plot of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT1 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range, ylog = transfo, ylab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end Mean Deviation (MD) plot of the observed matrix - -# mean / cor of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT1 OBS\nMEAN VERSUS MAT1 OBS / MAT2 OBS SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range, ylog = "no", ylab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end mean / cor of the observed matrix - -# mean / cv of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT1 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND CV LOG2(x) "}else if(transfo == "log10"){"MEAN AND CV LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cv.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cv.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_cv.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cv.y.range, ylog = transfo, ylab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs -# end mean / cv of the observed matrix - -# cv selection using increasing windows to detect the average constant cv -tempo.cv.mean.mat <- mean.sd.cv.cor.mat1.obs[ ! (mean.sd.cv.cor.mat1.obs$CV == 0 | mean.sd.cv.cor.mat1.obs$MEAN == 0 | is.na(mean.sd.cv.cor.mat1.obs$MEAN) | is.na(mean.sd.cv.cor.mat1.obs$CV)), ] # remove 0 and NA in mean and CV -sort.cv1 <- sort(tempo.cv.mean.mat$CV) -sort.mean <- tempo.cv.mean.mat$MEAN[order(tempo.cv.mean.mat$CV, na.last = NA)] # means sorted like cv -# n.cv <- length(sort.cv1) -win.size.ini <- win.size -if(win.size >= length(sort.cv1)){ -tempo.warning <- paste0("THE win.size PARAMETER SETTING (", win.size, ") IS OVER OR EQUAL TO THE NUMBER OF NON NA DIAGONAL CV OF THE OBSERVED MATRIX 1 (", length(sort.cv1), ")\nTHE win.size PARAMETER HAS BEEN RESET TO VALUE: ", length(sort.cv1) - 1) -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -win.size <- length(sort.cv1) - 1 -fun_report(data = tempo.warning, output = log.file, path = path.out) -} -if(win.size < 2){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nwin.size IS LESS THAN 2. NO BEST CV CAN BE COMPUTED\n\n========\n\n") -stop(tempo.cat) -} -if(win.size < length(sort.cv1)){ -cv.win.mean <- vector("numeric", win.size - 1) # mean cv in the increasing window of the sorted cv -cv.win.mean[] <- NA -cv.win.sd <- vector("numeric", win.size - 1) # sd cv in the increasing window of the sorted cv -cv.win.sd[] <- NA -mean.win.median <- vector("numeric", win.size - 1) # median in the increasing window of the sorted mean -mean.win.median[] <- NA -mean.win.mean <- vector("numeric", win.size - 1) # for the cv selection: mean in the increasing window of the sorted mean -mean.win.mean[] <- NA -for(i0 in 2:win.size){ # to take at least 2 values, to avoid a first NA -cv.win.mean[i0 - 1] <- mean(sort.cv1[1:i0], na.rm = TRUE) -cv.win.sd[i0 - 1] <- sd(sort.cv1[1:i0], na.rm = TRUE) -mean.win.median[i0 - 1] <- median(sort.mean[1:i0], na.rm = TRUE) -mean.win.mean[i0 - 1] <- mean(sort.mean[1:i0], na.rm = TRUE) -} -if(any(is.na(cv.win.mean))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\ncv.win.mean SHOULD NOT HAVE ANY NA\n\n========\n\n") -stop(tempo.cat) -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -} -if(any(is.na(cumsum(diff(cv.win.sd / mean.win.mean))))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nNO NA SHOULD BE PRESENT IN cv.win.sd / mean.win.mean\n\n========\n\n") -cat(tempo.cat) -print(cv.win.sd / mean.win.mean) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = cv.win.sd / mean.win.mean, output = log.file, path = path.out, rownames.kept = TRUE) -}else{ -cv.select.nb <- which.min(cumsum(diff(cv.win.sd / mean.win.mean))) -} -i0 <- 1 -while(median(sort.cv1[i0:cv.select.nb], na.rm = TRUE) == 0 | median(sort.mean[i0:cv.select.nb], na.rm = TRUE) == 0){ # loop to avoid to take zero values in sort.cv1 and sort.mean -i0 <- i0 + 1 -cv.select.nb <- cv.select.nb + 1 -} -cv.select.mat1.obs <- median(sort.cv1[i0:cv.select.nb], na.rm = TRUE) # cv selected is the median of the xxx first cv -mean.select.mat1.obs <- median(sort.mean[i0:cv.select.nb], na.rm = TRUE) - -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT1 OBS\nSLIDING CV COMPUTATION\nBLUE DOTS: MEAN+/-SD OF CV IN INCREASING WINDOWS OF CV VALUES\nGREEN LINE: SELECTED CV VALUE IS ", round(cv.select.mat1.obs, 2), ", BASED ON MEDIAN OF ", cv.select.nb, " BLUE DOTS\nRED LINE: POISSON DISTRIB\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_cv.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cv.y.range, ylog = transfo, ylab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat1.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) -# cv selection using increasing windows to detect the average constant cv -# END MATRIX 1 - - -# MATRIX2 -# heatmap: matrix checking -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "OBSERVED MATRIX 2:\nCOLLECTION OF DIAGONAL PARAMETERS", text.size = 3) -# if(activate.pdf == TRUE){ -# invisible(dev.set(pdf.nb)) -# }else{ -# fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -# } -# tempo.title <- paste0("MAT2 OBS\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)") -# tempo.data.plot <- mat2.obs / mean(mat2.obs[is.finite(mat2.obs)], na.rm = TRUE) # -# if(transfo != "no"){ -# tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -# } -# fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range, na.rm = TRUE), max(heatmap.range, na.rm = TRUE)), midpoint1 = mean(heatmap.range, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -# end heatmap: matrix checking - -tempo.data.plot <- mean.sd.cv.cor.mat2.obs -tempo.data.plot2 <- data.frame(MEAN = tempo.data.plot$MEAN, MEAN.MINUS.SD = tempo.data.plot$MEAN - tempo.data.plot$SD, MEAN.PLUS.SD = tempo.data.plot$MEAN + tempo.data.plot$SD) # created here to be before log transformation -if(transfo != "no"){ -tempo.data.plot[, c("MEAN", "SD", "CV")] <- get(transfo)(tempo.data.plot[, c("MEAN", "SD", "CV")]) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -} - - -# mean versus index plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if(transfo != "no"){ -tempo.data.plot2 <- suppressWarnings(get(transfo)(tempo.data.plot2)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot2$MEAN.MINUS.SD))){ -tempo.data.plot2$MEAN.MINUS.SD[is.na(tempo.data.plot2$MEAN.MINUS.SD)] <- tempo.data.plot2$MEAN[is.na(tempo.data.plot2$MEAN.MINUS.SD)] -} -} -tempo.data.plot2 <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot2), tempo.data.plot2) -tempo.title <-paste0("MAT2 OBS\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nDIAGONAL NB FROM MAIN TO CORNER\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot2), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "DIAGONAL NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.x.range, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot2$COLUMN_NB, xend = tempo.data.plot2$COLUMN_NB, y = tempo.data.plot2$MEAN.MINUS.SD, yend = tempo.data.plot2$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot -# Mean Deviation (MD) plot of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT2 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("SD", "y.sd.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range, ylog = transfo, ylab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end Mean Deviation (MD) plot of the observed matrix - -# mean / cor of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT2 OBS\nMEAN VERSUS MAT1 OBS / MAT2 OBS SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range, ylog = "no", ylab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end mean / cor of the observed matrix - -# mean / cv of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT2 OBS\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND CV LOG2(x) "}else if(transfo == "log10"){"MEAN AND CV LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cv.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cv.y.range, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\n") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_cv.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cv.y.range, ylog = transfo, ylab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_cv.coord.obs -# end mean / cv of the observed matrix - -# cv selection using increasing windows to detect the average constant cv -tempo.cv.mean.mat <- mean.sd.cv.cor.mat2.obs[ ! (mean.sd.cv.cor.mat2.obs$CV == 0 | mean.sd.cv.cor.mat2.obs$MEAN == 0 | is.na(mean.sd.cv.cor.mat2.obs$MEAN) | is.na(mean.sd.cv.cor.mat2.obs$CV)), ] # remove 0 and NA in mean and CV -sort.cv2 <- sort(tempo.cv.mean.mat$CV) -sort.mean <- tempo.cv.mean.mat$MEAN[order(tempo.cv.mean.mat$CV, na.last = NA)] # means sorted like cv -# n.cv <- length(sort.cv2) -win.size <- win.size.ini # in case modified during first MAT1 analysis -if(win.size >= length(sort.cv2)){ -tempo.warning <- paste0("THE win.size PARAMETER SETTING (", win.size, ") IS OVER OR EQUAL TO THE NUMBER OF NON NA DIAGONAL CV OF THE OBSERVED MATRIX 2 (", length(sort.cv2), ")\nTHE win.size PARAMETER HAS BEEN RESET TO VALUE: ", length(sort.cv2) - 1) -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -win.size <- length(sort.cv2) - 1 -fun_report(data = tempo.warning, output = log.file, path = path.out) -} -if(win.size < 2){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nwin.size IS LESS THAN 2. NO BEST CV CAN BE COMPUTED\n\n========\n\n") -stop(tempo.cat) -} -if(win.size < length(sort.cv2)){ -cv.win.mean <- vector("numeric", win.size - 1) # mean cv in the increasing window of the sorted cv -cv.win.mean[] <- NA -cv.win.sd <- vector("numeric", win.size - 1) # sd cv in the increasing window of the sorted cv -cv.win.sd[] <- NA -mean.win.median <- vector("numeric", win.size - 1) # median in the increasing window of the sorted mean -mean.win.median[] <- NA -mean.win.mean <- vector("numeric", win.size - 1) # for the cv selection: mean in the increasing window of the sorted mean -mean.win.mean[] <- NA -for(i0 in 2:win.size){ # to take at least 2 values, to avoid a first NA -cv.win.mean[i0 - 1] <- mean(sort.cv2[1:i0], na.rm = TRUE) -cv.win.sd[i0 - 1] <- sd(sort.cv2[1:i0], na.rm = TRUE) -mean.win.median[i0 - 1] <- median(sort.mean[1:i0], na.rm = TRUE) -mean.win.mean[i0 - 1] <- mean(sort.mean[1:i0], na.rm = TRUE) -} -if(any(is.na(cv.win.mean))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\ncv.win.mean SHOULD NOT HAVE ANY NA\n\n========\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -} -if(any(is.na(cumsum(diff(cv.win.sd / mean.win.mean))))){ -tempo.cat <- paste0("\n\n========\n\nINTERNAL ERROR CODE IN SLITHERINE\nNO NA SHOULD BE PRESENT IN cv.win.sd / mean.win.mean\n\n========\n\n") -cat(tempo.cat) -print(cv.win.sd / mean.win.mean) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -# fun_report(data = cv.win.sd / mean.win.mean, output = log.file, path = path.out, rownames.kept = TRUE) -}else{ -cv.select.nb <- which.min(cumsum(diff(cv.win.sd / mean.win.mean))) -} -i0 <- 1 -while(median(sort.cv2[i0:cv.select.nb], na.rm = TRUE) == 0 | median(sort.mean[i0:cv.select.nb], na.rm = TRUE) == 0){ # loop to avoid to take zero values in sort.cv2 and sort.mean -i0 <- i0 + 1 -cv.select.nb <- cv.select.nb + 1 -} -cv.select.mat2.obs <- median(sort.cv2[i0:cv.select.nb], na.rm = TRUE) # cv selected is the median of the xxx first cv -mean.select.mat2.obs <- median(sort.mean[i0:cv.select.nb], na.rm = TRUE) - -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT2 OBS\nSLIDING CV COMPUTATION\nBLUE DOTS: MEAN+/-SD OF CV IN INCREASING WINDOWS OF CV VALUES\nGREEN LINE: SELECTED CV VALUE IS ", round(cv.select.mat2.obs, 2), ", BASED ON MEDIAN OF ", cv.select.nb, " BLUE DOTS\nRED LINE: POISSON DISTRIB\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois), x = list("MEAN", "x.pois"), y = list("CV", "y.cv.pois"), color = list(grey(0.40), "red"), geom = list("geom_point", "geom_line"), alpha = list(0.5, 1), dot.size = dot.size, line.size = line.size, xlim = m_cv.x.range, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cv.y.range, ylog = transfo, ylab = "CV", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0, add = paste0('+ ggplot2::geom_point(data = data.frame(x = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean)), ggplot2::aes(x = x, y = y), color = "blue", size = dot.size, alpha = 0.3) + ggplot2::geom_segment(data = data.frame(x = get(transfo)(mean.win.median), xend = get(transfo)(mean.win.median), y = get(transfo)(cv.win.mean - cv.win.sd), yend = get(transfo)(cv.win.mean + cv.win.sd)), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "blue", alpha = 0.3) + ggplot2::geom_hline(data = data.frame(y = get(transfo)(cv.select.mat2.obs)), ggplot2::aes(yintercept = y), color = "green", size = line.size) + ggplot2::theme_classic(base_size = text.size)', if(raster == TRUE){'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size), aspect.ratio = 1)'}else{'+ ggplot2::theme(text = ggplot2::element_text(size = text.size), plot.title = ggplot2::element_text(size = title.text.size))'})) -# cv selection using increasing windows to detect the average constant cv -# END MATRIX 2 - - -################ End plot verification - - -################ Theoretical matrices - - -# MATRIX 1 -cat("\nTHEORETICAL MATRIX DESIGN\n") -fun_report(data = "\n\n################################ THEORETICAL MATRIX DESIGN", path = path.out, output = log.file, sep = 4) -n1 <- nrow(mean.sd.cv.cor.mat1.obs) -mu1 <- sort(mean.sd.cv.cor.mat1.obs$MEAN, decreasing = TRUE, na.last = TRUE) # need the ordering to work on correlation (see below), decreasing = TRUE to have the first column in theo1 as the main diagonal -if(cv.rho.obtained == TRUE){ -source(path.cv.rho) # -tempo.cat <- paste0("BEWARE: THE SAVED FILE:\n", paste0(path.out, "/cv1_cv2_rho1_rho2_backup.RData"), "\nIS MAD OF VALUES COMING FROM THE DOWNLOAD OF:\n", path.cv.rho) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else{ -rho1 <- mean.sd.cv.cor.mat1.obs$COR[order(mean.sd.cv.cor.mat1.obs$MEAN, decreasing = TRUE, na.last = TRUE)] -cv1 <- cv.select.mat1.obs # see above -} -# mean correction before subsampling -# Subsampling is necessary because we cannot have any mean of a diagonal less than t = 1/cv1^2. -# Thus, we have to multiply all the value to increase diagonal means, then use the BN law, and then subsample to go back to the initial means -# see Negative binomial law, CV m relationship and subsampling.docx do understand the origin of these formula (equation numbers) -t1 <- 1 / cv1^2 # equation (1) -p1 <- exp(-t1 /mu1) # equation (8). Related to equation (9). In these equations, m cannot go below mt, but we do not care here for the correction -r1 <- t1 / p1 # equation (7) -# Beware: one sub.factor per diagonal: meaning that we do not apply the same up raising and dowsampling coeff for all the diag -sub.factor1 = t1 / ( mu1 * ( 1 - p1)) # equation (9), with mt = m1 (obs) and m = mean.nb1 (theo mean of NB). equation (9) / m1 -> t1 / ( mu1 * ( 1 - exp(-t1/mu1))) and sub.factor1 = m / mt, ie, sub.factor1 = mean.nb1 / m1. Thus, for the down sampling, to go back to m1, we will have to divide by sub.factor1 -mean.nb1 <- p1 * r1 / (1 - p1) # equation (2), just for graphs -sd.nb1 <- (p1 * r1 / (1 - p1)^2)^0.5 # equation (3), just for graphs -# end mean correction before subsampling -# because in two contact matrix, the values in the largest diagonal, where the mean is the biggest, are very correlated, and because this correlation decreases in other diagonales when taking distance from the largest diag, we have to sort the generated values by the neg binom distrib, more or less depending on the mean used. - -if(is.null(n.row)){ -n.row <- n1 -} -mat1.ini <- matrix(NA, nrow = n.row, ncol = n1) # each column of mat1.ini is filled with random values according to neg binom distrib, with mean increasing for each column of mat1.ini -# solving the fact that when p is less than 1e-16, 1-p is rounded by R to 1, which results in prob = 1 in the rnbinom() function and only 0 returned -p1.rd <- p1 -r1.rd <- r1 -cutoff.pb <- 1e-13 # cut-off at which p is ok -if(any(p1.rd < cutoff.pb & p1.rd > 0)){ -cutoff.pos1 <- which.max(p1.rd < cutoff.pb & p1.rd > 0) - 1 -if(cutoff.pos1 == 0){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nCODE CANNOT WORK IF cutoff.pos1 IS EQUAL TO ZERO\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -}else{ -r1.rd[p1.rd < cutoff.pb & p1.rd > 0] <- r1.rd[cutoff.pos1] # beware: must be before p2.rd -p1.rd[p1.rd < cutoff.pb & p1.rd > 0] <- p1.rd[cutoff.pos1] -} -} -# end solving the fact that when p is less than 1e-16, 1-p is rounded by R to 1, which results in prob = 1 in the rnbinom() function and only 0 returned -for(i1 in 1:length(mu1)){ -if( mu1[i1] != 0){ # otherwise cannot compute rnbinom() because p Infinite -mat1.ini[, i1] <- rnbinom(n = n.row, size = r1.rd[i1], prob = 1 - p1.rd[i1]) # because prob is the proba of success in R, while it is the proba of failure in the equation -}else{ -mean.nb1[i1] <- 0 # just for graphs -sd.nb1[i1] <- NA # just for graphs -mat1.ini[, i1] <- 0 -tempo.cat <- paste0("BEWARE: IN LOOP ", i1,", mu1 HAS A ZERO VALUE: NO NEG BINOM LAW USED -> MATRIX COLUMN FILLED WITH ZERO") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -} -cv.nb1 <- sd.nb1 / mean.nb1 # just for graphs -options.ini <- options()$digits -options(digits = 22) -if(any((cv1^2 < 1/mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))]) | (sapply(FUN = all.equal, 1/mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))], cv1^2) == TRUE))){ # because we need sigma^2 > mean.nb1 for neg binom (otherwise, we have to use poisson) -# Then sigma^2 / mean.nb1^2 < mean.nb1 / mean.nb1^2 -> cv1^2 > 1/mean.nb1 -tempo.cat <- paste0("\n\n========\n\nPROBLEM: cv1 PARAMETER MUST BE SUCH THAT cv1^2 > 1/mean.nb1\ncv1 : ", cv1, "\ncv1^2 : ", cv1^2, "\nmean.nb1 VALUES DO NOT RESPECTING THE FORMULA (ZERO NOT CONSIDERED): ", paste(mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))][cv1^2 <= 1/mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))]], collapse = " "), "\n1/mean.nb1: ", paste(1 / mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))][cv1^2 <= 1/mean.nb1[ ! (is.na(mean.nb1) | ! is.finite(1 / mean.nb1))]], collapse = " "), "\n\n========\n\n") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -options(digits = options.ini) - - -# MATRIX 2 -# see matrix1 for the explanaition -n2 <- nrow(mean.sd.cv.cor.mat2.obs) -if( ! identical(n1, n2)){ # important -tempo.cat <- paste0("\n\n================\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE TWO ROW NUMBERS OF OBS MAT1 AND MAT2 ARE DIFFERENT: ", paste(c(n1, n2), collapse = " "), "\n\n================\n\n") # message for developers -stop(tempo.cat) -} -mu2 <- sort(mean.sd.cv.cor.mat2.obs$MEAN, decreasing = TRUE, na.last = TRUE) # need the ordering to work on correlation (see below) -if(cv.rho.obtained == TRUE){ -# source(path.cv.rho) # inactivated because already done above -}else{ -rho2 <- mean.sd.cv.cor.mat2.obs$COR[order(mean.sd.cv.cor.mat2.obs$MEAN, decreasing = TRUE, na.last = TRUE)] -cv2 <- cv.select.mat2.obs # see above -} -if( ! identical(sort(rho1), sort(rho2))){ # important because rho1 will be modified, and rho2 kept as the initial correlation between obs mat1 and mat2 -tempo.cat <- paste0("\n\n================\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE TWO CORRELATIONS rho1 AND rho2 ARE NOT IDENTICAL\n\n================\n\n") # message for developers -stop(tempo.cat) -} -save(list = c("cv1", "rho1", "cv2", "rho2"), file = paste0(path.out, "/cv1_cv2_rho1_rho2_backup.RData")) -tempo.cat <- paste0("CORRELATIONS AND SELECTED COEFFICIENTS OF VARIATION SAVED IN: ", paste0(path.out, "/cv1_cv2_rho1_rho2_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) - -t2 <- 1 / cv2^2 # equation (1) -p2 <- exp(-t2 /mu2) # equation (8). Related to equation (9). In these equations, m cannot go below mt, but we do not care here for the correction -r2 <- t2 / p2 # equation (7) -sub.factor2 = t2 / ( mu2 * ( 1 - p2)) # equation (9), with mt = m2 (obs) and m = mean.nb2 (theo mean of NB). equation (9) / m2 -> t2 / ( mu2 * ( 1 - exp(-t2/mu2))) and sub.factor2 = m / mt, ie, sub.factor2 = mean.nb2 / m2. Thus, for the down sampling, to go back to m2, we will have to divide by sub.factor2 -mean.nb2 <- p2 * r2 / (1 - p2) # equation (2), just for graphs -sd.nb2 <- (p2 * r2 / (1 - p2)^2)^0.5 # equation (3), just for graphs -mat2.ini <- matrix(NA, nrow = n.row, ncol = n2) # each column of mat2.ini is filled with random values according to neg binom distrib, with mean increasing for each column of mat2.ini. n.row defined above -# solving the fact that when p is less than 1e-16, 1-p is rounded by R to 1, which results in prob = 1 in the rnbinom() function and only 0 returned -p2.rd <- p2 -r2.rd <- r2 -# cutoff.pb # defined above for Mat1 -if(any(p2.rd < cutoff.pb & p2.rd > 0)){ -cutoff.pos2 <- which.max(p2.rd < cutoff.pb & p2.rd > 0) - 1 -if(cutoff.pos2 == 0){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nCODE CANNOT WORK IF cutoff.pos2 IS EQUAL TO ZERO\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -}else{ -r2.rd[p2.rd < cutoff.pb & p2.rd > 0] <- r2.rd[cutoff.pos2] # beware: must be before p2.rd -p2.rd[p2.rd < cutoff.pb & p2.rd > 0] <- p2.rd[cutoff.pos2] -} -} -# end solving the fact that when p is less than 1e-16, 1-p is rounded by R to 1, which results in prob = 1 in the rnbinom() function and only 0 returned -for(i1 in 1:length(mu2)){ -if( mu2[i1] != 0){ # otherwise cannot compute rnbinom() because p Infinite -mat2.ini[, i1] <- rnbinom(n = n.row, size = r2.rd[i1], prob = 1 - p2.rd[i1]) # because prob is the proba of success in R, while it is the proba of failure in the equation -}else{ -mean.nb2[i1] <- 0 -sd.nb2[i1] <- NA -mat2.ini[, i1] <- 0 -tempo.cat <- paste0("BEWARE: IN LOOP ", i1,", mu2 HAS A ZERO VALUE: NO NEG BINOM LAW USED -> MATRIX COLUMN FILLED WITH ZERO") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -} -cv.nb2 <- sd.nb2 / mean.nb2 -options.ini <- options()$digits -options(digits = 22) -if(any((cv2^2 < 1/mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))]) | (sapply(FUN = all.equal, 1/mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))], cv2^2) == TRUE))){ # because we need sigma^2 > mean.nb2 for neg binom (otherwise, we have to use poisson) -# Then sigma^2 / mean.nb2^2 > mean.nb2 / mean.nb2^2 -> cv2^2 > 1/mean.nb2 -tempo.cat <- paste0("\n\n========\n\nPROBLEM: cv2 PARAMETER MUST BE SUCH THAT cv2^2 > 1/mean.nb2\ncv2 : ", cv2, "\ncv2^2 : ", cv2^2, "\nmean.nb2 VALUES DO NOT RESPECTING THE FORMULA (ZERO NOT CONSIDERED): ", paste(mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))][cv2^2 <= 1/mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))]], collapse = " "), "\n1/mean.nb2: ", paste(1 / mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))][cv2^2 <= 1/mean.nb2[ ! (is.na(mean.nb2) | ! is.finite(1 / mean.nb2))]], collapse = " "), "\n\n========\n\n") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -options(digits = options.ini) - - -################ End Theoretical matrices - - -################ plot verification - - -# homogeneous scale -# heatmap range -loop.mat.names <- c("mat1.ini", "mat2.ini") -heatmap.range2 <- NULL -for(i0 in 1:length(loop.mat.names)){ -# data transformtation -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0])[is.finite(get(loop.mat.names[i0]))], na.rm = TRUE) # mean normalization -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) # log transfo -} -# end data transformtation -heatmap.range2 <- range(c(heatmap.range, heatmap.range2, tempo.data.plot), na.rm = TRUE, finite = TRUE) -} -# end heatmap range -# sd homogeneous scale -tempo1 <- data.frame(MEAN = apply(X = mat1.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), SD = apply(mat1.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -tempo2 <- data.frame(MEAN = apply(X = mat2.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), SD = apply(mat2.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -tempo.range <- fun_gg_scatter(data1 = list(L1 = if(transfo != "no"){get(transfo)(tempo1)}else{tempo1}, L2 = if(transfo != "no"){get(transfo)(tempo2)}else{tempo2}), x = list(L1 = "MEAN", L2 = "MEAN"), y = list(L1 = "SD", L2 = "SD"), geom = list("geom_point", "geom_point"), alpha = list(0.5, 0.5), xlog = transfo, ylog = transfo, plot = FALSE, return = TRUE) -m_sd.x.range2 <- range(c(m_sd.x.range, tempo.range$axes$x.range), na.rm = TRUE, finite = TRUE) -m_sd.y.range2 <- range(c(m_sd.y.range, tempo.range$axes$y.range), na.rm = TRUE, finite = TRUE) -# end sd homogeneous scale -# cv homogeneous scale -tempo1 <- tempo1$SD / tempo1$MEAN -tempo2 <- tempo2$SD / tempo2$MEAN -m_cv.x.range2 <- range(c(m_cv.x.range, tempo1, tempo2), na.rm = TRUE, finite = TRUE) -m_cv.y.range2 <- range(c(m_cv.y.range, tempo1, tempo2), na.rm = TRUE, finite = TRUE) -# cor homogeneous scale -m_cor.x.range2 <- range(c(m_cor.x.range, m_cv.x.range2), na.rm = TRUE, finite = TRUE) # because same x-axis -m_cor.y.range2 <- m_cor.y.range # because same x-axis -# end cor homogeneous scale -# end sd homogeneous scale - -# MATRIX 1 -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "THEORETICAL MATRIX 1 DESIGN", text.size = 3) -# heatmap -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT1 THEO\nEACH COLUMN REPRESENTS A DIAGONAL OF MAT1 OBS (MAIN DIAG FULL OF ZERO INCLUDED)\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range2, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)") -tempo.data.plot <- mat1.ini / mean(mat1.ini[is.finite(mat1.ini)], na.rm = TRUE) # was not here before. I tried that -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -} -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range2, na.rm = TRUE), max(heatmap.range2, na.rm = TRUE)), midpoint1 = mean(heatmap.range2, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -# end heatmap - -# Mean Deviation (MD) plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat1.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), SD = apply(mat1.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -tempo.data.negbinom <- data.frame(x.green = get(transfo)(mean.nb1), y.green = get(transfo)(cv.nb1 * mean.nb1)) # neg binomiale distrib -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -tempo.data.negbinom <- data.frame(x.green = mean.nb1, y.green = cv.nb1 * mean.nb1) # neg binomiale distrib -} -tempo.title <- paste0("MAT1 THEO\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range2, ylog = transfo, ylab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end Mean Deviation (MD) plot of the observed matrix -# mean versus index plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat1.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat1.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat1.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat1.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat1.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT1 THEO\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot - - -# MATRIX 2 -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "THEORETICAL MATRIX 2 DESIGN", text.size = 3) -# heatmap -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.title <- paste0("MAT2 THEO\nEACH COLUMN REPRESENTS A DIAGONAL OF MAT2 OBS (MAIN DIAG FULL OF ZERO INCLUDED)\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range2, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)") -tempo.data.plot <- mat2.ini / mean(mat2.ini[is.finite(mat2.ini)], na.rm = TRUE) # was not here before. I tried that -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -} -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = c(min(heatmap.range2, na.rm = TRUE), max(heatmap.range2, na.rm = TRUE)), midpoint1 = mean(heatmap.range2, na.rm = TRUE), title = tempo.title, text.size = heatmap.text.size, title.text.size = title.text.size) -# end heatmap - -# Mean Deviation (MD) plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat2.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), SD = apply(mat2.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -tempo.data.negbinom <- data.frame(x.green = get(transfo)(mean.nb2), y.green = get(transfo)(cv.nb2 * mean.nb2)) # neg binomiale distrib -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -tempo.data.negbinom <- data.frame(x.green = mean.nb2, y.green = cv.nb2 * mean.nb2) # neg binomiale distrib -} -tempo.title <- paste0("MAT2 THEO\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range2, ylog = transfo, ylab = "SD", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end Mean Deviation (MD) plot -# mean versus index plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat2.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat2.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat2.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat2.ini, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat2.ini, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT2 THEO\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot - - -################ End plot verification - - -################ Taking correlation into account - - -cat("\nINCLUDING THE OBS MAT1 / MAT2 DIAGONAL CORRELATIONS INTO THEO MATRICES\n") -fun_report(data = "\n\n################################ INCLUDING THE OBS MAT1 / MAT2 DIAGONAL CORRELATIONS INTO THEO MATRICES", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "INCLUDING THE OBS\nMAT1 / MAT2\nDIAGONAL CORRELATIONS\nINTO THEO MATRICES", text.size = 3) -# mean / cor of the observed matrix -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = (mean.sd.cv.cor.mat1.obs$MEAN + mean.sd.cv.cor.mat2.obs$MEAN) / 2, COR = mean.sd.cv.cor.mat1.obs$COR) -if(transfo != "no"){ -tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap -} -tempo.title <- paste0("(MAT1 OBS / MAT2 OBS) MEAN VERSUS (MAT1 OBS / MAT2 OBS) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range2, ylog = "no", ylab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end mean / cor of the observed matrix -# mean / cor of the theoretical matrices -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- suppressWarnings(data.frame(MEAN = (apply(X = mat1.ini, MARGIN = 2, FUN = "mean") + apply(X = mat2.ini, MARGIN = 2, FUN = "mean")) / 2, COR = mapply(FUN = "cor", x = as.data.frame(mat1.ini), y = as.data.frame(mat2.ini), use = "pairwise.complete.obs", method = "spearman", SIMPLIFY = TRUE))) -if(transfo != "no"){ -tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap -} -tempo.title <- paste0("(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range2, ylog = "no", ylab = "CORRELATION", y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# Permutation of the mat1.ini values to have the mat1.obs correlation on the diagonales between mat1 and mat2 -if(correl.mat.obtained == TRUE){ -tempo.cat <- paste0("BEWARE: mat1.ini, mat1.perm, mat2.ini and mat2.perm THERORETICAL MATRICES USED HAVE BEEN DOWNLOADED FROM:\n", path.theo1.theo2) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -load(paste0(path.theo1.theo2)) -}else{ -if(any(is.na(mat1.ini))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nSHOULD NOT HAVE ANY NA IN mat1.ini\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -if(any(is.na(mat2.ini))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nSHOULD NOT HAVE ANY NA IN mat2.ini\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -mat1.perm <- mat1.ini -mat2.perm <- mat2.ini -# modifiying rho1 -if(single.corr == "VALUE"){ # all the observed corr between mat1.obs and mat2.obs replaced by abs.corr.limit -rho1[ ! is.na(rho1)] <- abs.corr.limit -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY abs.corr.limit") -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "MAX"){ -rho1[ ! is.na(rho1)] <- max(rho1, na.rm = TRUE) -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY MAX CORRELATION OF rho1: ", max(rho1, na.rm = TRUE)) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "DEC1"){ -rho1[ ! is.na(rho1)] <- quantile(rho1, probs = 0.1, type = 7, na.rm = TRUE) -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY FIRST DECILE CORRELATION OF rho1: ", quantile(rho1, probs = 0.1, type = 7, na.rm = TRUE)) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "QUART1"){ -rho1[ ! is.na(rho1)] <- quantile(rho1, probs = 0.25, type = 7, na.rm = TRUE) -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY FIRST QUARTILE CORRELATION OF rho1: ", quantile(rho1, probs = 0.25, type = 7, na.rm = TRUE)) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "MED"){ -rho1[ ! is.na(rho1)] <- median(rho1, na.rm = TRUE) -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY THE MEDIAN CORRELATION OF rho1: ", median(rho1, na.rm = TRUE)) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "MIN"){ -rho1[ ! is.na(rho1)] <- min(rho1, na.rm = TRUE) -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs REPLACED BY MIN CORRELATION OF rho1: ", min(rho1, na.rm = TRUE)) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(single.corr == "NO"){ -tempo.cat <- paste0("BEWARE: ALL THE OBSERVED CORRELATIONS BETWEEN mat1.obs AND mat2.obs WILL BE USED FOR THE PERMUTATIONS, EXCEPT THOSE BELOW abs.corr.limit: ", abs.corr.limit) -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else{ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -} -# end modifiying rho1 -# sorting mat1 according to the rho1 values -for(i0 in 1:ncol(mat1.perm)){ -if(is.na(rho1[i0])){ -if( ! all(mat1.perm[, i0] == 0 )){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nrho1 IS NA AND COLUMN ", i0, " OF mat1.perm IS NOT ZERO FOR LOOP:", i0, "\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -}else if(any(is.na(mat1.perm[, i0]))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nSHOULD NOT HAVE NA IN COLUMN ", i0, " OF mat1.perm\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -}else if(rho1[i0] < 0){ -mat1.perm[, i0] <- sort(mat1.perm[, i0], decreasing = TRUE) # Beware: decreasing order here because we want a negative correlation with mat2[, i0] starting at -1, and increasing during permut -}else if(rho1[i0] >= 0){ -mat1.perm[, i0] <- sort(mat1.perm[, i0], decreasing = FALSE) # Beware: increasing order here because we want a positive correlation with mat2[, i0] starting at 1, and decreasing during permut -}else{ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -} -# end sorting mat1 according to the rho1 values -# sorting mat2.perm systematically in the ascending order -for(i0 in 1:ncol(mat2.perm)){ -if(is.na(rho2[i0])){ -if( ! all(mat2.perm[, i0] == 0 )){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nrho2 IS NA AND COLUMN ", i0, " OF mat2.perm IS NOT ZERO FOR LOOP:", i0, "\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -} -}else if(any(is.na(mat2.perm[, i0]))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nSHOULD NOT HAVE NA IN COLUMN ", i0, " OF mat2.perm\n\n============\n\n") -cat(tempo.cat) -stop() -# fun_report(data = tempo.cat, output = log.file, path = path.out) -}else{ -mat2.perm[, i0] <- sort(mat2.perm[, i0], decreasing = FALSE) -} -} -# end sorting mat2.perm systematically in the ascending order -# parallelization -perm.ini.date <- Sys.time() # time of process begin, converted into seconds -perm.ini.time <- as.numeric(perm.ini.date) # time of process begin, converted into seconds -tempo.cat <- paste0("PERMUTATION RUN INITIATED ON THEO MATRICES (DIMENSION ", paste(dim(mat1.perm), collapse = " x "), ") AT: ", perm.ini.date) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.thread.nb = parallel::detectCores(all.tests = FALSE, logical = TRUE) # detect the number of threads -if(is.null(thread.nb)){ -thread.nb <- tempo.thread.nb - 1 -}else if(tempo.thread.nb < thread.nb){ -thread.nb <- tempo.thread.nb -} -tempo.cat <- paste0("NUMBER OF THREADS USED: ", thread.nb) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -# trick to correctly dispatch the amount of work per cpu -pos.rho1.decrease <- order(rho1, decreasing = TRUE, na.last = NA) # remove the NA positions -tempo.list <- vector("list", thread.nb) # will contain a good repartition of the higher corr per cpu -count <- 0 -for(i0 in 1:length(pos.rho1.decrease)){ -count <- count + 1 -tempo.list[[count]] <- c(tempo.list[[count]], pos.rho1.decrease[i0]) -if(count == thread.nb){ -count <- 0 -} -} -tempo.list <- unlist(tempo.list) -# end trick to correctly dispatch the amount of work per cpu -Clust <- parallel::makeCluster(thread.nb, outfile = paste0(path.out, "/cor_parall_log.txt")) # outfile to print or cat during parallelization (only possible in a file, outfile = "" do not work on windowsâ—‹) -tempo.cat <- paste0("COLUMN NUMBER SPLIT FOR PARALLELISATION") -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = parallel::clusterSplit(Clust, tempo.list), output = log.file, path = path.out) -paral.output.list <- parallel::clusterApply( # paral.output.list is a list made of thread.nb compartments, each made of n / thread.nb (mat theo column number) compartment. Each compartment receive the corresponding results of fun_permut(), i.e., data (permuted mat1.perm), warning message, cor (final correlation) and count (number of permutations) -cl = Clust, -x = parallel::clusterSplit(Clust, tempo.list), # split 1:ncol(mat1.perm) vector according to the number of cluster and put into x for each cpu. Allow to take only the column of mat1.perm with no NA corr -rho1 = rho1, -mat1.perm = mat1.perm, -mat2.perm = mat2.perm, -count.print = count.print, -# very important because another R -path.function1 = path.function1, -req.package.list = req.package.list, -path.lib = path.lib, -req.python.package.list = req.python.package.list, -path.python.lib = path.python.lib, -# end very important because another R -fun = function(x, mat1.perm, mat2.perm, rho1, count.print, path.function1, req.package.list, path.lib, req.python.package.list, path.python.lib){ -# check again: very important because another R -source(path.function1) -fun_pack(req.package = req.package.list, path.lib = path.lib, load = TRUE) # load = TRUE to be sure that functions are present in the environment. And this prevent to use R.path.lib argument of fun_python_pack() -# end check again: very important because another R -output <- vector("list", length(x)) -names(output) <- as.character(x) # paste0("V", x) -# see also Protocol 100-rev0 Parallelization in R.docx -if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment -tempo.random.seed <- .Random.seed -on.exit(assign(".Random.seed", tempo.random.seed, env = .GlobalEnv)) -}else{ -on.exit(set.seed(NULL)) # inactivate seeding -> return to complete randomness -} -for(i0 in 1:length(x)){ -if(i0 == 1){ -set.seed(x[i0]) # take the first number of x for seed, which is different for each parallelization -} -tempo.cor <- suppressWarnings(cor(mat1.perm[, x[i0]], mat2.perm[, x[i0]], use = "pairwise.complete.obs", method = "spearman")) -if(length(table(mat1.perm[, x[i0]])) == 1){ -output[[i0]] <- list(data = mat1.perm[, x[i0]], warnings = paste0("NO PERMUTATION PERFORMED BECAUSE MAT1 MADE OF IDENTICAL ELEMENTS: ", names(table(mat1.perm[, x[i0]]))), cor = NA, count = 0) -}else if(length(table(mat2.perm[, x[i0]])) == 1){ -output[[i0]] <- list(data = mat1.perm[, x[i0]], warnings = paste0("NO PERMUTATION PERFORMED BECAUSE MAT2 MADE OF IDENTICAL ELEMENTS: ", names(table(mat2.perm[, x[i0]]))), cor = NA, count = 0) -}else if(tempo.cor <= rho1[x[i0]]){ -output[[i0]] <- list(data = mat1.perm[, x[i0]], warnings = paste0("NO PERMUTATION PERFORMED BECAUSE THE ABSOLUTE VALUE OF THE CORRELATION ", fun_round(tempo.cor), " BETWEEN MAT1 AND MAT2 HAS BEEN DETECTED AS BELOW THE CORRELATION LIMIT PARAMETER ", fun_round(rho1[x[i0]])), cor = tempo.cor, count = 0) -}else{ -output[[i0]] <- fun_permut(data1 = mat1.perm[, x[i0]], data2 = mat2.perm[, x[i0]], seed = NULL, text.print = paste0("DIAG NB ", x[i0]), count.print = count.print, cor.method = "spearman", cor.limit = rho1[x[i0]]) # with seed = NULL, take the global random seed that already exist because set above -} -} -return(output) -} -) -parallel::stopCluster(Clust) -tempo.date <- Sys.time() -tempo.time <- as.numeric(tempo.date) -tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - perm.ini.time)) -tempo.cat <- paste0("PERMUTATION RUN ON THEO MATRICES ACHIEVED AT: ", tempo.date, " | TIME LAPSE: ", tempo.lapse) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -# end parallelization -# recover the results -permut.list <- vector("list", ncol(mat1.perm)) # list containing all the columns of mat.perm permuted -warning.list <- vector("list", ncol(mat1.perm)) # list containing the corresponding warnings messages -cor.list <- vector("list", ncol(mat1.perm)) # list containing the corresponding final correlations -count.list <- vector("list", ncol(mat1.perm)) # list containing the corresponding permutation count -# BEWARE: erratic situations when NULL is assigned to a list. See https://stackoverflow.com/questions/7944809/assigning-null-to-a-list-element-in-r -for(i0 in 1:length(paral.output.list)){ # compartment relatives to each parallelization -for(i1 in 1:length(paral.output.list[[i0]])){ # compartment relatives to each diagonal inside parallelization -permut.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- paral.output.list[[i0]][[i1]]$data -names(permut.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] -warning.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- if(is.null(paral.output.list[[i0]][[i1]]$warnings)){"NO WARNING"}else{paral.output.list[[i0]][[i1]]$warnings} # no NULL assignation in list -names(warning.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] -cor.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- paral.output.list[[i0]][[i1]]$cor -names(cor.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] -count.list[[as.integer(names(paral.output.list[[i0]])[i1])]] <- paral.output.list[[i0]][[i1]]$count -names(count.list)[as.integer(names(paral.output.list[[i0]])[i1])] <- names(paral.output.list[[i0]])[i1] -} -} -if( ! identical(as.integer(names(permut.list)), sort(unlist(parallel::clusterSplit(Clust, tempo.list))))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nnames(permut.list) AND sort(unlist(parallel::clusterSplit(Clust, tempo.list))) SHOULD BE IDENTICAL\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -} -if( ! identical(names(permut.list), names(warning.list))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nnames(permut.list) AND names(warning.list) SHOULD BE IDENTICAL\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -} -if( ! identical(names(permut.list), names(cor.list))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nnames(permut.list) AND names(cor.list) SHOULD BE IDENTICAL\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -} -if( ! identical(names(permut.list), names(count.list))){ -tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nnames(permut.list) AND names(count.list) SHOULD BE IDENTICAL\n\n============\n\n") -fun_report(data = tempo.cat, output = log.file, path = path.out) -stop("\n", tempo.cat, "\n") -} -# end recover the results -# result print -tempo.cat <- paste0("THE CORRELATION THRESHOLD SET FOR THE PERMUTATION STEP IS: ", paste(fun_round(as.numeric(names(table(rho1)))), collapse = " ")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- paste0("THE FINAL CORRELATION BETWEEN THE COLUMNS OF MAT THEO 1 AND 2 (OBS CORRESPONDING DIAGONAL CORRELATION) ARE:\nCOLUMN\tCORR\n", paste(names(cor.list), fun_round(unlist(cor.list)), sep = "\t", collapse = "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- paste0("THE ASSOCIATED WARNING MESSAGES FROM THE fun_permut() FUNCTION ARE:\n", paste("COLUMN NB ", names(unlist(warning.list)), ": ", unlist(warning.list), collapse = "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_open(pdf.disp = TRUE, path.fun = path.out, pdf.name.file = paste0("correlation_plots_", analysis.nb), width.fun = width.wind, height.fun = height.wind) -tempo.max <- max(as.integer(names(permut.list)), na.rm = TRUE) -if(tempo.max > 40){ -cor.print.loop <- c(1:20, (tempo.max - 19):tempo.max) -tempo.max <- 40 -tempo.cat <- paste0("ONLY THE 20 FIRST AND 20 LAST DIAGONALS ARE PLOTTED IN THE correlation_plots.pdf FILE") -fun_report(data = tempo.cat, output = log.file, path = path.out) -}else{ -cor.print.loop <- 1:tempo.max -} -count.loop <- 0 -for(i0 in cor.print.loop){ -count.loop <- count.loop + 1 -tempo.data.plot <- data.frame(MAT1 = mat1.perm[, i0], MAT2 = mat2.perm[, i0]) -x.tempo.range <- range(tempo.data.plot[, "MAT1"], na.rm = TRUE, finite = TRUE) -y.tempo.range <- range(tempo.data.plot[, "MAT2"], na.rm = TRUE, finite = TRUE) -tempo.cor <- suppressWarnings(cor(x = tempo.data.plot[, "MAT1"], y = tempo.data.plot[, "MAT2"], use = "pairwise.complete.obs", method = "spearman")) -tempo.title <- paste0("BEFORE PERMUTATION | (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\nOBS MATRIX CORRESPONDING DIAGONAL NUMBER: ", i0, "\nCORRELATION VALUE: ", if(is.na(tempo.cor)){NA}else{fun_round(tempo.cor)}, "\nX SCALE RANGE: ", paste(fun_round(x.tempo.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(y.tempo.range, 2), collapse = " , ")) -# fun_prior_plot(param.reinitial = FALSE, xlog.scale = FALSE, ylog.scale = FALSE, remove.label = TRUE, remove.x.axis = TRUE, remove.y.axis = TRUE, std.x.range = FALSE, std.y.range = FALSE, down.space = height.wind / 7 * 1, left.space = width.wind / 7 * 1, up.space = height.wind / 7 * 1, right.space = width.wind / 7 * 1, orient = 1, dist.legend = 5, tick.length = 0.5, box.type = "n", amplif.label = 1, amplif.axis = 1, display.extend = FALSE, return.par = FALSE) -# plot(tempo.data.plot[, "MAT1"], tempo.data.plot[, "MAT2"], col = hsv(0, 0, 0.5, 0.5), pch = 16, xlim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, ylim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}) -# tempo.coord <- fun_post_plot(x.side = 1, x.lab = "MAT1", y.side = 2, y.lab = "MAT2", x.axis.magnific = 1, x.label.magnific = 1, y.axis.magnific = 1, y.label.magnific = 1) -# par(xpd = TRUE) -# text(x = tempo.coord$x.left.plot.region, y = tempo.coord$y.top.fig.region, labels = tempo.title, cex = 0.65, adj = c(0, 1)) -# par(xpd = FALSE) -fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, xlim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, ylim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = FALSE) -tempo.data.plot <- data.frame(MAT1 = permut.list[[i0]], MAT2 = mat2.perm[, i0]) -x.tempo.range <- range(tempo.data.plot[, "MAT1"], na.rm = TRUE, finite = TRUE) -y.tempo.range <- range(tempo.data.plot[, "MAT2"], na.rm = TRUE, finite = TRUE) -tempo.cor <- suppressWarnings(cor(x = tempo.data.plot[, "MAT1"], y = tempo.data.plot[, "MAT2"], use = "pairwise.complete.obs", method = "spearman")) -tempo.title <- paste0("AFTER PERMUTATION | (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\nOBS MATRIX CORRESPONDING DIAGONAL NUMBER: ", i0, "\nCORRELATION VALUE: ", if(is.na(tempo.cor)){NA}else{fun_round(tempo.cor)}, "\nCORRELATION INDICATED BY THE fun_permut() FUNCTION: ", if(is.na(cor.list[[i0]])){NA}else{fun_round(cor.list[[i0]])}, "\nX SCALE RANGE: ", paste(fun_round(x.tempo.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(y.tempo.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MAT1", y = "MAT2", color = fun_gg_palette(1), geom = "geom_point", alpha = 0.5, dot.size = dot.size, xlim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, x.tick.nb = 8, ylim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = FALSE) -# fun_prior_plot(param.reinitial = FALSE, xlog.scale = FALSE, ylog.scale = FALSE, remove.label = TRUE, remove.x.axis = TRUE, remove.y.axis = TRUE, std.x.range = FALSE, std.y.range = FALSE, down.space = height.wind / 7 * 1, left.space = width.wind / 7 * 1, up.space = height.wind / 7 * 1, right.space = width.wind / 7 * 1, orient = 1, dist.legend = 5, tick.length = 0.5, box.type = "n", amplif.label = 1, amplif.axis = 1, display.extend = FALSE, return.par = FALSE) -# plot(tempo.data.plot[, "MAT1"], tempo.data.plot[, "MAT2"], col = hsv(1, 0.5, 1, 0.5), pch = 16, xlim = if(diff(x.tempo.range) == 0){c(unique(x.tempo.range) - 1, unique(x.tempo.range) + 1)}else{x.tempo.range}, ylim = if(diff(y.tempo.range) == 0){c(unique(y.tempo.range) - 1, unique(y.tempo.range) + 1)}else{y.tempo.range}) -# tempo.coord <- fun_post_plot(x.side = 1, x.lab = "MAT1", y.side = 2, y.lab = "MAT2", x.axis.magnific = 1, x.label.magnific = 1, y.axis.magnific = 1, y.label.magnific = 1) -# par(xpd = TRUE) -# text(x = tempo.coord$x.left.plot.region, y = tempo.coord$y.top.fig.region, labels = tempo.title, cex = 0.65, adj = c(0, 1)) -# par(xpd = FALSE) -cat(paste0("\nPRINT CORR GRAPH NB ", count.loop, " / ", tempo.max, " (DIAG NB ", i0, ")")) -} -cat("\n") -dev.off() -# end result print -# mat1.perm filling -for(i0 in 1:ncol(mat1.perm)){ -mat1.perm[, i0] <- permut.list[[i0]] # final permutation of the values using the permuted positions -} -# end mat1.perm filling -} -if(keep == FALSE){ -tempo.list <- c("mat1.ini", "mat1.perm", "mat2.ini", "mat2.perm") -tempo.cat <- paste0("INITIAL AND PERMUTED THEO MATRICES SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -rm(list = c("mat1.ini", "mat2.ini")) # .perm still needed -} -# end Permutation of the mat1.ini values to have the mat1.obs correlation on the diagonales between mat1 and mat2 -# mean / cor of the theoretical matrices -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- suppressWarnings(data.frame(MEAN = (apply(X = mat1.perm, MARGIN = 2, FUN = "mean") + apply(X = mat2.perm, MARGIN = 2, FUN = "mean")) / 2, COR = mapply(FUN = "cor", x = as.data.frame(mat1.perm), y = as.data.frame(mat2.perm), use = "pairwise.complete.obs", method = "spearman", SIMPLIFY = TRUE))) -if(transfo != "no"){ -tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap -} -tempo.title <- paste0("AFTER PERMUTATION\n(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range2, ylog = "no", ylab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end mean / cor of the theoretical matrices -# correlation versus index before and after permutation -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if(length(rho2) != length(unlist(cor.list))){ # important because rho1 will be modified, and rho2 kept as the initial correlation between obs mat1 and mat2 -tempo.cat <- paste0("\n\n================\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE TWO CORRELATIONS rho2 AND cor.list DO NOT HAVE THE SAME LENGTH\n\n================\n\n") # message for developers -stop(tempo.cat) -} -tempo.data.plot <- data.frame(CORRELATION = c(rho2, unlist(cor.list)), COLUMN_NB = c(1:length(rho2), 1:length(rho2)), KIND = rep(c("OBS", "THEO"), each = length(rho2))) -tempo.title <- paste0("SPEARMAN CORRELATION BETWEEN MAT1 OBS / MAT2 OBS\nCOMPARISON WITH MAT1 THEO / MAT2 THEO AFTER PERMUTATION\nX SCALE RANGE: ", paste(range(1:length(rho2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: -1, 1") -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "DIAGONAL INDEX", x.tick.nb = 8, ylog = "no", ylab = "CORRELATION", ylim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster) -# end correlation versus index before and after permutation - - -################ End Taking correlation into account - - -################ sub sampling - - -cat("\nTHEO MATRICES SUBSAMPLING\n") -fun_report(data = "\n\n################################ THEO MATRICES SUBSAMPLING", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "THEO MATRICES\nSUBSAMPLING", text.size = 3) - - -# theoretical matrix subsampling (this allow to have smaller values on the mean x of the MD plot) -# division of the coverage in each cell of column i0 of theo mat by sub.factor1[i0] (reduction of the coverage), and then random sampling of one value considering that the coverage is a mean coverage of the poisson law (is poisson law, lamnda = mean = variance). Thus, sapply(lambda, FUN = rpois, n = 1) -mat1.down <- mat1.perm -for(i0 in 1:ncol(mat1.down)){ -if( ! all(is.na(mat1.down[, i0]))){ -mat1.down[, i0] <- sapply(mat1.down[, i0] / sub.factor1[i0], FUN = rpois, n = 1) -} -} -mat2.down <- mat2.perm -for(i0 in 1:ncol(mat2.down)){ -if( ! all(is.na(mat2.down[, i0]))){ -mat2.down[, i0] <- sapply(mat2.down[, i0] / sub.factor2[i0], FUN = rpois, n = 1) -} -} -# randomize the mat1.down row values in each column, to break the order in the first matrix, and use the same row order in mat2.down before using serpentine to keep the correlation between columns intact -# In fact, change nothing to shuffle or not -if(nrow(mat1.down) > 1){ -shuffled.row.pos <- sample(x = 1:nrow(mat1.down), size = nrow(mat1.down), replace = FALSE) -}else{ -stop("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nA SINGLE DIAGONAL PRESENT IN THEO MATRIX 1 (mat1.down)\n\n============\n\n") -} -# shuffling the rows of mat1.down and mat2.down (same repositionning for both matrix, to keep the relationship between rows (correlation)) -mat1.mix <- mat1.down[shuffled.row.pos, ] -mat2.mix <- mat2.down[shuffled.row.pos, ] -if(keep == FALSE){ -tempo.list <- c("mat1.down", "mat2.down") -tempo.cat <- paste0("DOWNSAMPLED THEO MATRICES SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} -# export of theo matrices before binning -loop.mat.names <- c("mat1.mix", "mat2.mix") -for(i0 in 1:length(loop.mat.names)){ -write.table(get(loop.mat.names[i0]), file = paste0(path.out, "/mat", i0, "theo.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -# end export of theo matrices before binning -# end theoretical matrix subsampling (this allow to have smaller values on the mean x of the MD plot) - - -# mean versus index plot before sub sampling -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat1.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat1.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat1.perm, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat1.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat1.perm, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND BEFORE SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat2.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat2.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat2.perm, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat2.perm, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat2.perm, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND BEFORE SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot before sub sampling -if(keep == FALSE){ -tempo.list <- c("mat1.perm", "mat2.perm") -# save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) # not saved here because already saved before -rm(list = tempo.list) # not saved because initial matrices -} -# mean versus index plot after sub sampling -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat1.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat1.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat1.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat1.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat1.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND AFTER SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- data.frame(MEAN = apply(X = mat2.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE), MEAN.MINUS.SD = apply(X = mat2.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) - apply(mat2.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE), MEAN.PLUS.SD = apply(X = mat2.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) + apply(mat2.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE)) -if(transfo != "no"){ -tempo.data.plot <- suppressWarnings(get(transfo)(tempo.data.plot)) # log(x + 1) only for heatmap -if(any(is.na(tempo.data.plot$MEAN.MINUS.SD))){ -tempo.data.plot$MEAN.MINUS.SD[is.na(tempo.data.plot$MEAN.MINUS.SD)] <- tempo.data.plot$MEAN[is.na(tempo.data.plot$MEAN.MINUS.SD)] -} -} -tempo.data.plot <- data.frame(COLUMN_NB = 1:nrow(tempo.data.plot), tempo.data.plot) -tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND AFTER SUB SAMPLING\nMEAN PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x)\nABSENT SD IF LOG2(M - SD) < 0 "}else if(transfo == "log10"){"MEAN AND SD LOG10(x)\nABSENT SD IF LOG10(M - SD) < 0 "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(range(1:nrow(tempo.data.plot), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("MEAN"), color = list(grey(0.40)), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "COLUMN NB", x.tick.nb = 8, ylog = transfo, ylab = "MEAN", ylim = m_sd.y.range2, y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, add = paste0('+ ggplot2::geom_segment(data = data.frame(x = tempo.data.plot$COLUMN_NB, xend = tempo.data.plot$COLUMN_NB, y = tempo.data.plot$MEAN.MINUS.SD, yend = tempo.data.plot$MEAN.PLUS.SD), ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = grey(0.40), alpha = 0.3)')) -# end mean versus index plot after sub sampling - - -# MD plot for the theoretical matrix 1 after sub sampling and permutation -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -x.MEAN <- apply(X = mat1.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) -y.SD <- apply(mat1.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE) -tempo.data.plot <- data.frame(MEAN = x.MEAN, SD = y.SD) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -tempo.data.negbinom <- data.frame(x.green = get(transfo)(mean.nb1), y.green = get(transfo)(cv.nb1 * mean.nb1)) # neg binomiale distrib -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -tempo.data.negbinom <- data.frame(x.green = mean.nb1, y.green = cv.nb1 * mean.nb1) # neg binomiale distrib -} -tempo.title <- paste0("MAT1 THEO\nAFTER PERMUTATION AND SUB SAMPLING\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range2, ylog = transfo, ylab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end MD plot for the theoretical matrix 1 after sub sampling and permutation - -# MD plot for the theoretical matrix 2 after sub sampling and permutation -x.MEAN <- apply(X = mat2.mix, MARGIN = 2, FUN = "mean", na.rm = TRUE) -y.SD <- apply(mat2.mix, MARGIN = 2, FUN = "sd", na.rm = TRUE) -tempo.data.plot <- data.frame(MEAN = x.MEAN, SD = y.SD) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) # log(x + 1) only for heatmap -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN / 2, y.cv.pois = get(transfo)(1) - tempo.data.plot$MEAN / 2) # because poisson distrib is mean = variance, thus sd = mean ^0.5, ie each mean as x is square rooted for y and then x.MEAN / 2 because log2(mean^0.5) = 0.5 * log2(mean). For cv, cv = sd/m -> log(cv) = log(sd/m) = log(m/m^0.5) = log(1/m^0.5) = log(1) - log(m^0.5) = log(1) - log(m)/2 -tempo.data.negbinom <- data.frame(x.green = get(transfo)(mean.nb2), y.green = get(transfo)(cv.nb2 * mean.nb2)) # neg binomiale distrib -}else{ -tempo.data.pois <- data.frame(x.pois = tempo.data.plot$MEAN, y.sd.pois = tempo.data.plot$MEAN^0.5, y.cv.pois = 1 / tempo.data.plot$MEAN^0.5) -tempo.data.negbinom <- data.frame(x.green = mean.nb2, y.green = cv.nb2 * mean.nb2) # neg binomiale distrib -} -tempo.title <- paste0("MAT2 THEO\nAFTER PERMUTATION AND SUB SAMPLING\nMEAN DEVIATION (MD) PLOT\n", if(transfo == "log2"){"MEAN AND SD LOG2(x) "}else if(transfo == "log10"){"MEAN AND SD LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_sd.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_sd.y.range2, 2), collapse = " , "), "\nRED LINE: POISSON DISTRIB\nGREEN LINE: NEG BINOM DISTRIB") -fun_gg_scatter(data1 = list(tempo.data.plot, tempo.data.pois, tempo.data.negbinom), x = list("MEAN", "x.pois", "x.green"), y = list("SD", "y.sd.pois", "y.green"), color = list(grey(0.40), "red", "green"), geom = list("geom_point", "geom_line", "geom_line"), alpha = list(0.5, 1, 1), dot.size = dot.size, line.size = line.size, xlim = m_sd.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_sd.y.range2, ylog = transfo, ylab = "SD",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end MD plot for the theoretical matrix 2 after sub sampling and permutation - -# mean / cor of the theoretical matrices -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -tempo.data.plot <- suppressWarnings(data.frame(MEAN = (apply(X = mat1.mix, MARGIN = 2, FUN = "mean") + apply(X = mat2.mix, MARGIN = 2, FUN = "mean")) / 2, COR = mapply(FUN = "cor", x = as.data.frame(mat1.mix), y = as.data.frame(mat2.mix), use = "pairwise.complete.obs", method = "spearman", SIMPLIFY = TRUE))) -if(transfo != "no"){ -tempo.data.plot[, "MEAN"] <- get(transfo)(tempo.data.plot[, "MEAN"]) # log(x + 1) only for heatmap -} -tempo.title <- paste0("AFTER PERMUTATION AND SUB SAMPLING\n(MAT1 THEO / MAT2 THEO) MEAN VERSUS (MAT1 THEO / MAT2 THEO) SPEARMAN CORRELATION\n", if(transfo == "log2"){"MEAN LOG2(x) "}else if(transfo == "log10"){"MEAN LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(m_cor.x.range2, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(m_cor.y.range2, 2), collapse = " , ")) -fun_gg_scatter(data1 = tempo.data.plot, x = "MEAN", y = "COR", color = grey(0.40), geom = "geom_point", alpha = 0.5, dot.size = dot.size, line.size = line.size, xlim = m_cor.x.range2, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylim = m_cor.y.range2, ylog = "no", ylab = "CORRELATION",y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster, x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0) # x.left.extra.margin = 0, x.right.extra.margin = 0, y.top.extra.margin = 0, y.bottom.extra.margin = 0 because already in m_sd.coord.obs -# end correlation versus index before and after subsampling -tempo.cor <- suppressWarnings(mapply(FUN = "cor", c(data.frame(mat1.mix)), c(data.frame(mat2.mix)), use = "pairwise.complete.obs", method = "spearman")) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if(length(rho2) != length(tempo.cor)){ # -tempo.cat <- paste0("\n\n================\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE TWO CORRELATIONS rho2 AND tempo.cor DO NOT HAVE THE SAME LENGTH\n\n================\n\n") # message for developers -stop(tempo.cat) -} -tempo.data.plot <- data.frame(CORRELATION = c(rho2, unlist(cor.list), tempo.cor), COLUMN_NB = c(1:length(rho2), 1:length(rho2), 1:length(rho2)), KIND = rep(c("OBS", "THEO_AFTER_PERMUTATION", "THEO_AFTER_DOWNSAMPLING"), each = length(rho2))) -tempo.title <- paste0("SPEARMAN CORRELATION BETWEEN MAT1 OBS / MAT2 OBS\nCOMPARISON WITH MAT1 THEO / MAT2 THEO 1) AFTER PERMUTATION AND 2) AFTER PERMUTATION AND DOWNSAMPLING\nX SCALE RANGE: ", paste(range(1:length(rho2), na.rm = TRUE, finite = TRUE), collapse = " , "), "\nY SCALE RANGE: -1, 1") -fun_gg_scatter(data1 = list(tempo.data.plot), x = list("COLUMN_NB"), y = list("CORRELATION"), categ = list("KIND"), geom = list("geom_point"), alpha = list(0.5), dot.size = dot.size, line.size = line.size, xlog = "no", xlab = "DIAGONAL INDEX", x.tick.nb = 8, ylog = "no", ylab = "CORRELATION", ylim = c(-1, 1), y.tick.nb = 8, title = tempo.title, text.size = text.size, title.text.size = title.text.size, classic = TRUE, raster = raster) -# end correlation versus index before and after subsampling - - -################ end sub sampling - - -################ significant differences pre serpentine (SLITHERINE) - - -}else{ -if(serp.binning == TRUE){ -serp.binning <- FALSE -tempo.warning <- paste0("THE serp.binning PARAMETER SETTING HAS BEEN SET TO TRUE, BUT THEORETICAL MATRICES HAVE BEEN IMPORTED\n-> PRE SERPENTINE ANALYSIS IS SUFFICIENT (IF YOU NEED MORE SERPENTINE BINNING, INCREASE THE VALUE OF THE serp.iter.nb PARAMETER)\n-> serp.binning PARAMETER RESET TO FALSE") -cat(paste0("\nWARNING: ", tempo.warning, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), tempo.warning) # in fact, abs(tempo.cor) is systematically used -} -} - - -cat("\nSIGNIFICANT DIFFERENCES PRE SERPENTINE (SLITHERINE)\n") -fun_report(data = "\n\n################################ SIGNIFICANT DIFFERENCES PRE SERPENTINE (SLITHERINE)", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES PRE SERPENTINE\n(SLITHERINE)", text.size = 3) -# theo dataframe alone -if(transfo != "no"){ -fun_report(data = paste0("SIGNIFICANT DIFFERENCES PERFORMED ON ", toupper(transfo), " TRANSFORMED DATA"), output = log.file, path = path.out) -theo <- data.frame(MEAN = get(transfo)(as.vector((mat1.mix + mat2.mix) / 2)), RATIO = get(transfo)(as.vector(mat2.mix / mat1.mix)), MATRICES = "Theo", coord_1D = 1:(nrow(mat1.mix) * ncol(mat1.mix)), stringsAsFactors = FALSE) # mat2.mix / mat1.mix to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -}else{ -fun_report(data = paste0("SIGNIFICANT DIFFERENCES PERFORMED ON ", toupper(transfo), "N TRANSFORMED DATA"), output = log.file, path = path.out) -theo <- data.frame(MEAN = as.vector((mat1.mix + mat2.mix) / 2), RATIO = as.vector(mat2.mix / mat1.mix), MATRICES = "Theo", coord_1D = 1:(nrow(mat1.mix) * ncol(mat1.mix)), stringsAsFactors = FALSE) # mat2.mix / mat1.mix to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -} -# observed dataframe -if(transfo != "no"){ -obs <- data.frame(MEAN = get(transfo)(as.vector((mat1.obs + mat2.obs) / 2)), RATIO = get(transfo)(as.vector(mat2.obs / mat1.obs)), MATRICES = "Obs", coord_1D = 1:(nrow(mat1.obs) * ncol(mat1.obs)), stringsAsFactors = FALSE) # mat2.obs / mat1.obs to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -}else{ -obs <- data.frame(MEAN = as.vector((mat1.obs + mat2.obs) / 2), RATIO = as.vector(mat2.obs / mat1.obs), MATRICES = "Obs", coord_1D = 1:(nrow(mat1.obs) * ncol(mat1.obs)), stringsAsFactors = FALSE) # mat2.obs / mat1.obs to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -} -final <- rbind(theo, obs) -# BEWARE: segment.pre.serp, final, obs and theo integrate log transfo already -segment.pre.serp <- fun_segmentation(data1 = theo, x1 = "MEAN", y1 = "RATIO", x.range.split = range.split, x.step.factor = step.factor, error = error, data2 = obs, x2 = "MEAN", y2 = "RATIO", data2.pb.dot = "signif", plot = FALSE, graph.in.file = FALSE) -fun_report(data = "UNKNOWN DOTS HAVE BEEN CONSIDERED AS SIGNIFICANTS (ARGUMENT data2.pb.dot OF fun_segmentation() SET TO \"signif\")", output = log.file, path = path.out) -if( ! is.null(segment.pre.serp$hframe)){ -names(segment.pre.serp$hframe)[names(segment.pre.serp$hframe) == "kind"] <- "FRAMES" # only dot nb are finally kept -}else{ -tempo.cat <- paste0("BEWARE: NO HORIZONTAL FRAME DETECTED DURING SEGMENTATION PRE SERPENTINE") -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -# cat(paste0("\n", segment.pre.serp$warnings, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.pre.serp$warnings) # in fact, abs(tempo.cor) is systematically used -fun_report(data = segment.pre.serp$warnings, output = log.file, path = path.out) - - -# second y filtering: -# weighting the cell ratio before computing the global mean ratio, after moving NA, Inf columns and also NA, Inf cells in columns with values -if(transfo != "no"){ -ratio.center.adj <- adj.mean.fun(get(transfo)(mat2.mix / mat1.mix), text = "BEFORE SERPENTINE ratio.center.adj PARAMETER THRESHOLD ADJUSTEMENT ACCORDING TO THE WEIGHTED MEAN") -tempo.sup <- get(transfo)(max(ratio.limit.sig, 1 / ratio.limit.sig, na.rm = TRUE)) -tempo.inf <- get(transfo)(min(ratio.limit.sig, 1 / ratio.limit.sig, na.rm = TRUE)) -y.range.limit.sup <- tempo.sup - ifelse(ratio.center.adj > 0, - abs(ratio.center.adj), abs(ratio.center.adj)) # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -y.range.limit.inf <- tempo.inf - ifelse(ratio.center.adj > 0, - abs(ratio.center.adj), abs(ratio.center.adj)) # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -}else{ -ratio.center.adj <- adj.mean.fun((mat2.mix / mat1.mix), text = "AFTER SERPENTINE ratio.center.adj PARAMETER THRESHOLD ADJUSTEMENT ACCORDING TO THE WEIGHTED MEAN") -y.range.limit.sup <- ratio.center.adj * ratio.limit.sig # -y.range.limit.inf <- ratio.center.adj / ratio.limit.sig # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -} -# end weighting the cell ratio before computing the global mean ratio, after moving NA, Inf columns and also NA, Inf cells in columns with values -signif.theo.dot.pre <- segment.pre.serp$data1.signif.dot # significant table of data1 pre serpentine -if( ! is.null(signif.theo.dot.pre)){ -if(all(signif.theo.dot.pre$RATIO > y.range.limit.inf & signif.theo.dot.pre$RATIO < y.range.limit.sup)){ -tempo.cat <- paste0("BEWARE: SIGNIFICANT THEO DOTS DETECTED DURING SEGMENTATION PRE SERPENTINE\nBUT NOT ANYMORE AFTER USING THE ratio.limit.sig PARAMETER (", ratio.limit.sig, ")\nTAKING INTO ACCOUNT THE GLOBAL MEAN RATIO (", fun_round(ratio.center.adj), "), THE SIGNIFICANT LIMITS WHERE: ", paste(fun_round(c(y.range.limit.inf, y.range.limit.sup)), collapse = " ")) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = "THE SIGNIFICANT DOTS ARE:", output = log.file, path = path.out, sep = 1) -fun_report(data = "THE SIGNIFICANT DOTS ARE:", output = log.file, path = path.out, sep = 1) -fun_report(data = signif.theo.dot.pre, output = log.file, path = path.out) -signif.theo.dot.pre <- NULL -}else{ -signif.theo.dot.pre <- unique(signif.theo.dot.pre[ ! (signif.theo.dot.pre$RATIO > y.range.limit.inf & signif.theo.dot.pre$RATIO < y.range.limit.sup), ]) # only dot nb are finally kept -} -}else{ -tempo.cat <- paste0("NO SIGNIFICANT THEO DOTS DETECTED DURING SEGMENTATION PRE SERPENTINE") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -signif.obs.dot.pre <- segment.pre.serp$data2.signif.dot # significant table of data2 pre serpentine -sup.signif.obs.dot.pre <- NULL -inf.signif.obs.dot.pre <- NULL -if( ! is.null(signif.obs.dot.pre)){ -if(all(signif.obs.dot.pre$RATIO > y.range.limit.inf & signif.obs.dot.pre$RATIO < y.range.limit.sup)){ -tempo.cat <- paste0("BEWARE: SIGNIFICANT OBS DOTS DETECTED DURING SEGMENTATION PRE SERPENTINE\nBUT NOT ANYMORE AFTER USING THE ratio.limit.sig PARAMETER (", ratio.limit.sig, ")\nTAKING INTO ACCOUNT THE GLOBAL MEAN RATIO (", fun_round(ratio.center.adj), "), THE SIGNIFICANT LIMITS WHERE: ", paste(fun_round(c(y.range.limit.inf, y.range.limit.sup)), collapse = " ")) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = "THE SIGNIFICANT DOTS ARE:", output = log.file, path = path.out, sep = 1) -fun_report(data = signif.obs.dot.pre, output = log.file, path = path.out) -signif.obs.dot.pre <- NULL -}else{ -signif.obs.dot.pre <- unique(signif.obs.dot.pre[ ! (signif.obs.dot.pre$RATIO > y.range.limit.inf & signif.obs.dot.pre$RATIO < y.range.limit.sup), ]) # -sup.signif.obs.dot.pre <- signif.obs.dot.pre[signif.obs.dot.pre$RATIO > 0, ] # positive log ratio, i.e., mat2 > mat1 -inf.signif.obs.dot.pre <- signif.obs.dot.pre[signif.obs.dot.pre$RATIO < 0, ] # negative log ratio, i.e., mat2 < mat1 -} -}else{ -tempo.cat <- paste0("NO SIGNIFICANT OBS DOTS DETECTED DURING SEGMENTATION PRE SERPENTINE") -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -# end second y filtering: - -fun_report(data = paste0("PRE SERPENTINE SEGMENTATION DATA (POTENTIALLY LOG TRANSFORMED) SAVED IN: ", paste0(path.out, "/segmentation_pre_serp.RData")), output = log.file, path = path.out) -save(list = c("segment.pre.serp", "signif.theo.dot.pre", "signif.obs.dot.pre", "sup.signif.obs.dot.pre", "inf.signif.obs.dot.pre", "theo", "obs"), file = paste0(path.out, "/segmentation_pre_serp.RData")) -# segment.pre.serp result of segmentation (no second y filtering) -# signif.theo.dot.pre significant theo dot after second y filtering -# signif.obs.dot.pre significant obs dot after second y filtering -# theo mean and ratio cells of the 2 theo matrices, without transformation, used for the segmentation -# obs mean and ratio cells of the 2 theo matrices, without transformation, used for the segmentation - -# plot verif obs dots outside -# MD overlay plot before serpentine -segm.x.range <- range(final$MEAN, na.rm = TRUE, finite = TRUE) -segm.y.range <- range(c(final$RATIO, y.range.limit.inf, y.range.limit.sup), na.rm = TRUE, finite = TRUE) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.theo.dot.pre)){ # signif dots in theo matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE + THEO SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = theo, L2 = signif.theo.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE (NO THEO SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = theo, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} - -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.obs.dot.pre)){ # signif dots in obs matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE + OBS SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = obs, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE (NO OBS SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = obs, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.obs.dot.pre)){ # signif dots in obs matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT + OBS SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = final, L2 = signif.obs.dot.pre, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT (NO OBS SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = final, L3 = segment.pre.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} -# end plot verif obs dots outside -# end MD overlay plot before serpentine - - -# Mask of the obs data outside the cloud of the theo data on the MDMR plot (MEan Difference Mean Ratio) -if(is.null(signif.obs.dot.pre)){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "PRE SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT DOT DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT DOT DETECTED", output = log.file, path = path.out) -}else{ -# file saving -sup.obs.mask.pre <- matrix(0, nrow = nrow(mat1.obs), ncol = nrow(mat1.obs)) # matrix same dim as obs full of zero -inf.obs.mask.pre <- matrix(0, nrow = nrow(mat1.obs), ncol = nrow(mat1.obs)) # matrix same dim as obs full of zero -if(nrow(sup.signif.obs.dot.pre) > 0){ -sup.obs.mask.pre[sup.signif.obs.dot.pre$coord_1D] <- 1 # create the mask. If signif.obs.dot.pre is NULL, no 1 added -fun_report(data = paste0("PRE SERPENTINE MAT2 > MAT1 MASK DATA SAVED IN: ", paste0(path.out, "/sup_mask_pre_serp.txt")), output = log.file, path = path.out) -write.table(sup.obs.mask.pre, file = paste0(path.out, "/sup_mask_pre_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -if(nrow(inf.signif.obs.dot.pre) > 0){ -inf.obs.mask.pre[inf.signif.obs.dot.pre$coord_1D] <- 1 # create the mask. If signif.obs.dot.pre is NULL, no 1 added -fun_report(data = paste0("PRE SERPENTINE MAT2 < MAT1 MASK DATA SAVED IN: ", paste0(path.out, "/inf_mask_pre_serp.txt")), output = log.file, path = path.out) -write.table(inf.obs.mask.pre, file = paste0(path.out, "/inf_mask_pre_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -obs.mask.pre <- sup.obs.mask.pre + inf.obs.mask.pre -fun_report(data = paste0("PRE SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/mask_pre_serp.txt")), output = log.file, path = path.out) -write.table(inf.obs.mask.pre, file = paste0(path.out, "/mask_pre_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(nrow(sup.signif.obs.dot.pre) > 0){ -mask.plot.fun(mask_fun = sup.obs.mask.pre, mat1_fun = mat1.obs, mat2_fun = mat2.obs, serp_kind = "BEFORE", mask_kind = "POSITIVE (MAT2 > MAT1)") -}else{ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "PRE SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 > MAT1 SIGNIFICANT DOTS DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 > MAT1 SIGNIFICANT DOTS DETECTED", output = log.file, path = path.out) -} -if(nrow(inf.signif.obs.dot.pre) > 0){ -mask.plot.fun(mask_fun = inf.obs.mask.pre, mat1_fun = mat1.obs, mat2_fun = mat2.obs, serp_kind = "BEFORE", mask_kind = "NEGATIVE (MAT2 < MAT1)") -}else{ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "PRE SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 < MAT1 SIGNIFICANT DOTS DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 < MAT1 SIGNIFICANT DOTS DETECTED", output = log.file, path = path.out) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = obs.mask.pre, mat1_fun = mat1.obs, mat2_fun = mat2.obs, serp_kind = "BEFORE", mask_kind = "FULL") -# end heatmaps -} -if(keep == FALSE){ -tempo.list <- c("obs.mask.pre", "sup.obs.mask.pre", "inf.obs.mask.pre") -tempo.cat <- paste0("PRE SERPENTINE MASK MATRICES SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} - - -################ end significant differences pre serpentine (SLITHERINE) - - -################ significant differences pre serpentine (HIC COMPARE) - - -# see https://bioconductor.org/packages/release/bioc/manuals/HiCcompare/man/HiCcompare.pdf -if(hiccomp == TRUE){ -cat("\nSIGNIFICANT DIFFERENCES PRE SERPENTINE (HIC COMPARE)\n") -fun_report(data = "\n\n################################ SIGNIFICANT DIFFERENCES PRE SERPENTINE (HIC COMPARE)", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES PRE SERPENTINE\n(HIC COMPARE)", text.size = 3) -# p values computations -sparse.name <- cumsum(c(0, rep(binning, ncol(mat1.obs) - 1))) -mat1.obs.sparse <- mat1.obs -colnames(mat1.obs.sparse) <- sparse.name -mat1.obs.sparse <- HiCcompare::full2sparse(mat1.obs.sparse) -mat2.obs.sparse <- mat2.obs -colnames(mat2.obs.sparse) <- sparse.name -mat2.obs.sparse <- HiCcompare::full2sparse(mat2.obs.sparse) -hic.table <- HiCcompare::create.hic.table(mat1.obs.sparse, mat2.obs.sparse, chr = "chr5") -norm.hic.table <- suppressMessages(HiCcompare::hic_loess(hic.table)) # see the help of the hic_loess() function for the description of the returned table -diff.res <- suppressMessages(HiCcompare::hic_compare(norm.hic.table)) -diff.res <- as.data.frame(diff.res) -diff.res <- data.frame(diff.res, PVAL_MASK = 0L, PADJ_MASK = 0L) # add two columns to make the mask matrices -tempo.log.pvalue <- diff.res$p.value <= 0.05 -tempo.log.padj <- diff.res$p.adj <= 0.05 -if(sum(tempo.log.pvalue, na.rm = TRUE) > 0){ -diff.res$PVAL_MASK[tempo.log.pvalue] <- 1 -} -if(sum(tempo.log.padj, na.rm = TRUE) > 0){ -diff.res$PADJ_MASK[tempo.log.padj] <- 1 -} -# end p values computations -# heatmaps -# mask 1 -hiccomp1.mask.pre <- NULL -if(sum(tempo.log.pvalue, na.rm = TRUE) <= 0){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "PRE SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(NON ADJUSTED P VALUES)", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(NON ADJUSTED P VALUES)", output = log.file, path = path.out) -}else{ -# hiccomp.mask.pre <- HiCcompare::sparse2full(diff.res, hic.table = TRUE, column.name = "PVAL_MASK") # does not work because full2sparse() function removes the row of columns full of zero (dimension decreased) -hiccomp1.mask.pre <- matrix(0L, nrow = nrow(mat1.obs), ncol = ncol(mat1.obs)) -tempo.mat.coord <- as.matrix(data.frame(match(diff.res$start1[tempo.log.pvalue], sparse.name), -match(diff.res$start2[tempo.log.pvalue], sparse.name))) -tempo.mat.coord <- rbind(tempo.mat.coord, tempo.mat.coord[, c(2, 1)]) # to have a symmetric mask -hiccomp1.mask.pre[tempo.mat.coord] <- 1 -# end mask 1 -# file saving -fun_report(data = paste0("PRE SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/hicc_mask_pre_serp.txt")), output = log.file, path = path.out) -write.table(hiccomp1.mask.pre, file = paste0(path.out, "/hicc_mask_pre_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = hiccomp1.mask.pre, mat1_fun = mat1.obs, mat2_fun = mat2.obs, serp_kind = "BEFORE", mask_kind = "FULL", text_fun = "\nHIC COMPARE (NON ADJUSTED P VALUES)") -} -# end mask 1 -# mask 2 -hiccomp2.mask.pre <- NULL -if(sum(tempo.log.padj, na.rm = TRUE) <= 0){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "PRE SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(ADJUSTED P VALUES)", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(ADJUSTED P VALUES)", output = log.file, path = path.out) -}else{ -# hiccomp.mask.pre <- HiCcompare::sparse2full(diff.res, hic.table = TRUE, column.name = "PVAL_MASK") # does not work because full2sparse() function removes the row of columns full of zero (dimension decreased) -hiccomp2.mask.pre <- matrix(0L, nrow = nrow(mat1.obs), ncol = ncol(mat1.obs)) -tempo.mat.coord <- as.matrix(data.frame(match(diff.res$start1[tempo.log.padj], sparse.name), -match(diff.res$start2[tempo.log.padj], sparse.name))) -tempo.mat.coord <- rbind(tempo.mat.coord, tempo.mat.coord[, c(2, 1)]) # to have a symmetric mask -hiccomp2.mask.pre[tempo.mat.coord] <- 1 -# end mask 1 -# file saving -fun_report(data = paste0("PRE SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/hicc_padj_mask_pre_serp.txt")), output = log.file, path = path.out) -write.table(hiccomp2.mask.pre, file = paste0(path.out, "/hicc_padj_mask_pre_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = hiccomp2.mask.pre, mat1_fun = mat1.obs, mat2_fun = mat2.obs, serp_kind = "BEFORE", mask_kind = "FULL", text_fun = "\nHIC COMPARE (ADJUSTED P VALUES)") -} -# end mask -# end heatmaps -if(keep == FALSE){ -tempo.list <- c("mat1.obs.sparse", "mat2.obs.sparse", "diff.res", "hiccomp1.mask.pre", "hiccomp2.mask.pre") -tempo.cat <- paste0("PRE SERPENTINE HIC COMPARE MASK MATRICES SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/pre_serp_hiccomp_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} -} - - -################ serpentine - - -if(serp.binning == FALSE){ -tempo.cat <- "NO SERPENTINE BINNING (USER REQUEST OR THEORETICAL MATRICES IMPORTED)" -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, path = path.out, output = log.file, vector.cat = TRUE) -}else{ - - -# First randomize the mat1.mix row order, and use the same row order in mat2.mix before using serpentine -# In fact, change nothing to shuffle or not -cat("\nSERPENTINE BINNING\n") -fun_report(data = "\n\n################################ SERPENTINE BINNING", path = path.out, output = log.file, sep = 4) -# see https://rstudio.github.io/reticulate/reference/index.html for all the functions of reticulate -# reticulate::py_discover_config() # version of python displayed. Not necessary here because do at the end of the run using reticulate::py_config() which is more complete - - -# Theo Serpentine -theo.ini.date <- Sys.time() # time of process begin, converted into seconds -theo.ini.time <- as.numeric(theo.ini.date) # time of process begin, converted into seconds -# parallelization -tempo.thread.nb = parallel::detectCores(all.tests = FALSE, logical = TRUE) # detect the number of threads -if(is.null(thread.nb)){ -thread.nb <- tempo.thread.nb - 1 -}else if(tempo.thread.nb < thread.nb){ -thread.nb <- tempo.thread.nb -} -while (serp.iter.nb %% thread.nb != 0) { -thread.nb <- thread.nb - 1 -} -tempo.cat <- paste0("NUMBER OF THREADS USED: ", thread.nb) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- paste0("NUMBER OF ITERATION PER THREAD: ", serp.iter.nb / thread.nb) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.cat <- paste0("SERPENTINE RUN INITIATED ON THEO MATRICES (DIMENSION ", paste(dim(mat1.mix), collapse = " x "), ") AT: ", theo.ini.date) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -tempo.x <- 1:thread.nb -Clust <- parallel::makeCluster(thread.nb, outfile = paste0(path.out, "/theo_serp_parall_log.txt")) # outfile to print or cat during parallelization (only possible in a file, outfile = "" do not work on windows) -tempo.cat <- paste0("COLUMN NUMBER SPLIT FOR PARALLELISATION") -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = parallel::clusterSplit(Clust, tempo.x), output = log.file, path = path.out) -res.theo <- parallel::clusterApply( -cl = Clust, -x = parallel::clusterSplit(Clust, tempo.x), # split 1:thread.nb vector into thread.nb number of threads, thus 1 for thread 1, 2 for thread 2, etc. This is to set the seed. -mat1.mix = mat1.mix, -mat2.mix = mat2.mix, -serp.threshold = serp.threshold, -serp.minthreshold = serp.minthreshold, -serp.iter.nb = serp.iter.nb, -thread.nb = thread.nb, -serp.symmet.input = serp.symmet.input, -# very important because another R -path.function1 = path.function1, -req.package.list = req.package.list, -path.lib = path.lib, -python = python, -req.python.package.list = req.python.package.list, -path.python.lib = path.python.lib, -path.out = path.out, -# end very important because another R -fun = function(x, mat1.mix, mat2.mix, serp.threshold, serp.minthreshold, serp.iter.nb, thread.nb, serp.symmet.input, path.function1, req.package.list, path.lib, python, req.python.package.list, path.python.lib, path.out){ -# check again: very important because another R -source(path.function1) -fun_pack(req.package = req.package.list, path.lib = path.lib, load = TRUE) # load = TRUE to be sure that functions are present in the environment. And this prevent to use R.path.lib argument of fun_python_pack() -reticulate::use_python(Sys.which(python), required = TRUE) # required to avoid erratic python version use by reticulate when importing packages (see fun_python_pack()) -fun_python_pack(req.package = req.python.package.list, path.python.exec = python, path.lib = path.python.lib, R.path.lib = path.lib) -# end check again: very important because another R -serpentine <- reticulate::import("serpentine") # serpentine <- reticulate::import_from_path("c:/programdata/anaconda3/lib/site-packages/serpentine") -# saving random seed. see also Protocol 100-rev0 Parallelization in R.docx -# may not be required here but left in case -if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment -tempo.random.seed <- .Random.seed -on.exit(assign(".Random.seed", tempo.random.seed, env = .GlobalEnv)) -}else{ -on.exit(set.seed(NULL)) # inactivate seeding -> return to complete randomness -} -# end saving random seed. see also Protocol 100-rev0 Parallelization in R.docx -reticulate::py_set_seed(x) -# tempo.file <- file(description = paste0(path.out, "/theo_serp_parall_log.txt"), open = "a+") # open a connection file -# sink(file = tempo.file, append = TRUE, type = "output") # try to divert the stdout and stderr to the file, append = TRUE because file already exists -# sink(file = tempo.file, append = TRUE, type = "message") # try to divert the stdout and stderr to the file, append = TRUE because file already exists -serpentine$serpentine$print <- serpentine$serpentine$alternate_print(paste0(path.out, "/theo_serp_parall_log.txt")) # 1) Lyam has added a small function def alternate_print(logfile): return functools.partial(print, file=open(logfile, "a")) inside serpentine. 2) this function can be called using serpentine$serpentine$alternate_print in R, after having imported serpentine as module using reticulate. 3) serpentine$serpentine$alternate_print function has a single argument. Here I gave to this argument paste0(path.out, "/theo_serp_parall_log.txt"). Thus, print is modified. 4) This modification has to be assigned to the print function of serpentine$serpentine -output <- serpentine$serpentine$serpentin_binning(A = mat1.mix, B = mat2.mix, threshold = serp.threshold, minthreshold = serp.minthreshold, iterations = as.integer(serp.iter.nb / thread.nb), parallel = 1, force_symmetric = FALSE, verbose = TRUE) # BEWARE: must be parallel = 1, because the parallelization must be controled by R not python. In serpentine, the parall argument split iterations argument to speed the code up. Then, perform the average of the iterations matrices as output matrices. The code here does the same in R # force_symmetric = FALSE because we do not have symmetric matrix with theo matrices -# sink(file = NULL) # close the diversion, the connection file is close when the R session is closed -# sink(file = NULL) # twice because 2 opened -return(output) -} -) -parallel::stopCluster(Clust) -tempo.date <- Sys.time() -tempo.time <- as.numeric(tempo.date) -tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - theo.ini.time)) -tempo.cat <- paste0("SERPENTINE RUN ON THEO MATRICES ACHIEVED AT: ", tempo.date, " | TIME LAPSE: ", tempo.lapse) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -# end parallelization -# average of the matrices from serpentine -if(thread.nb > 1){ -mat1.theo.serp <- vector("list", thread.nb) -mat2.theo.serp <- vector("list", thread.nb) -mat.diff.theo.serp <- vector("list", thread.nb) -for(i0 in 1:thread.nb){ -mat1.theo.serp[[i0]] <- res.theo[[i0]][[1]] -mat2.theo.serp[[i0]] <- res.theo[[i0]][[2]] -mat.diff.theo.serp[[i0]] <- 2^res.theo[[i0]][[3]] # diff because log2(mat2) - log2(mat1) -> log2(mat2 / mat1). BEWARE the mat2 / mat1 convention for the ratio in serpentine. BEWARE: already log2 transformed. Thus, I removed using by 2^ -} -mat1.theo.serp <- fun_mat_op(mat1.theo.serp, kind.of.operation = "+") / thread.nb -mat2.theo.serp <- fun_mat_op(mat2.theo.serp, kind.of.operation = "+") / thread.nb -mat.diff.theo.serp <- fun_mat_op(mat.diff.theo.serp, kind.of.operation = "+") / thread.nb -}else{ -mat1.theo.serp <- res.theo[[1]][[1]] -mat2.theo.serp <- res.theo[[1]][[2]] -mat.diff.theo.serp <- 2^res.theo[[1]][[3]] -} -# end average of the matrices from serpentine -if(keep == FALSE){ -tempo.list <- c("mat1.mix", "mat2.mix", "res.theo") -tempo.cat <- paste0("THEO MATRICES JUST BEFORE SERPENTINE AND SERPENTINE PARALLELIZATION SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} - - -# Obs Serpentine -obs.ini.date <- Sys.time() # time of process begin, converted into seconds -obs.ini.time <- as.numeric(obs.ini.date) # time of process begin, converted into seconds -tempo.cat <- paste0("SERPENTINE RUN INITIATED ON OBS MATRICES (DIMENSION ", paste(dim(mat1.obs), collapse = " x "), ") AT: ", obs.ini.date) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -# parallelization -tempo.x <- rev(1:thread.nb) # rev to do not have the same for theo and obs -Clust <- parallel::makeCluster(thread.nb, outfile = paste0(path.out, "/obs_serp_parall_log.txt")) # outfile to print or cat during parallelization (only possible in a file, outfile = "" do not work on windowsâ—‹) -tempo.cat <- paste0("COLUMN NUMBER SPLIT FOR PARALLELISATION") -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = parallel::clusterSplit(Clust, tempo.x), output = log.file, path = path.out) -res.obs <- parallel::clusterApply( -cl = Clust, -x = parallel::clusterSplit(Clust, tempo.x), # split 1:thread.nb vector into thread.nb number of threads, thus 1 for thread 1, 2 for thread 2, etc. This is to set the seed. -mat1.obs = mat1.obs, -mat2.obs = mat2.obs, -serp.threshold = serp.threshold, -serp.minthreshold = serp.minthreshold, -serp.iter.nb = serp.iter.nb, -thread.nb = thread.nb, -serp.symmet.input = serp.symmet.input, -# very important because another R -path.function1 = path.function1, -req.package.list = req.package.list, -path.lib = path.lib, -python = python, -req.python.package.list = req.python.package.list, -path.python.lib = path.python.lib, -path.out = path.out, -# end very important because another R -fun = function(x, mat1.obs, mat2.obs, serp.threshold, serp.minthreshold, serp.iter.nb, thread.nb, serp.symmet.input, path.function1, req.package.list, path.lib, python, req.python.package.list, path.python.lib, path.out){ -# check again: very important because another R -source(path.function1) -fun_pack(req.package = req.package.list, path.lib = path.lib, load = TRUE) # load = TRUE to be sure that functions are present in the environment. And this prevent to use R.path.lib argument of fun_python_pack() -reticulate::use_python(Sys.which(python), required = TRUE) # required to avoid erratic python version use by reticulate when importing packages (see fun_python_pack()) -fun_python_pack(req.package = req.python.package.list, path.python.exec = python, path.lib = path.python.lib, R.path.lib = path.lib) -# end check again: very important because another R -serpentine <- reticulate::import("serpentine") # serpentine <- reticulate::import_from_path("c:/programdata/anaconda3/lib/site-packages/serpentine") -# saving random seed. see also Protocol 100-rev0 Parallelization in R.docx -# may not be required here but left in case -if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment -tempo.random.seed <- .Random.seed -on.exit(assign(".Random.seed", tempo.random.seed, env = .GlobalEnv)) -}else{ -on.exit(set.seed(NULL)) # inactivate seeding -> return to complete randomness -} -# end saving random seed. see also Protocol 100-rev0 Parallelization in R.docx -reticulate::py_set_seed(x) -serpentine$serpentine$print <- serpentine$serpentine$alternate_print(paste0(path.out, "/obs_serp_parall_log.txt")) # 1) Lyam has added a small function def alternate_print(logfile): return functools.partial(print, file=open(logfile, "a")) inside serpentine. 2) this function can be called using serpentine$serpentine$alternate_print in R, after having imported serpentine as module using reticulate. 3) serpentine$serpentine$alternate_print function has a single argument. Here I gave to this argument paste0(path.out, "/theo_serp_parall_log.txt"). Thus, print is modified. 4) This modification has to be assigned to the print function of serpentine$serpentine -output <- serpentine$serpentine$serpentin_binning(A = mat1.obs, B = mat2.obs, threshold = serp.threshold, minthreshold = serp.minthreshold, iterations = as.integer(serp.iter.nb / thread.nb), parallel = 1, force_symmetric = serp.symmet.input, verbose = TRUE) # BEWARE: must be parallel = 1, because the parallelization must be controled by R not python. In serpentine, the parall argument split iterations argument to speed the code up. Then, perform the average of the iterations matrices as output matrices. The code here does the same in R # force_symmetric = TRUE to force the output as symmetrical matrix (should be used when the input matrices are symmetrical -return(output) -} -) -parallel::stopCluster(Clust) -tempo.date <- Sys.time() -tempo.time <- as.numeric(tempo.date) -tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - obs.ini.time)) -tempo.cat <- paste0("SERPENTINE RUN ON OBS MATRICES ACHIEVED AT: ", tempo.date, " | TIME LAPSE: ", tempo.lapse) -cat(paste0("\n ", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -# end parallelization -# average of the matrices from serpentine -if(thread.nb > 1){ -mat1.obs.serp <- vector("list", thread.nb) -mat2.obs.serp <- vector("list", thread.nb) -mat.diff.obs.serp <- vector("list", thread.nb) -for(i0 in 1:thread.nb){ -mat1.obs.serp[[i0]] <- res.obs[[i0]][[1]] -mat2.obs.serp[[i0]] <- res.obs[[i0]][[2]] -# mat.diff.obs.serp[[i0]] <- 2^res.obs[[i0]][[3]] # diff because log2(mat2) - log2(mat1) -> log2(mat2 / mat1). BEWARE the mat2 / mat1 convention for the ratio in serpentine. BEWARE: already log2 transformed. Thus, I removed using by 2^ -# if( ! fun_comp_2d(round(mat2.obs.serp[[i0]] / mat1.obs.serp[[i0]], 4), round(mat.diff.obs.serp[[i0]], 4))$identical.content){ # test because I use below mat2.obs.serp / mat1.obs.serp and not mat.diff.obs.serp -# tempo.cat <- paste0("\n\n============\n\nINTERNAL ERROR CODE IN SLITHERINE\nTHE 2^mat.diff.obs.serp MATRIX IS NOT IDENTICAL TO mat2.obs.serp / mat1.obs.serp in loop ", i0, ". THIS WILL AFFECT DIFFERENTIAL HEATMAP PLOTTING USING THE mask.plot.fun() FUNCTION\n\n============\n\n") -# stop(tempo.cat) -# } -} -mat1.obs.serp <- fun_mat_op(mat1.obs.serp, kind.of.operation = "+") / thread.nb -mat2.obs.serp <- fun_mat_op(mat2.obs.serp, kind.of.operation = "+") / thread.nb -# we agree with Lyam that we first make the mean of mat1, and mean of mat2, and finally the ratio of mean(mat2)/mean(mat1), more than mean(mat2/mat1). Thus, this would not be good: mat.diff.obs.serp <- fun_mat_op(mat.diff.obs.serp, kind.of.operation = "+") / thread.nb -}else{ -mat1.obs.serp <- res.obs[[1]][[1]] -mat2.obs.serp <- res.obs[[1]][[2]] -} -mat.diff.obs.serp <- mat2.obs.serp / mat1.obs.serp -# end average of the matrices from serpentine -if(keep == FALSE){ -tempo.list <- c("mat1.obs", "mat2.obs", "res.obs") -tempo.cat <- paste0("OBS MATRICES JUST BEFORE SERPENTINE AND SERPENTINE PARALLELIZATION SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} -# export of binned obs and theo matrices -loop.mat.names <- c("mat1.theo.serp", "mat2.theo.serp", "mat1.obs.serp", "mat2.obs.serp") -for(i0 in 1:length(loop.mat.names)){ -write.table(get(loop.mat.names[i0]), file = paste0(path.out, "/", loop.mat.names[i0], ".txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -# end export of binned obs and theo matrices - - -################ end serpentine - - -################ heatmaps post serpentine - - -# heatmap -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "POST SERPENTINE HEATMAP", text.size = 3) -loop.mat.names <- c("mat1.theo.serp", "mat2.theo.serp", "mat1.obs.serp", "mat2.obs.serp") -loop.heatmap.title <- c("THEO MAT1 AFTER SERPENTINE", "THEO MAT2 AFTER SERPENTINE", "OBS MAT1 AFTER SERPENTINE", "OBS MAT2 AFTER SERPENTINE") -loop.heatmap.title2 <- c("NO SYMMETRIC MATRIX", "NO SYMMETRIC MATRIX", ifelse(serp.symmet.input == TRUE, "SYMMETRIC MATRIX (FORCED BY USER OPTION)", "NO SYMMETRIC MATRIX"), ifelse(serp.symmet.input == TRUE, "SYMMETRIC MATRIX (FORCED BY USER OPTION)", "NO SYMMETRIC MATRIX")) -heatmap.range3 <- NULL -for(i0 in 1:length(loop.mat.names)){ -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0]), na.rm = TRUE) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) # log transfo -} -heatmap.range3 <- range(c(heatmap.range3, tempo.data.plot), na.rm = TRUE, finite = TRUE) -} -loop.heatmap.title <- paste0(loop.heatmap.title, "\n", if(transfo == "log2"){"LOG2(x + 1) "}else if(transfo == "log10"){"LOG10(x + 1) "}else{"NO "}, "TRANSFORMATION\nFINAL MATRIX IS CELL AVERAGE OF ", thread.nb, " SERPENTINE BINNED MATRICES (PARALLELIZATION)\nSCALE RANGE: ", paste(fun_round(heatmap.range3, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)\n", loop.heatmap.title2) -for(i0 in 1:length(loop.mat.names)){ -tempo.data.plot <- get(loop.mat.names[i0]) / mean(get(loop.mat.names[i0]), na.rm = TRUE) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot + 1) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_heatmap(data1 = tempo.data.plot, legend.name1 = "", limit1 = heatmap.range3, midpoint1 = mean(heatmap.range3, na.rm = TRUE), title = loop.heatmap.title[i0], text.size = heatmap.text.size, title.text.size = title.text.size) -} -# end heatmap - - -# differential heatmap -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "POST SERPENTINE DIFFERENTIAL HEATMAP", text.size = 3) -# range computation -heatmap.range4 <- NULL -tempo.data.plot <- mat.diff.obs.serp / mean(mat.diff.obs.serp, na.rm = TRUE) # no log data -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) -} -heatmap.range4 <- range(c(heatmap.range4, tempo.data.plot), na.rm = TRUE, finite = TRUE) -tempo.data.plot <- mat.diff.theo.serp / adj.mean.fun(mat.diff.theo.serp, text = "AFTER SERPENTINE THEO HEATMAP NORMALIZATION ACCORDING TO THE WEIGHTED DIAGONAL MEAN") # weighted diag for theo only -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) -} -heatmap.range4 <- range(c(heatmap.range4, tempo.data.plot), na.rm = TRUE, finite = TRUE) -heatmap.range4 <- max(abs(heatmap.range4), na.rm = TRUE) -heatmap.range4 <- c(-heatmap.range4, heatmap.range4) # to center on zero -# end range computation - - -# plot -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -heatmap.title <- paste0("DIFFERENTIAL THEO AFTER SERPENTINE (THEO2 / THEO1)\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range4, 2), collapse = " , "), "\nNORMALIZED DISPLAY (WEIGHTED DIAGONAL MEAN DIVISION)") -tempo.data.plot <- mat.diff.theo.serp / adj.mean.fun(mat.diff.theo.serp, text = "AFTER SERPENTINE THEO HEATMAP NORMALIZATION ACCORDING TO THE WEIGHTED DIAGONAL MEAN") # weighted diag for theo only -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) -} -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = heatmap.range4, midpoint1 = 0, title = heatmap.title, title.text.size = title.text.size) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -heatmap.title <- paste0("DIFFERENTIAL OBS AFTER SERPENTINE (OBS2 / OBS1)\n", if(transfo == "log2"){"LOG2(x) "}else if(transfo == "log10"){"LOG10(x) "}else{"NO "}, "TRANSFORMATION\nSCALE RANGE: ", paste(fun_round(heatmap.range4, 2), collapse = " , "), "\nNORMALIZED DISPLAY (GLOBAL MEAN DIVISION)", ifelse(serp.symmet.input == TRUE, "\nSYMMETRIC MATRIX (FORCED BY USER OPTION)", "\nNO SYMMETRIC MATRIX")) -tempo.data.plot <- mat.diff.obs.serp / mean(mat.diff.obs.serp, na.rm = TRUE) -if(transfo != "no"){ -tempo.data.plot <- get(transfo)(tempo.data.plot) -} -fun_gg_heatmap(data1 = tempo.data.plot, legend.name = "", limit1 = heatmap.range4, midpoint1 = 0, title = heatmap.title, title.text.size = title.text.size) -# end plot -# end differential heatmap - - -################ significant differences post serpentine - - -cat("\nSIGNIFICANT DIFFERENCES POST SERPENTINE (SLITHERINE)\n") -fun_report(data = "\n\n################################ SIGNIFICANT DIFFERENCES POST SERPENTINE (SLITHERINE)", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES POST SERPENTINE\n(SLITHERINE)", text.size = 3) -# theo dataframe alone -if(transfo != "no"){ -fun_report(data = paste0("SIGNIFICANT DIFFERENCES PERFORMED ON ", toupper(transfo), " TRANSFORMED DATA"), output = log.file, path = path.out) -theo <- data.frame(MEAN = get(transfo)(as.vector((mat1.theo.serp + mat2.theo.serp) / 2)), RATIO = get(transfo)(as.vector(mat2.theo.serp / mat1.theo.serp)), MATRICES = "Theo", coord_1D = 1:(nrow(mat1.theo.serp) * ncol(mat1.theo.serp)), stringsAsFactors = FALSE) # mat2.theo.serp / mat1.theo.serp to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -}else{ -fun_report(data = paste0("SIGNIFICANT DIFFERENCES PERFORMED ON ", toupper(transfo), "N TRANSFORMED DATA"), output = log.file, path = path.out) -theo <- data.frame(MEAN = as.vector((mat1.theo.serp + mat2.theo.serp) / 2), RATIO = as.vector(mat2.theo.serp / mat1.theo.serp), MATRICES = "Theo", coord_1D = 1:(nrow(mat1.theo.serp) * ncol(mat1.theo.serp)), stringsAsFactors = FALSE) # mat2.theo.serp / mat1.theo.serp to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -} -# observed dataframe -if(transfo != "no"){ -obs <- data.frame(MEAN = get(transfo)(as.vector((mat1.obs.serp + mat2.obs.serp) / 2)), RATIO = get(transfo)(as.vector(mat2.obs.serp / mat1.obs.serp)), MATRICES = "Obs", coord_1D = 1:(nrow(mat1.obs.serp) * ncol(mat1.obs.serp)), stringsAsFactors = FALSE) # mat2.obs.serp / mat1.obs.serp to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -}else{ -obs <- data.frame(MEAN = as.vector((mat1.obs.serp + mat2.obs.serp) / 2), RATIO = as.vector(mat2.obs.serp / mat1.obs.serp), MATRICES = "Obs", coord_1D = 1:(nrow(mat1.obs.serp) * ncol(mat1.obs.serp)), stringsAsFactors = FALSE) # mat2.obs.serp / mat1.obs.serp to respect serpentine convention # the coord_1D coordinate is the 1D position of a cell in a matrix -} -final <- rbind(theo, obs) -# BEWARE: segment.post.serp, signif.theo.dot.post, signif.obs.dot.post, final, obs and theo integrate log transfo already -segment.post.serp <- fun_segmentation(data1 = theo, x1 = "MEAN", y1 = "RATIO", x.range.split = range.split, x.step.factor = step.factor, error = error, data2 = obs, x2 = "MEAN", y2 = "RATIO", data2.pb.dot = "signif", plot = FALSE, graph.in.file = FALSE) -fun_report(data = "UNKNOWN DOTS HAVE BEEN CONSIDERED AS SIGNIFICANTS (ARGUMENT data2.pb.dot OF fun_segmentation() SET TO \"signif\")", output = log.file, path = path.out) -if( ! is.null(segment.post.serp$hframe)){ -names(segment.post.serp$hframe)[names(segment.post.serp$hframe) == "kind"] <- "FRAMES" # only dot nb are finally kept -}else{ -tempo.cat <- paste0("BEWARE: NO HORIZONTAL FRAME DETECTED DURING SEGMENTATION POST SERPENTINE") -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -# cat(paste0("\n", segment.post.serp$warnings, "\n")) -warning.message <- paste0(warning.message, ifelse(is.null(warning.message), "", "\n"), segment.post.serp$warnings) # in fact, abs(tempo.cor) is systematically used -fun_report(data = segment.post.serp$warnings, output = log.file, path = path.out) - - -# second y filtering: -# weighting the cell ratio before computing the global mean ratio, after moving NA, Inf columns and also NA, Inf cells in columns with values -if(transfo != "no"){ -ratio.center.adj <- adj.mean.fun(get(transfo)(mat2.theo.serp / mat1.theo.serp), text = "BEFORE SERPENTINE ratio.center.adj PARAMETER THRESHOLD ADJUSTEMENT ACCORDING TO THE WEIGHTED MEAN") -tempo.sup <- get(transfo)(max(ratio.limit.sig, 1 / ratio.limit.sig, na.rm = TRUE)) -tempo.inf <- get(transfo)(min(ratio.limit.sig, 1 / ratio.limit.sig, na.rm = TRUE)) -y.range.limit.sup <- tempo.sup - ifelse(ratio.center.adj > 0, - abs(ratio.center.adj), abs(ratio.center.adj)) # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -y.range.limit.inf <- tempo.inf - ifelse(ratio.center.adj > 0, - abs(ratio.center.adj), abs(ratio.center.adj)) # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -}else{ -ratio.center.adj <- adj.mean.fun((mat2.theo.serp / mat1.theo.serp), text = "AFTER SERPENTINE ratio.center.adj PARAMETER THRESHOLD ADJUSTEMENT ACCORDING TO THE WEIGHTED MEAN") -y.range.limit.sup <- ratio.center.adj * ratio.limit.sig # -y.range.limit.inf <- ratio.center.adj / ratio.limit.sig # minus to adjust on the mean coverage ratio. Because the idea is to remove ratio less than 2, but taking the difference of coverage between the mat1 and mat2. Thus, we want an absolute ratio less than 2 for the removal -} -# end weighting the cell ratio before computing the global mean ratio, after moving NA, Inf columns and also NA, Inf cells in columns with values -signif.theo.dot.post <- segment.post.serp$data1.signif.dot # significant table of data1 post serpentine -if( ! is.null(signif.theo.dot.post)){ -if(all(signif.theo.dot.post$RATIO > y.range.limit.inf & signif.theo.dot.post$RATIO < y.range.limit.sup)){ -tempo.cat <- paste0("BEWARE: SIGNIFICANT THEO DOTS DETECTED DURING SEGMENTATION POST SERPENTINE\nBUT NOT ANYMORE AFTER USING THE ratio.limit.sig PARAMETER (", ratio.limit.sig, ")\nTAKING INTO ACCOUNT THE GLOBAL MEAN RATIO (", fun_round(ratio.center.adj), "), THE SIGNIFICANT LIMITS WHERE: ", paste(fun_round(c(y.range.limit.inf, y.range.limit.sup)), collapse = " ")) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = "THE SIGNIFICANT DOTS ARE:", output = log.file, path = path.out, sep = 1) -fun_report(data = signif.theo.dot.post, output = log.file, path = path.out) -signif.theo.dot.post <- NULL -}else{ -signif.theo.dot.post <- unique(signif.theo.dot.post[ ! (signif.theo.dot.post$RATIO > y.range.limit.inf & signif.theo.dot.post$RATIO < y.range.limit.sup), ]) # only dot nb are finally kept -} -}else{ -tempo.cat <- paste0("NO SIGNIFICANT THEO DOTS DETECTED DURING SEGMENTATION POST SERPENTINE") -# cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -signif.obs.dot.post <- segment.post.serp$data2.signif.dot # significant table of data2 post serpentine -sup.signif.obs.dot.post <- NULL -inf.signif.obs.dot.post <- NULL -if( ! is.null(signif.obs.dot.post)){ -if(all(signif.obs.dot.post$RATIO > y.range.limit.inf & signif.obs.dot.post$RATIO < y.range.limit.sup)){ -tempo.cat <- paste0("BEWARE: SIGNIFICANT OBS DOTS DETECTED DURING SEGMENTATION POST SERPENTINE\nBUT NOT ANYMORE AFTER USING THE ratio.limit.sig PARAMETER (", ratio.limit.sig, ")\nTAKING INTO ACCOUNT THE GLOBAL MEAN RATIO (", fun_round(ratio.center.adj), "), THE SIGNIFICANT LIMITS WHERE: ", paste(fun_round(c(y.range.limit.inf, y.range.limit.sup)), collapse = " ")) -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -fun_report(data = "THE SIGNIFICANT DOTS ARE:", output = log.file, path = path.out, sep = 1) -fun_report(data = signif.obs.dot.post, output = log.file, path = path.out) -signif.obs.dot.post <- NULL -}else{ -signif.obs.dot.post <- unique(signif.obs.dot.post[ ! (signif.obs.dot.post$RATIO > y.range.limit.inf & signif.obs.dot.post$RATIO < y.range.limit.sup), ]) # only dot nb are finally kept -sup.signif.obs.dot.post <- signif.obs.dot.post[signif.obs.dot.post$RATIO > 0, ] # positive log ratio, i.e., mat2 > mat1 -inf.signif.obs.dot.post <- signif.obs.dot.post[signif.obs.dot.post$RATIO < 0, ] # negative log ratio, i.e., mat2 < mat1 -} -}else{ -tempo.cat <- paste0("NO SIGNIFICANT OBS DOTS DETECTED DURING SEGMENTATION POST SERPENTINE") -cat(paste0("\n", tempo.cat, "\n")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -} -# end second y filtering: - -fun_report(data = paste0("POST SERPENTINE SEGMENTATION DATA (POTENTIALLY LOG TRANSFORMED) SAVED IN: ", paste0(path.out, "/segmentation_post_serp.RData")), output = log.file, path = path.out) -save(list = c("segment.post.serp", "signif.theo.dot.post", "signif.obs.dot.post", "sup.signif.obs.dot.post", "inf.signif.obs.dot.post", "theo", "obs"), file = paste0(path.out, "/segmentation_post_serp.RData")) -# segment.post.serp result of segmentation (no second y filtering) -# signif.theo.dot.post significant theo dot after second y filtering -# signif.obs.dot.post significant obs dot after second y filtering -# theo mean and ratio cells of the 2 theo matrices, without transformation, used for the segmentation -# obs mean and ratio cells of the 2 theo matrices, without transformation, used for the segmentation - -# plot verif obs dots outside -# MD overlay plot before serpentine -segm.x.range <- range(final$MEAN, na.rm = TRUE, finite = TRUE) -segm.y.range <- range(c(final$RATIO, y.range.limit.inf, y.range.limit.sup), na.rm = TRUE, finite = TRUE) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.theo.dot.post)){ # signif dots in theo matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE + THEO SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = theo, L2 = signif.theo.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nTHEO MAT ALONE (NO THEO SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = theo, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[2], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} - -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.obs.dot.post)){ # signif dots in obs matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE + OBS SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = obs, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS MAT ALONE (NO OBS SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = obs, L3 = segment.post.serp$hframe, L4 =data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2)[1], L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -if( ! is.null(signif.obs.dot.post)){ # signif dots in obs matrices -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT + OBS SIGNIFICANT DOTS\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = final, L2 = signif.obs.dot.post, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L2 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L2 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L2 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L2 = "SIGNIF DOTS", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L2 = "black", L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L2 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L2 = 1, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -}else{ -tempo.title <- paste0("SEGMENTATION OF THE MEAN / RATIO MATRIX CELLS\nOBS AND THEO MAT (NO OBS SIGNIFICANT DOTS)\n", if(transfo == "log2"){"MEAN AND RATIO LOG2(x) "}else if(transfo == "log10"){"MEAN AND RATIO LOG10(x) "}else{"NO LOG TRANSFORMATION"}, "\nX SCALE RANGE: ", paste(fun_round(segm.x.range, 2), collapse = " , "), "\nY SCALE RANGE: ", paste(fun_round(segm.y.range, 2), collapse = " , ")) -fun_gg_scatter(data1 = list(L1 = final, L3 = segment.post.serp$hframe, L4 = data.frame(y = c(y.range.limit.inf, y.range.limit.sup), FACTOR_2_CUTOFFS = c("INF_LIMIT", "SUP_LIMIT"))), x = list(L1 = "MEAN", L3 = "x", L4 = NULL), y = list(L1 = "RATIO", L3 = "y", L4 = "y"), categ = list(L1 = "MATRICES", L3 = "FRAMES", L4 = "FACTOR_2_CUTOFFS"), legend.name = list(L1 = "MATRICES", L3 = "FRAMES", L4 = paste0(fun_round(ratio.limit.sig), " X CUTOFFS")), color = list(L1 = fun_gg_palette(2), L3 = "blue", L4 = "orange"), geom = list(L1 = "geom_point", L3 = "geom_path", L4 = "geom_hline"), alpha = list(L1 = 0.25, L3 = 0.5, L4 = 0.5), dot.size = dot.size, line.size = line.size, text.size = text.size, title.text.size = title.text.size, xlog = transfo, xlab = "MEAN", x.tick.nb = 8, ylog = transfo, xlim = segm.x.range, ylim = segm.y.range, ylab = "RATIO", y.tick.nb = 8, title = tempo.title, classic = TRUE, raster = raster, x.left.extra.margin = 0.05, x.right.extra.margin = 0.05, y.top.extra.margin = 0.05, y.bottom.extra.margin = 0.05) -} -# end plot verif obs dots outside -# end MD overlay plot before serpentine - - -# Mask of the obs data outside the cloud of the theo data on the MDMR plot (MEan Difference Mean Ratio) -if(is.null(signif.obs.dot.post)){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "POST SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT DOT DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT DOT DETECTED", output = log.file, path = path.out) -}else{ -# file saving -sup.obs.mask.post <- matrix(0, nrow = nrow(mat1.obs.serp), ncol = nrow(mat1.obs.serp)) # matrix same dim as obs full of zero -inf.obs.mask.post <- matrix(0, nrow = nrow(mat1.obs.serp), ncol = nrow(mat1.obs.serp)) # matrix same dim as obs full of zero -if(nrow(sup.signif.obs.dot.post) > 0){ -sup.obs.mask.post[sup.signif.obs.dot.post$coord_1D] <- 1 # create the mask. If signif.obs.dot.post is NULL, no 1 added -fun_report(data = paste0("POST SERPENTINE MAT2 > MAT1 MASK DATA SAVED IN: ", paste0(path.out, "/sup_mask_post_serp.txt")), output = log.file, path = path.out) -write.table(sup.obs.mask.post, file = paste0(path.out, "/sup_mask_post_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -if(nrow(inf.signif.obs.dot.post) > 0){ -inf.obs.mask.post[inf.signif.obs.dot.post$coord_1D] <- 1 # create the mask. If signif.obs.dot.post is NULL, no 1 added -fun_report(data = paste0("POST SERPENTINE MAT2 < MAT1 MASK DATA SAVED IN: ", paste0(path.out, "/inf_mask_post_serp.txt")), output = log.file, path = path.out) -write.table(inf.obs.mask.post, file = paste0(path.out, "/inf_mask_post_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -} -obs.mask.post <- sup.obs.mask.post + inf.obs.mask.post -fun_report(data = paste0("POST SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/mask_post_serp.txt")), output = log.file, path = path.out) -write.table(inf.obs.mask.post, file = paste0(path.out, "/mask_post_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(nrow(sup.signif.obs.dot.post) > 0){ -mask.plot.fun(mask_fun = sup.obs.mask.post, mat1_fun = mat1.obs.serp, mat2_fun = mat2.obs.serp, serp_kind = "AFTER", mask_kind = "POSITIVE (MAT2 > MAT1)", text_fun = ifelse(serp.symmet.input == TRUE, "\nSYMMETRIC MATRIX (FORCED BY USER OPTION)", "\nNO SYMMETRIC MATRIX")) -}else{ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "POST SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 > MAT1 SIGNIFICANT DOTS DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 > MAT1 SIGNIFICANT DOTS DETECTED", output = log.file, path = path.out) -} -if(nrow(inf.signif.obs.dot.post) > 0){ -mask.plot.fun(mask_fun = inf.obs.mask.post, mat1_fun = mat1.obs.serp, mat2_fun = mat2.obs.serp, serp_kind = "AFTER", mask_kind = "NEGATIVE (MAT2 < MAT1)", text_fun = ifelse(serp.symmet.input == TRUE, "\nSYMMETRIC MATRIX (FORCED BY USER OPTION)", "\nNO SYMMETRIC MATRIX")) -}else{ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "POST SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 < MAT1 SIGNIFICANT DOTS DETECTED", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO MAT2 < MAT1 SIGNIFICANT DOTS DETECTED", output = log.file, path = path.out) -} -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = obs.mask.post, mat1_fun = mat1.obs.serp, mat2_fun = mat2.obs.serp, serp_kind = "AFTER", mask_kind = "FULL", text_fun = ifelse(serp.symmet.input == TRUE, "\nSYMMETRIC MATRIX (FORCED BY USER OPTION)", "\nNO SYMMETRIC MATRIX")) -# end heatmaps -} - - -################ significant differences post serpentine (HIC COMPARE) - - -# see https://bioconductor.org/packages/release/bioc/manuals/HiCcompare/man/HiCcompare.pdf -if(hiccomp == TRUE){ -cat("\nSIGNIFICANT DIFFERENCES POST SERPENTINE (HIC COMPARE)\n") -fun_report(data = "\n\n################################ SIGNIFICANT DIFFERENCES POST SERPENTINE (HIC COMPARE)", path = path.out, output = log.file, sep = 4) -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(text = "SIGNIFICANT DIFFERENCES POST SERPENTINE\n(HIC COMPARE)", text.size = 3) -# p values computations -sparse.name <- cumsum(c(0, rep(binning, ncol(mat1.obs.serp) - 1))) -mat1.obs.serp.sparse <- mat1.obs.serp -colnames(mat1.obs.serp.sparse) <- sparse.name -mat1.obs.serp.sparse <- HiCcompare::full2sparse(mat1.obs.serp.sparse) -mat2.obs.serp.sparse <- mat2.obs.serp -colnames(mat2.obs.serp.sparse) <- sparse.name -mat2.obs.serp.sparse <- HiCcompare::full2sparse(mat2.obs.serp.sparse) -hic.table <- HiCcompare::create.hic.table(mat1.obs.serp.sparse, mat2.obs.serp.sparse, chr = "chr5") -norm.hic.table <- suppressMessages(HiCcompare::hic_loess(hic.table)) # see the help of the hic_loess() function for the description of the returned table -diff.res.serp <- suppressMessages(HiCcompare::hic_compare(norm.hic.table)) -diff.res.serp <- as.data.frame(diff.res.serp) -diff.res.serp <- data.frame(diff.res.serp, PVAL_MASK = 0L, PADJ_MASK = 0L) # add two columns to make the mask matrices -tempo.log.pvalue <- diff.res.serp$p.value <= 0.05 -tempo.log.padj <- diff.res.serp$p.adj <= 0.05 -if(sum(tempo.log.pvalue, na.rm = TRUE) > 0){ -diff.res.serp$PVAL_MASK[tempo.log.pvalue] <- 1 -} -if(sum(tempo.log.padj, na.rm = TRUE) > 0){ -diff.res.serp$PADJ_MASK[tempo.log.padj] <- 1 -} -# end p values computations -# heatmaps -# mask 1 -hiccomp1.mask.post <- NULL -if(sum(tempo.log.pvalue, na.rm = TRUE) <= 0){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "POST SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(NON ADJUSTED P VALUES)", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(NON ADJUSTED P VALUES)", output = log.file, path = path.out) -}else{ -# hiccomp.mask.post <- HiCcompare::sparse2full(diff.res.serp, hic.table = TRUE, column.name = "PVAL_MASK") # does not work because full2sparse() function removes the row of columns full of zero (dimension decreased) -hiccomp1.mask.post <- matrix(0L, nrow = nrow(mat1.obs.serp), ncol = ncol(mat1.obs.serp)) -tempo.mat.coord <- as.matrix(data.frame(match(diff.res.serp$start1[tempo.log.pvalue], sparse.name), -match(diff.res.serp$start2[tempo.log.pvalue], sparse.name))) -tempo.mat.coord <- rbind(tempo.mat.coord, tempo.mat.coord[, c(2, 1)]) # to have a symmetric mask -hiccomp1.mask.post[tempo.mat.coord] <- 1 -# end mask 1 -# file saving -fun_report(data = paste0("POST SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/hicc_mask_post_serp.txt")), output = log.file, path = path.out) -write.table(hiccomp1.mask.post, file = paste0(path.out, "/hicc_mask_post_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = hiccomp1.mask.post, mat1_fun = mat1.obs.serp, mat2_fun = mat2.obs.serp, serp_kind = "AFTER", mask_kind = "FULL", text_fun = "\nHIC COMPARE (NON ADJUSTED P VALUES)") -} -# end mask 1 -# mask 2 -hiccomp2.mask.post <- NULL -if(sum(tempo.log.padj, na.rm = TRUE) <= 0){ -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -fun_gg_empty_graph(title = "POST SERPENTINE MASK", text = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(ADJUSTED P VALUES)", text.size = 3, title.size = title.text.size) -fun_report(data = "NO MASK TO APPLY OVER HEATMAPS\nBECAUSE\nNO SIGNIFICANT CELL DETECTED\n(ADJUSTED P VALUES)", output = log.file, path = path.out) -}else{ -# hiccomp.mask.post <- HiCcompare::sparse2full(diff.res.serp, hic.table = TRUE, column.name = "PVAL_MASK") # does not work because full2sparse() function removes the row of columns full of zero (dimension decreased) -hiccomp2.mask.post <- matrix(0L, nrow = nrow(mat1.obs.serp), ncol = ncol(mat1.obs.serp)) -tempo.mat.coord <- as.matrix(data.frame(match(diff.res.serp$start1[tempo.log.padj], sparse.name), -match(diff.res.serp$start2[tempo.log.padj], sparse.name))) -tempo.mat.coord <- rbind(tempo.mat.coord, tempo.mat.coord[, c(2, 1)]) # to have a symmetric mask -hiccomp2.mask.post[tempo.mat.coord] <- 1 -# end mask 1 -# file saving -fun_report(data = paste0("POST SERPENTINE FULL MASK DATA SAVED IN: ", paste0(path.out, "/hicc_padj_mask_post_serp.txt")), output = log.file, path = path.out) -write.table(hiccomp2.mask.post, file = paste0(path.out, "/hicc_padj_mask_post_serp.txt"), row.names = FALSE, col.names = FALSE, append = FALSE, quote = FALSE, sep = "\t") -# end file saving -# heatmaps -if(activate.pdf == TRUE){ -invisible(dev.set(pdf.nb)) -}else{ -fun_open(pdf.disp = activate.pdf, width.fun = width.wind, height.fun = height.wind) -} -mask.plot.fun(mask_fun = hiccomp2.mask.post, mat1_fun = mat1.obs.serp, mat2_fun = mat2.obs.serp, serp_kind = "AFTER", mask_kind = "FULL", text_fun = "\nHIC COMPARE (ADJUSTED P VALUES)") -} -# end mask -# end heatmaps -if(keep == FALSE){ -tempo.list <- c("mat1.obs.serp.sparse", "mat2.obs.serp.sparse", "diff.res.serp", "hiccomp1.mask.post", "hiccomp2.mask.post") -tempo.cat <- paste0("POST SERPENTINE HIC COMPARE MASK MATRICES SAVED IN: ", paste0(path.out, "/", paste0(tempo.list, collapse = "_"), "_backup.RData")) -fun_report(data = tempo.cat, output = log.file, path = path.out) -save(list = tempo.list, file = paste0(path.out, "/post_serp_hiccomp_backup.RData")) -rm(list = tempo.list) # not saved because initial matrices -} -} - - -} - - -################ Pdf window closing - - -fun_close() - - -################ Environment saving - - -fun_report("\n\n################################ RUNNING END", output = log.file, path = path.out) -end.date <- Sys.time() -end.time <- as.numeric(end.date) -total.lapse <- round(lubridate::seconds_to_period(end.time - ini.time)) -fun_report(data = end.date, path = path.out, output = log.file, vector.cat = TRUE) -fun_report(data = paste0("LAPSE: ", total.lapse), path = path.out, output = log.file, vector.cat = TRUE) -cat(paste0("\nSLITHERINE END\n\nTOTAL TIME LAPSE: ", total.lapse, "\n")) -fun_report(data = paste0("ALL DATA SAVED IN: ", paste0(path.out, "/all_objects.RData")), output = log.file, path = path.out) -save(list = ls(), file = paste0(path.out, "/all_objects.RData")) - - -################ Warning messages - - -if( ! is.null(warning.message)){ -fun_report(data = "\n\n################################ WARNING MESSAGES", path = path.out, output = log.file, sep = 4) -fun_report(data = warning.message, path = path.out, output = log.file) -} - - -################ Parameter printing - - -fun_report("\n\n################################ INITIAL SETTINGS OF PARAMETERS", output = log.file, path = path.out) -fun_report(data = param.ini.settings, path = path.out, output = log.file, vector.cat = TRUE) -fun_report("\n\n################################ R SYSTEM AND PACKAGES", output = log.file, path = path.out) -fun_report(data = sessionInfo(), path = path.out, output = log.file, vector.cat = TRUE) -if(serp.binning == TRUE){ -fun_report("\n\n################################ PYTHON SYSTEM AND PACKAGES", output = log.file, path = path.out) -fun_report(data = reticulate::py_config(), path = path.out, output = log.file, vector.cat = TRUE) # to get the version of python, or use a python chunk ```{python} import IPython; python_session = IPython.sys_info();``` then reticulate::py$python_session -fun_report(data = as.matrix(system("pip freeze", intern = TRUE)), path = path.out, output = log.file, vector.cat = TRUE) # bash command to get the version of the python packages installed on my computer -} -cat("\nSLITHERINE JOB END\n") - - -################################ End Main code - -