diff --git a/README.md b/README.md
index 586531ffe43971175d208c0a7b4f5f45f1688eee..92d1dadee2835aae1b4f65d869222c91a93a80ff 100644
--- a/README.md
+++ b/README.md
@@ -34,11 +34,12 @@ rogge_12231.conf	config file. Parameters need to be set by the user inside this
 rogge_12231_workflow.sh	file allowing checkings and SLURM job submission
 rogge_12231_main_analysis.R	R script run by SLURM
 rogge_12231_data_compilation.R	R script run by SLURM that compil the data from the previous loops
-
 The four files must be in the same directory before running
 
+rogge_12231_manual_graph_adjustment.R	optional R script to run in R, after SLURM job, to improve the representation of the graphs. Open the script and set the parameter inside before running
+
 
-#### OUTPUT DESCRIPTION
+#### TARS OUTPUT DESCRIPTION
 
 ## For each job submitted to SLURM:
 workflow_log	messages printed by the rogge_12231_workflow.sh script
@@ -59,6 +60,11 @@ Check for updated versions (most recent tags) at https://gitlab.pasteur.fr/gmill
 
 #### WHAT'S NEW IN
 
+## v2.0.0
+
+rogge_12231_manual_graph_adjustment.R file added
+
+
 ## v1.0.0
 
 Everything
diff --git a/rogge_12231_data_compilation.R b/rogge_12231_data_compilation.R
index 5505eb402e5274d8afdae91459153f470815bb48..087fc1ce7ff048b55b0c30d13bae9b70892de8c1 100644
--- a/rogge_12231_data_compilation.R
+++ b/rogge_12231_data_compilation.R
@@ -638,7 +638,7 @@ if(nrow(final.mod.gene.names) == 0 | nrow(na.omit(final.mod.gene.names)) == 0){
     }
     print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
     fun_export_data(path = path.out, data = paste0("SEE THE final.mod.gene.names AND THE final.mod.gene.names.freq OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-    backup.name <- c(backup.name, "final.mod.gene.names.freq")
+    backup.name <- c(backup.name, "final.mod.gene.names.freq", "optimal.gene.nb", "selected.gene.names")
 }
 
 
@@ -792,10 +792,9 @@ if(analysis.kind == "valid_boot"){
         tempo <- dev.set(pdf.nb) # assign to avoid the message
         corrplot::corrplot(cor(nano.rf.selected.genes), tl.col = "black", tl.cex = label.size / 10)
         fun_export_data(path = path.out, data = paste0("SEE THE nano.rf.selected.genes OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-        backup.name <- c(backup.name, "nano.rf.selected.genes")
     }
 }
-
+backup.name <- c(backup.name, "nano.rf.selected.genes", "dat")
 
 
 ######## 4.2 Validate the model
@@ -837,6 +836,7 @@ for(i0 in 1:length(data.kind)){
             for(i2 in 1:slurm.loop.nb){
                 # roc curves
                 if(nrow(get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))) > 0){
+                    backup.name <- c(backup.name, paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))
                     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(data = get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))[order(get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))$x, get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))$y, decreasing = FALSE), ], mapping = ggplot2::aes(x = x, y = y), color = "grey", alpha = 0.5))
                 }
             }
@@ -891,6 +891,7 @@ boxdat <- data.frame(ASDAS = df.nano$response_ASDAS_R_NR,
 )
 boxdat_melt <- reshape2::melt(boxdat, id.vars = c("ASDAS", "Cohort.name", "RF.COHORTE"), variable.name = "Gene")
 boxdat_melt$Gene <- factor(boxdat_melt$Gene, levels = unique(boxdat_melt$Gene))
+backup.name <- c(backup.name, "boxdat_melt")
 
 # boxplot
 bar.width <- 0.5
diff --git a/rogge_12231_manual_graph_adjustment.R b/rogge_12231_manual_graph_adjustment.R
index 74545c060c4fc0d9d0a30adbe0d29cf5ba293b78..3179ff01ce0c6d20c75dc5cce7722095200f27a1 100644
--- a/rogge_12231_manual_graph_adjustment.R
+++ b/rogge_12231_manual_graph_adjustment.R
@@ -1,42 +1,139 @@
+################################################################
+##                                                            ##
+##      Rogge_12231 Manual Graph Adjustment v1.0.0            ##
+##                                                            ##
+##                                                            ##
+##                                                            ##
+################################################################
 
 
 
 
+################################ Aim
 
 
+# Manual adjustment of the rogge_12231_data_compilation.R output for valid_boot and full_cross_validation analyses
+# With this code, it is possible to redraw the graph output from a cluster job
 
 
+################################ End Aim
+
+
+################################ Introduction
+
+
+# Compatible with R v3.5.1
+# Packages requirement: see below
+
+
+################################ End Introduction
+
+
+################################ Credits
+
+
+# Gael A Millot | Hub-C3BI, Institut Pasteur, Paris, France
+
+
+################################ End Credits
+
 
 ################################ Initialization
 
-erase.objects <- TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
+
+# R version checking
+if(version$version.string != "R version 3.5.1 (2018-07-02)"){
+    cat("\n\nBEWARE: THE ", version$version.string, " IS NOT THE 3.5.1 RECOMMANDED\n\n")
+}
+# other initializations
+erase.objects = TRUE # write TRUE to erase all the existing objects in R before starting the algorithm and FALSE otherwise. Beginners should use TRUE
 if(erase.objects == TRUE){
     rm(list=ls())
     erase.objects = TRUE
 }
-erase.graphs <- TRUE # write TRUE to erase all the graphic windows in R before starting the algorithm and FALSE otherwise
+erase.graphs = TRUE # write TRUE to erase all the graphic windows in R before starting the algorithm and FALSE otherwise
+
 
 ################################ End Initialization
 
 
+################################ Parameters that need to be set by the user
 
-################################ Recording of the initial parameters
 
+################ File names and locations
 
 
-################################ End Recording of the initial parameters
+data.file <- "Z:\\rogge12231\\first attempt\\rogge_12231_1550598741\\final_res\\compiled_data.RData" # RData file and absolute path to source
+path.function1 <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v4.5.0/cute_little_R_functions.R" # /pasteur/homes/gmillot/Git_versions_to_use/cute_little_R_functions-v4.5.0/cute_little_R_functions.R
+path.out <- "C:/Users/Gael/Desktop/"
+
+################ Parameters
 
-################################ DEBUG
+project.name <- "rogge_12231"
+analysis.kind <- "valid_boot" # kind of analysis. See .conf file
+slurm.loop.nb <- 100 # indicate the number of loops used
+optional.text <- "" # write here an optional text to include in results and graphs
 
 
+################ Graphical and display parameters
 
-activate.pdf <- TRUE ; cat(paste0("\n\n================\n\nERROR: ACTIVE GRAPH FILE PARAMETERS\n\n================\n\n")) # graph file parameter
-cut.off.freq.for.selected.genes <- 0.6  ; cat(paste0("\n\n================\n\nERROR: ACTIVE GRAPH FILE PARAMETERS\n\n================\n\n"))# graph file parameter
 
+activate.pdf <- TRUE # write TRUE for pdf display and FALSE for R display
+window.width <- 7
+window.height <- 7
+amplif <- 14 # increase or decrease the value to increase or decrease the size of the axis numbers and legend
+bar.width <- 0.5 # width of bars
+log.scale <- FALSE # log scale ?
 
+# boxplots
+facet.nrow <- 4 # nomber of rows in the boxplot facet
+facet.ncol <- 4 # nomber of columns in the boxplot facet
 
 
-################################ End DEBUG
+################################ End Parameters that need to be set by the user
+
+
+################################ Recording of the initial parameters
+
+
+param.list <- c(
+    "erase.objects", 
+    "erase.graphs", 
+    "data.file",
+    "path.function1",
+    "path.out",
+    "project.name",
+    "analysis.kind",
+    "slurm.loop.nb",
+    "optional.text", 
+    "activate.pdf",
+    "window.width",
+    "window.height",
+    "amplif",
+    "bar.width",
+    "log.scale",
+    "facet.nrow",
+    "facet.ncol"
+)
+if(any(duplicated(param.list))){
+    stop(paste0("\n\n================\n\nERROR: THE param.list OBJECT CONTAINS DUPLICATED ELEMENTS\n\n================\n\n")) # message for developers
+}
+if(erase.objects == TRUE){
+    created.object.control <- ls()[ ! ls() %in% "param.list"]
+    if( ! (all(created.object.control %in% param.list) & all(param.list %in% created.object.control))){
+        stop(paste0("\n\n================\n\nERROR: INCONSISTENCIES BETWEEN THE param.list ELEMENTS AND THE CREATED OBJECTS\nTHE CREATED OBJECTS (created.object.control) NOT PRESENT IN THE param.list ARE: ", paste(created.object.control[ ! created.object.control %in% param.list], collapse = " "), "\nTHE param.list ELEMENTS NOT PRESENT IN THE CREATED OBJECTS (created.object.control) ARE: ", paste(param.list[ ! param.list %in% created.object.control], collapse = " "), "\n\n================\n\n")) # message for developers
+    }
+}
+char.length <- nchar(param.list)
+space.add <- max(char.length) - char.length + 5
+param.ini.settings <- NULL
+for(i in 1:length(param.list)){
+    param.ini.settings <- c(param.ini.settings, paste0("\n", param.list[i], paste0(rep(" ", space.add[i]), collapse = ""), paste0(get(param.list[i]), collapse = ",")))
+}
+
+
+################################ End Recording of the initial parameters
+
 
 ################################ Packages verification and import
 
@@ -50,24 +147,20 @@ req.package.list <- c(
     "lubridate",
     "RCurl" # for url.exists
 )
-if(path.lib == "none"){
-    path.lib <- .libPaths() #
-}else{
-    # .libPaths(new = ) add path to default path
-  .libPaths(new = sub(x = path.lib, pattern = "/$|\\\\$", replacement = "")) # .libPaths() does not support / at the end of a submitted path. Thus check and replace last / or \\ in path
-}
 for(i0 in 1:length(req.package.list)){
-    if( ! req.package.list[i0] %in% rownames(installed.packages(lib.loc =  path.lib))){
-        stop(paste0("\n\n================\n\nERROR: PACKAGE ", req.package.list[i0], " MUST BE INSTALLED IN THE MENTIONNED DIRECTORY:\n", paste(path.lib, collapse = "\n"), "\n CHECK ALSO IN :\n", paste(.libPaths(), collapse = "\n"), "\n\n================\n\n"))
+    if( ! req.package.list[i0] %in% rownames(installed.packages())){
+        stop(paste0("\n\n================\n\nERROR: PACKAGE ", req.package.list[i0], " MUST BE INSTALLED IN THE MENTIONNED DIRECTORY:\n", paste(.libPaths(), collapse = "\n"), "\n\n================\n\n"))
     }else{
-        suppressPackageStartupMessages(library(req.package.list[i0], lib.loc = path.lib, quietly = TRUE, character.only = TRUE))
+        suppressPackageStartupMessages(library(req.package.list[i0], quietly = TRUE, character.only = TRUE))
     }
 }
 
 ################################ End Packages verification and import
 
+
 ################################ Functions
 
+
 if(length(path.function1) != 1){
     stop(paste0("\n\n============\n\nERROR: path.function1 PARAMETER MUST BE LENGTH 1: ", paste(path.function1, collapse = " "), "\n\n============\n\n"))
 }else if(grepl(x = path.function1, pattern = "^http")){
@@ -86,8 +179,10 @@ if(length(path.function1) != 1){
 
 ################################ End Functions
 
+
 ################################ Main code
 
+
 ################ Pre-ignition checking
 
 arg.check <- NULL # for function debbuging
@@ -97,25 +192,12 @@ ee <- expression(arg.check <- c(arg.check, tempo$problem) , checked.arg.names <-
 # initializations
 tempo <- fun_param_check(data = erase.objects, class = "logical", length = 1) ; eval(ee)
 tempo <- fun_param_check(data = erase.graphs, class = "logical", length = 1) ; eval(ee)
-tempo <- fun_param_check(data = script, class = "character", length = 1) ; eval(ee)
-tempo <- fun_param_check(data = args, class = "character", length = length(tempo.arg.names)) ; eval(ee)
-tempo <- fun_param_check(data = tempo.arg.names, class = "character", length = length(tempo.arg.names)) ; eval(ee)
 
 # imported objects
-tempo <- fun_param_check(data = path.lib, class = "character", length = 1) ; eval(ee)
-if(tempo$problem == FALSE & path.lib != "none"){
-    if( ! dir.exists(path.lib)){
-        cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.in PARAMETER DOES NOT EXISTS: ", path.in, "\n\n============\n\n"))
-        arg.check <- TRUE
-    }
-}
-tempo <- fun_param_check(data = path.in, class = "character", length = 1) ; eval(ee)
-if(tempo$problem == FALSE & ! dir.exists(path.in)){
-    cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.in PARAMETER DOES NOT EXISTS: ", path.in, "\n\n============\n\n"))
-    arg.check <- TRUE
-}
-if(tempo$problem == FALSE & ! ("loop1_res_data.RData" %in% list.files(paste0(path.in, "loop1/")))){
-    cat(paste0("\n\n============\n\nERROR: loop1_res_data.RData SHOULD BE PRESENT IN ", paste0(path.in, "loop1/"), ":\nCHECK THE path.in PARAMETER\n\n============\n\n"))
+
+tempo <- fun_param_check(data = data.file, class = "character", length = 1) ; eval(ee)
+if(tempo$problem == FALSE & ! file.exists(data.file)){
+    cat(paste0("\n\n============\n\nERROR: FILE NAME AND  PATH INDICATED IN THE data.file PARAMETER DOES NOT EXISTS: ", data.file, "\n\n============\n\n"))
     arg.check <- TRUE
 }
 tempo <- fun_param_check(data = path.out, class = "character", length = 1) ; eval(ee)
@@ -126,10 +208,17 @@ if(tempo$problem == FALSE & ! dir.exists(path.out)){
 # path.function1 fully tested above
 tempo <- fun_param_check(data = path.function1, class = "character", length = 1) ; eval(ee)
 tempo <- fun_param_check(data = project.name, class = "character", length = 1) ; eval(ee)
-tempo <- fun_param_check(data = label.size, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
-tempo <- fun_param_check(data = optional.text, class = "character", length = 1) ; eval(ee)
-tempo <- fun_param_check(data = slurm.loop.nb, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
 tempo <- fun_param_check(data = analysis.kind, options = c("longit", "valid_boot", "full_cross_validation"), length = 1) ; eval(ee)
+tempo <- fun_param_check(data = slurm.loop.nb, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
+tempo <- fun_param_check(data = optional.text, class = "character", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = activate.pdf, class = "logical", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = window.width, class = "numeric", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = window.height, class = "numeric", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = amplif, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
+tempo <- fun_param_check(data = bar.width, class = "numeric", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = log.scale, class = "logical", length = 1) ; eval(ee)
+tempo <- fun_param_check(data = facet.nrow, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
+tempo <- fun_param_check(data = facet.ncol, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
 if(any(arg.check) == TRUE){
     stop()
 }
@@ -139,16 +228,10 @@ if(any(arg.check) == TRUE){
 ini.time <- as.numeric(Sys.time()) # time of process begin, converted into seconds
 analysis.nb <- trunc(ini.time) # to provide a specific number ot each analysis
 log.file <- paste0("r_", project.name, "_", analysis.nb,"_report.txt")
-# name.dir <- paste0(project.name, "_", analysis.nb)
-# path.out<-paste0(path.out, name.dir)
-# suppressWarnings(dir.create(path.out))
-backup.name <- NULL # names of the object to save
+name.dir <- paste0(project.name, "_", analysis.nb)
+path.out<-paste0(path.out, name.dir)
+suppressWarnings(dir.create(path.out))
 fun_export_data(data = paste0("\n\n################################ ", log.file, " ################"), output = log.file, no.overwrite = FALSE, path = path.out, sep = 4)
-fun_export_data(path = path.out, data = "################################ INITIAL DATA", output = log.file)
-fun_export_data(path = path.out, data = paste0("SCRIPT USED: ", script), output = log.file)
-fun_export_data(path = path.out, data = paste0("KIND OF ANALYSIS PERFORMED: ", analysis.kind), output = log.file)
-fun_export_data(path = path.out, data = paste0("NUMBER OF LOOPS USED FOR COMPILATION: ", slurm.loop.nb), output = log.file)
-fun_export_data(path = path.out, data = paste0("FOR INFO: THE RESPONSE USED IS THE COLUMN: response_ASDAS_R_NR"), output = log.file)
 
 
 ################ Graphical parameter ignition
@@ -165,7 +248,7 @@ if(optional.text == "no.txt"){
 }else{
     fun_export_data(path = path.out, data = paste0("OPTIONAL TEXT:\n", optional.text), output = log.file)
 }
-par.ini <- fun_open_window(pdf.disp = activate.pdf, path.fun = path.out, pdf.name.file = paste0("final_graphs"), width.fun = 7, height.fun = 7, paper = "special", no.pdf.overwrite = TRUE, return.output = TRUE)$ini.par
+par.ini <- fun_open_window(pdf.disp = activate.pdf, path.fun = path.out, pdf.name.file = paste0("final_graphs"), width.fun = window.width, height.fun = window.height, paper = "special", no.pdf.overwrite = TRUE, return.output = TRUE)$ini.par
 if(activate.pdf == TRUE){
     pdf.nb <- dev.cur()
 }else{
@@ -174,39 +257,17 @@ if(activate.pdf == TRUE){
 
 ################ Data import
 
-load()
-
-
-
-
+load(data.file)
 
 ################ 3 Differential analysis with limma
 
 fun_export_data(path = path.out, data = "################################ LIMMA ANALYSIS", output = log.file)
 
-if(nrow(final.ttab) == 0 | nrow(na.omit(final.ttab)) == 0){
+if( ! any(ls() %in% "final.ttab.freq")){
+    fun_export_data(path = path.out, data = paste0("NO final.ttab.freq RESULTS FROM THE LIMMA ANALYSIS"), output = log.file)
+}else if(nrow(final.ttab.freq) == 0 | nrow(na.omit(final.ttab.freq)) == 0){
     fun_export_data(path = path.out, data = paste0("NO GENE LIST RESULTS FROM THE LIMMA ANALYSIS (P VALUES ABOVE 0.05 AFTER CORRECTION FOR INSTANCE)"), output = log.file)
-    fun_export_data(path = path.out, data = final.ttab, output = log.file)
-}else if(analysis.kind == "valid_boot"){
-    fun_export_data(path = path.out, data = paste0("BEWARE: NO COMPILATION FOR THE LIMMA ANALYSIS (", analysis.kind, " KIND OF ANALYSIS)"), output = log.file)
 }else{
-    tempo1 <- c(table(rownames(na.omit(final.ttab))))
-    tempo2 <- aggregate(final.ttab$adj.P.Val, list(rownames(final.ttab)), median, na.rm = TRUE)
-    names(tempo2) <- c("GENE", "ADJ_P_VALUE_MEDIAN")
-    tempo2 <- data.frame(
-        tempo2,
-        ADJ_P_VALUE_CI95_INF = aggregate(final.ttab$adj.P.Val, list(rownames(final.ttab)), quantile, probs = 0.025, na.rm = TRUE)$x,
-        ADJ_P_VALUE_CI95_SUP = aggregate(final.ttab$adj.P.Val, list(rownames(final.ttab)), quantile, probs = 0.975, na.rm = TRUE)$x
-    )
-    final.ttab.freq <- data.frame(tempo2, NB = unname(tempo1), FREQ = unname(tempo1)/slurm.loop.nb)
-    rownames(final.ttab.freq) <- NULL
-    fun_export_data(path = path.out, data = "FREQUENCIES (RELATED TO NB OF TIME GENES HAVE BEEN SELECTED PER LOOP) AND P VALUES MEDIANS & CI: ", output = log.file)
-    fun_export_data(path = path.out, data = final.ttab.freq, output = log.file)
-    
-    # horiz barplot
-    bar.width <- 0.5
-    log.scale <- FALSE
-    amplif <- label.size
     tempo.gg.name <- "gg.indiv.plot."
     tempo.gg.count <- 0
     if(nrow(final.ttab.freq) > 20){
@@ -214,21 +275,41 @@ if(nrow(final.ttab) == 0 | nrow(na.omit(final.ttab)) == 0){
         tempo2 <- tempo1[order(names(tempo1), decreasing = TRUE)]
         tempo3 <- as.numeric(names(cumsum(tempo2)[cumsum(tempo2) <= 20]))
         tempo.plot <- final.ttab.freq[round(final.ttab.freq$FREQ, 7) %in% round(tempo3, 7), ]
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
-        tempo.txt <- paste0("BEWARE: ONLY THE ", nrow(tempo.plot), " MOST FREQUENT GENES PLOTTED, AMONG ", nrow(final.ttab.freq))
+        if(length(tempo3) == 0){
+            tempo.plot <- final.ttab.freq
+            tempo.txt <- paste0("BEWARE: UNIQUE FREQUENCY ", names(tempo2), " FOR ALL THE ", unname(tempo2), " GENES -> CRAZY PLOT")
+        }else{
+            tempo.txt <- paste0("BEWARE: ONLY THE ", nrow(tempo.plot), " MOST FREQUENT GENES PLOTTED, AMONG ", nrow(final.ttab.freq))
+        }
     }else{
         tempo.plot <- final.ttab.freq
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data =  tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
         tempo.txt <- ""
     }
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_bar(stat = "identity", position = "dodge", color = "black", fill = grey(0.8), width = bar.width))
-    # assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_errorbar(ggplot2::aes(ymin = PROP.CI95.inf, ymax = PROP.CI95.sup), position = ggplot2::position_dodge(width = bar.width), color = "black", width = 0))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("LIMMA GENE LIST\n(PROP OF TIMES THE GENE IS SIGNIFICANT FOR ", slurm.loop.nb, " LOOPS)\n", tempo.txt)))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab(""))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("PROPORTION"))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
+        # plot.margin = margin(up.space.mds, right.space.mds, down.space.mds, left.space.mds, "inches"), 
+        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
+        axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
+        axis.text = ggplot2::element_text(size = amplif * 0.75), 
+        axis.title = ggplot2::element_text(size = amplif), 
+        legend.text = ggplot2::element_text(size = amplif), 
+        legend.title = ggplot2::element_text(size = amplif), 
+        strip.text = ggplot2::element_text(size = amplif), 
+        legend.key = ggplot2::element_blank(), 
+        panel.background = ggplot2::element_rect(fill = "white"), 
+        panel.border = ggplot2::element_rect(colour = "black", fill = NA), 
+        panel.grid.major.x = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.major.y = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.minor.x = ggplot2::element_blank(), 
+        panel.grid.minor.y = ggplot2::element_blank(), 
+        strip.background = ggplot2::element_rect(fill = "white", colour = "black")
+    ))
     coord <- ggplot2::ggplot_build(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))$data # to have the summary statistics of the plot. is interesting: x = coord[[2]]$x, y = coord[[2]]$ymax_final
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[1]]$x, y = coord[[1]]$ymax * 1.05, label = round(coord[[1]]$y, 3), size = amplif/2, color = "black", vjust = "center", hjust = "right")) # beware: no need of order() for labels because coord[[1]]$x set the order
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[1]]$x, y = coord[[1]]$ymax * 1.05, label = round(coord[[1]]$y, 3), size = amplif/3, color = "black", vjust = "center", hjust = "left")) # beware: no need of order() for labels because coord[[1]]$x set the order
     if(log.scale == TRUE){
         assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
     }else{
@@ -238,11 +319,9 @@ if(nrow(final.ttab) == 0 | nrow(na.omit(final.ttab)) == 0){
     if(activate.pdf == TRUE){
         tempo <- dev.set(pdf.nb) # assign to avoid the message
     }else{
-        windows(5, 5)
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
     }
     print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
-    fun_export_data(path = path.out, data = paste0("SEE THE final.ttab AND THE final.ttab.freq OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-    backup.name <- c(backup.name, "final.ttab.freq")
 }
 
 
@@ -255,50 +334,98 @@ if(nrow(final.ttab) == 0 | nrow(na.omit(final.ttab)) == 0){
 
 fun_export_data(path = path.out, data = "################################ RANDOM FOREST LEARNING USING THE DISCOVERY SET", output = log.file)
 
-#mod.gene.names
+#select.gene.curve (nb of features)
 
-fun_export_data(path = path.out, data = "################ LIST OF GENES SELECTED DURING THE MODEL ELABORATION", output = log.file)
+fun_export_data(path = path.out, data = "################ OPTIMAL NUMBER OF GENES DURING THE MODEL ELABORATION", output = log.file)
 
-if(nrow(final.mod.gene.names) == 0 | nrow(na.omit(final.mod.gene.names)) == 0){
-    fun_export_data(path = path.out, data = paste0("NO GENE LIST RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
-    fun_export_data(path = path.out, data = final.mod.gene.names, output = log.file)
-}else if(analysis.kind == "valid_boot"){
-    fun_export_data(path = path.out, data = paste0("BEWARE: NO COMPILATION FOR THE THE RANDOM FOREST TRAINING (", analysis.kind, " KIND OF ANALYSIS)"), output = log.file)
-    # fun_export_data(path = path.out, data = final.mod.gene.names, output = log.file)
+if( ! any(ls() %in% "final.select.gene.curve.median")){
+    fun_export_data(path = path.out, data = paste0("NO final.select.gene.curve.median RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
+}else if(nrow(final.select.gene.curve.median) == 0 | nrow(na.omit(final.select.gene.curve.median)) == 0){
+    fun_export_data(path = path.out, data = paste0("NO OPTIMAL RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
 }else{
-    tempo1 <- c(table(final.mod.gene.names$GENE, useNA = "no"))
-    final.mod.gene.names.freq <- data.frame(GENE = names(tempo1), NB = unname(tempo1), FREQ = unname(tempo1)/slurm.loop.nb)
-    final.mod.gene.names.freq <- final.mod.gene.names.freq[order(final.mod.gene.names.freq$FREQ, decreasing = TRUE), ]
-    rownames(final.mod.gene.names.freq) <- NULL
-    fun_export_data(path = path.out, data = "FREQUENCIES: ", output = log.file)
-    fun_export_data(path = path.out, data = final.mod.gene.names.freq, output = log.file)
-    
-    # horiz barplot
-    bar.width <- 0.5
-    log.scale <- FALSE
-    amplif <- label.size
     tempo.gg.name <- "gg.indiv.plot."
     tempo.gg.count <- 0
-    if(nrow(final.mod.gene.names.freq) > 20){
-        tempo1 <- table(final.mod.gene.names.freq$FREQ, useNA = "no")
-        tempo2 <- tempo1[order(names(tempo1), decreasing = TRUE)]
-        tempo3 <- as.numeric(names(cumsum(tempo2)[cumsum(tempo2) <= 20]))
-        tempo.plot <- final.mod.gene.names.freq[round(final.mod.gene.names.freq$FREQ, 7) %in% round(tempo3, 7), ]
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
-        tempo.txt <- paste0("BEWARE: ONLY THE ", nrow(tempo.plot), " MOST FREQUENT GENES PLOTTED, AMONG ", nrow(final.mod.gene.names.freq))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = final.select.gene.curve.median, mapping = ggplot2::aes(x = X, y = MEDIAN)))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_point(color = "red", shape = 16, size = 2))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(color = "red"))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_ribbon(ggplot2::aes(ymin = CI95.inf, ymax = CI95.sup), color = NA, fill = "red", alpha = 0.1))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("OPTIMAL NUMBER OF GENES (n = ", slurm.loop.nb,")")))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab("NUMBER OF GENES"))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("MEDIAN & 95CI"))
+    # assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
+        # plot.margin = margin(up.space.mds, right.space.mds, down.space.mds, left.space.mds, "inches"), 
+        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
+        axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
+        axis.text = ggplot2::element_text(size = amplif * 0.75), 
+        axis.title = ggplot2::element_text(size = amplif), 
+        legend.text = ggplot2::element_text(size = amplif), 
+        legend.title = ggplot2::element_text(size = amplif), 
+        strip.text = ggplot2::element_text(size = amplif), 
+        legend.key = ggplot2::element_blank(), 
+        panel.background = ggplot2::element_rect(fill = "white"), 
+        panel.border = ggplot2::element_rect(colour = "black", fill = NA), 
+        panel.grid.major.x = ggplot2::element_line(colour = "white"), 
+        panel.grid.major.y = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.minor.x = ggplot2::element_blank(), 
+        panel.grid.minor.y = ggplot2::element_blank(), 
+        strip.background = ggplot2::element_rect(fill = "white", colour = "black")
+    ))
+    if(log.scale == TRUE){
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
     }else{
-        tempo.plot <- final.mod.gene.names.freq
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data =  tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
-        tempo.txt <- ""
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_x_continuous(expand = c(0,0)))
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_y_continuous(expand = c(0,0), breaks = scales::extended_breaks(10)))
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(xlim= c(min(final.select.gene.curve.median$X, na.rm = TRUE), max(final.select.gene.curve.median$X, na.rm = TRUE)), ylim = c(0, max(final.select.gene.curve.median$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
+    }
+    if(activate.pdf == TRUE){
+        tempo <- dev.set(pdf.nb) # assign to avoid the message
+    }else{
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
     }
+    print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
+
+}
+
+
+#mod.gene.names (final gene list)
+
+fun_export_data(path = path.out, data = "################ LIST OF GENES SELECTED DURING THE MODEL ELABORATION", output = log.file)
+
+if( ! any(ls() %in% "final.mod.gene.names.freq")){
+    fun_export_data(path = path.out, data = paste0("NO final.mod.gene.names.freq RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
+}else if(nrow(final.mod.gene.names.freq) == 0 | nrow(na.omit(final.mod.gene.names.freq)) == 0){
+    fun_export_data(path = path.out, data = paste0("NO GENE LIST RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
+}else{
+    tempo.gg.name <- "gg.indiv.plot."
+    tempo.gg.count <- 0
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data =  final.mod.gene.names.freq, mapping = ggplot2::aes(x = reorder(GENE, FREQ), y = FREQ))) # reorder from higher to lower
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_bar(stat = "identity", position = "dodge", color = "black", fill = grey(0.8), width = bar.width))
     # assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_errorbar(ggplot2::aes(ymin = PROP.CI95.inf, ymax = PROP.CI95.sup), position = ggplot2::position_dodge(width = bar.width), color = "black", width = 0))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("RANDOM FOREST MODELISATION GENE LIST\n(PROP OF TIMES THE GENES HAVE BEEN SELECTED FOR ", slurm.loop.nb, " LOOPS)\n", tempo.txt)))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("RANDOM FOREST MODELISATION GENE LIST\n(PROP OF TIMES THE GENES HAVE BEEN SELECTED FOR ", slurm.loop.nb, " LOOPS)")))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab(""))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("PROPORTION"))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
+    # assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
+        # plot.margin = margin(up.space.mds, right.space.mds, down.space.mds, left.space.mds, "inches"), 
+        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
+        axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
+        axis.text = ggplot2::element_text(size = amplif * 0.75), 
+        axis.title = ggplot2::element_text(size = amplif), 
+        legend.text = ggplot2::element_text(size = amplif), 
+        legend.title = ggplot2::element_text(size = amplif), 
+        strip.text = ggplot2::element_text(size = amplif), 
+        legend.key = ggplot2::element_blank(), 
+        panel.background = ggplot2::element_rect(fill = "white"), 
+        panel.border = ggplot2::element_rect(colour = "black", fill = NA), 
+        panel.grid.major.x = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.major.y = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.minor.x = ggplot2::element_blank(), 
+        panel.grid.minor.y = ggplot2::element_blank(), 
+        strip.background = ggplot2::element_rect(fill = "white", colour = "black")
+    ))
     coord <- ggplot2::ggplot_build(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))$data # to have the summary statistics of the plot. is interesting: x = coord[[2]]$x, y = coord[[2]]$ymax_final
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[1]]$x, y = coord[[1]]$ymax * 1.05, label = round(coord[[1]]$y, 3), size = amplif/2, color = "black", vjust = "center", hjust = "right")) # beware: no need of order() for labels because coord[[1]]$x set the order
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[1]]$x, y = coord[[1]]$ymax * 1.05, label = round(coord[[1]]$y, 3), size = amplif/3, color = "black", vjust = "center", hjust = "left")) # beware: no need of order() for labels because coord[[1]]$x set the order
     if(log.scale == TRUE){
         assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
     }else{
@@ -308,11 +435,9 @@ if(nrow(final.mod.gene.names) == 0 | nrow(na.omit(final.mod.gene.names)) == 0){
     if(activate.pdf == TRUE){
         tempo <- dev.set(pdf.nb) # assign to avoid the message
     }else{
-        windows(5, 5)
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
     }
     print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
-    fun_export_data(path = path.out, data = paste0("SEE THE final.mod.gene.names AND THE final.mod.gene.names.freq OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-    backup.name <- c(backup.name, "final.mod.gene.names.freq")
 }
 
 
@@ -320,37 +445,11 @@ if(nrow(final.mod.gene.names) == 0 | nrow(na.omit(final.mod.gene.names)) == 0){
 
 fun_export_data(path = path.out, data = "################ GENE IMPORTANCE DURING THE MODEL ELABORATION", output = log.file)
 
-if(nrow(final.gene.importance) == 0 | nrow(na.omit(final.gene.importance)) == 0){
-    fun_export_data(path = path.out, data = paste0("NO GENE IMPORTANCE RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
-    fun_export_data(path = path.out, data = final.gene.importance, output = log.file)
-}else if(analysis.kind == "valid_boot"){
-    fun_export_data(path = path.out, data = paste0("BEWARE: NO COMPILATION FOR THE THE RANDOM FOREST TRAINING (", analysis.kind, " KIND OF ANALYSIS)"), output = log.file)
-    # fun_export_data(path = path.out, data = final.gene.importance, output = log.file)
+if( ! any(ls() %in% "final.gene.importance.median")){
+    fun_export_data(path = path.out, data = paste0("NO final.gene.importance.median RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
+}else if(nrow(final.gene.importance.median) == 0 | nrow(na.omit(final.gene.importance.median)) == 0){
+    fun_export_data(path = path.out, data = paste0("NO GENE LIST RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
 }else{
-    tempo1 <- aggregate(final.gene.importance$importance, list(final.gene.importance$features), median, na.rm = TRUE)
-    names(tempo1) <- c("GENE", "MEDIAN")
-    tempo2 <- aggregate(final.gene.importance$features, list(final.gene.importance$features), function(x){length(x[ ! is.na(x)])}) # nb
-    if(identical(tempo1$GENE, tempo2$Group.1)){
-        tempo1 <- data.frame(tempo1, NB = tempo2$x)
-    }else{
-        cat(paste0("\n\n============\n\nERROR: IN THE PLOT OF THE final.gene.importance OBJECT, tempo1 AND tempo2 SHOULD HAVE THE SAME GENE COLUMN: \n\n============\n\n"))
-        tempo1
-        tempo2
-        cat("\n\n============\n\n")
-        stop()
-    }
-    final.gene.importance.median <- cbind(tempo1,
-                    CI95.inf = aggregate(final.gene.importance$importance, list(final.gene.importance$features), quantile, probs = 0.025, na.rm = TRUE)[, 2],
-                    CI95.sup = aggregate(final.gene.importance$importance, list(final.gene.importance$features), quantile, probs = 0.975, na.rm = TRUE)[, 2],
-                    stringsAsFactors = FALSE
-    )
-    fun_export_data(path = path.out, data = "IMPORTANCE: ", output = log.file)
-    fun_export_data(path = path.out, data = final.gene.importance.median, output = log.file)
-    
-    # barplot + CI
-    bar.width <- 0.5
-    log.scale <- FALSE
-    amplif <- label.size
     tempo.gg.name <- "gg.indiv.plot."
     tempo.gg.count <- 0
     if(nrow(final.gene.importance.median) > 20){
@@ -370,9 +469,7 @@ if(nrow(final.gene.importance) == 0 | nrow(na.omit(final.gene.importance)) == 0)
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("GENE IMPORTANCE (NUMBERS ARE NUMBER OF TIMES THE GENE IS SEEN FOR ", slurm.loop.nb, " LOOPS)")))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab(""))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("MEDIAN & 95CI"))
-#     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
-        # plot.margin = margin(up.space.mds, right.space.mds, down.space.mds, left.space.mds, "inches"), 
         plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
         axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
         axis.text = ggplot2::element_text(size = amplif * 0.75), 
@@ -395,113 +492,53 @@ if(nrow(final.gene.importance) == 0 | nrow(na.omit(final.gene.importance)) == 0)
         assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
     }else{
         assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_y_continuous(expand = c(0,0), breaks = scales::extended_breaks(10)))
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(ylim = c(0, max(final.gene.importance.median$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(ylim = c(0, max(tempo.plot$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
     }
     if(activate.pdf == TRUE){
         tempo <- dev.set(pdf.nb) # assign to avoid the message
     }else{
-        windows(5, 5)
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
     }
     print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
-    fun_export_data(path = path.out, data = paste0("SEE THE final.gene.importance AND THE final.gene.importance.median OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-    backup.name <- c(backup.name, "final.gene.importance.median")
 }
 
 
-#select.gene.curve
-
-fun_export_data(path = path.out, data = "################ OPTIMAL NUMBER OF GENES DURING THE MODEL ELABORATION", output = log.file)
+# plotting the heatmap adnd corplot
+fun_export_data(path = path.out, data = "################ HEATMAP AND CORRELATION PLOT ", output = log.file)
 
-if(nrow(final.select.gene.curve) == 0 | nrow(na.omit(final.select.gene.curve)) == 0){
-    fun_export_data(path = path.out, data = paste0("NO OPTIMAL RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
-    fun_export_data(path = path.out, data = final.select.gene.curve, output = log.file)
-}else if(analysis.kind == "valid_boot"){
-    fun_export_data(path = path.out, data = paste0("BEWARE: NO COMPILATION FOR THE THE RANDOM FOREST TRAINING (", analysis.kind, " KIND OF ANALYSIS)"), output = log.file)
-    # fun_export_data(path = path.out, data = final.select.gene.curve, output = log.file)
+if( ! (any(ls() %in% "nano.rf.selected.genes") | any(ls() %in% "optimal.gene.nb") | any(ls() %in% "selected.gene.names"))){
+    fun_export_data(path = path.out, data = paste0("NO nano.rf.selected.genes RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
+}else if(nrow(nano.rf.selected.genes) == 0 | nrow(na.omit(nano.rf.selected.genes)) == 0 | length(optimal.gene.nb) == 0){
+    fun_export_data(path = path.out, data = paste0("NO GENE LIST RESULTS FROM THE RANDOM FOREST TRAINING"), output = log.file)
 }else{
-    tempo1 <- aggregate(final.select.gene.curve$y, list(final.select.gene.curve$x), median, na.rm = TRUE)
-    names(tempo1) <- c("X", "MEDIAN")
-    final.select.gene.curve.median <- cbind(tempo1,
-                                          CI95.inf = aggregate(final.select.gene.curve$y, list(final.select.gene.curve$x), quantile, probs = 0.025, na.rm = TRUE)[, 2],
-                                          CI95.sup = aggregate(final.select.gene.curve$y, list(final.select.gene.curve$x), quantile, probs = 0.975, na.rm = TRUE)[, 2],
-                                          stringsAsFactors = FALSE
-    )
-    fun_export_data(path = path.out, data = "OPTIMAL NUMBER OF GENES: ", output = log.file)
-    fun_export_data(path = path.out, data = final.select.gene.curve.median, output = log.file)
-    
-    # line + CI area
-    bar.width <- 0.5
-    log.scale <- FALSE
-    amplif <- label.size
-    tempo.gg.name <- "gg.indiv.plot."
-    tempo.gg.count <- 0
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = final.select.gene.curve.median, mapping = ggplot2::aes(x = X, y = MEDIAN)))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_point(color = "red", shape = 16, size = 2))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(color = "red"))
-    # assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_errorbar(ggplot2::aes(ymin = CI95.inf, ymax = CI95.sup), position = ggplot2::position_dodge(), color = "black", width = 0))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_ribbon(ggplot2::aes(ymin = CI95.inf, ymax = CI95.sup), color = NA, fill = "red", alpha = 0.1))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("OPTIMAL NUMBER OF GENES (n = ", slurm.loop.nb,")")))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab("NUMBER OF GENES"))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("MEDIAN & 95CI"))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
-    if(log.scale == TRUE){
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
+    tempo.data <- dat[order(dat$Y), ]
+    annot.rows <- dat[, "Y", drop = FALSE] # allow to keep the data.frame structure
+    colnames(annot.rows) <- "ASDAS R/NR"
+    if(length(na.omit(match(selected.gene.names, names(tempo.data)))) != length(selected.gene.names)){
+        tempo.cat <- cat(paste0("\n\n============\n\nERROR: THE COLUMN NAMES OF tempo.data OBJECT SHOULD MATCH ALL THE NAMES IN selected.gene.names.\n
+                                    PROBLEM IN selected.gene.names:\n", selected.gene.names[ ! selected.gene.names %in% names(tempo.data)], "
+                                    PROBLEM IN names(tempo.data):\n", names(tempo.data)[ ! names(tempo.data) %in% selected.gene.names], "  
+                                    \n\n============\n\n"))
+        fun_export_data(path = path.out, data = tempo.cat, output = log.file)
+        stop(tempo.cat)
     }else{
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_y_continuous(expand = c(0,0), breaks = scales::extended_breaks(10)))
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(xlim= c(min(final.select.gene.curve.median$X, na.rm = TRUE), max(final.select.gene.curve.median$X, na.rm = TRUE)), ylim = c(0, max(final.select.gene.curve.median$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
+        fun_export_data(path = path.out, data = "IN THE nano.rf.selected.genes OBJECT, COLUMN ARE ORDERED AS IN THE FREQ OF THE GENE LIST selected.gene.names", output = log.file)
+        nano.rf.selected.genes <- tempo.data[, match(selected.gene.names, names(tempo.data))]
     }
-    if(activate.pdf == TRUE){
-        tempo <- dev.set(pdf.nb) # assign to avoid the message
-    }else{
-        windows(5, 5)
-    }
-    print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
-    fun_export_data(path = path.out, data = paste0("SEE THE final.select.gene.curve AND THE final.select.gene.curve.median OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-    backup.name <- c(backup.name, "final.select.gene.curve.median")
+    ann_colors = list("ASDAS R/NR" = c(R = "steelblue", NR = "tomato"))
+    tempo <- dev.set(pdf.nb) # assign to avoid the message
+    heatmap.plot <- pheatmap(t(scale(nano.rf.selected.genes)), silent = TRUE, annotation_col = annot.rows, cluster_cols = FALSE, show_colnames = FALSE, border_color = NA,  color = colorRampPalette(c("red", "black", "green"))(499), annotation_colors = ann_colors, fontsize_row = amplif, fontsize_col = amplif)
+    print(ggplot2::ggplot()+ggplot2::theme_classic())
+    print(heatmap.plot)
+    tempo <- dev.set(pdf.nb) # assign to avoid the message
+    corrplot::corrplot(cor(nano.rf.selected.genes), tl.col = "black", tl.cex = amplif / 10)
 }
 
 
-# plotting the heatmap adnd corplot
-fun_export_data(path = path.out, data = "################ HEATMAP AND CORRELATION PLOT ", output = log.file)
-if(analysis.kind == "valid_boot"){
-    if(nrow(final.mod.gene.names) == 0 | nrow(na.omit(final.mod.gene.names)) == 0){
-        fun_export_data(path = path.out, data = paste0("NO GENE LIST FROM THE RANDOM FOREST TRAINING"), output = log.file)
-        fun_export_data(path = path.out, data = final.mod.gene.names, output = log.file)
-    }else{
-        fun_export_data(path = path.out, data = paste0("BEWARE: NO COMPILATION FOR THE THE RANDOM FOREST TRAINING (", analysis.kind, " KIND OF ANALYSIS)"), output = log.file)
-        # fun_export_data(path = path.out, data = final.mod.gene.names, output = log.file)
-        tempo.data <- dat[order(dat$Y), ]
-        annot.rows <- dat[, "Y", drop = FALSE] # allow to keep the data.frame structure
-        colnames(annot.rows) <- "ASDAS R/NR"
-        nano.rf.selected.genes <- tempo.data[, final.mod.gene.names$GENE]
-    }
-}else{
-    if(nrow(final.mod.gene.names.freq) == 0 | nrow(na.omit(final.mod.gene.names.freq)) == 0){
-        fun_export_data(path = path.out, data = paste0("NO GENE LIST FROM THE RANDOM FOREST TRAINING"), output = log.file)
-        fun_export_data(path = path.out, data = final.mod.gene.names.freq, output = log.file)
-    }else{
-        fun_export_data(path = path.out, data = paste0("HEATMAP AND CORRELATION HAVE BEEN PLOTTED USING THE final.mod.gene.names.freq FILE AND THE FREQUENCY CUTOFF ", cut.off.freq.for.selected.genes), output = log.file)
-        tempo.data <- dat[order(dat$Y), ]
-        annot.rows <- dat[, "Y", drop = FALSE] # allow to keep the data.frame structure
-        colnames(annot.rows) <- "ASDAS R/NR"
-        nano.rf.selected.genes <- tempo.data[, final.mod.gene.names.freq$GENE[final.mod.gene.names.freq$FREQ >= cut.off.freq.for.selected.genes]]
-        ann_colors = list("ASDAS R/NR" = c(R = "steelblue", NR = "tomato"))
-        # pheatmap(t(scale(nano.rf.selected.genes)), silent = TRUE, annotation_col = annot.rows, cluster_cols = FALSE, show_colnames = FALSE, border_color = NA,  color = colorRampPalette(c("red", "black", "green"))(499), annotation_colors = ann_colors, fontsize_row = label.size, fontsize_col = label.size)
-        tempo <- dev.set(pdf.nb) # assign to avoid the message
-        heatmap.plot <- pheatmap(t(scale(nano.rf.selected.genes)), silent = TRUE, annotation_col = annot.rows, cluster_cols = FALSE, show_colnames = FALSE, border_color = NA,  color = colorRampPalette(c("red", "black", "green"))(499), annotation_colors = ann_colors, fontsize_row = label.size, fontsize_col = label.size)
-        print(ggplot2::ggplot()+ggplot2::theme_classic())
-        print(heatmap.plot)
-        tempo <- dev.set(pdf.nb) # assign to avoid the message
-        corrplot::corrplot(cor(nano.rf.selected.genes), tl.col = "black", tl.cex = label.size / 10)
-        fun_export_data(path = path.out, data = paste0("SEE THE nano.rf.selected.genes OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-        backup.name <- c(backup.name, "nano.rf.selected.genes")
-    }
-}
-    
+
 ######## 4.2 Validate the model
 # 
 fun_export_data(path = path.out, data = "################################ VALIDATION", output = log.file)
-fun_export_data(path = path.out, data = "################ ROC CURVES", output = log.file)
 data.kind <- c("genes", "crp", "genes.crp") # beware, the order is important
 ana.kind <- c("rf", "logreg", "rpart")
 for(i0 in 1:length(data.kind)){
@@ -528,25 +565,22 @@ for(i0 in 1:length(data.kind)){
             ))
             
             # roc curves plot
-            bar.width <- 0.5
-            log.scale <- FALSE
-            amplif <- label.size
             tempo.gg.name <- "gg.indiv.plot."
             tempo.gg.count <- 0
             assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = get(tempo.name.median), mapping = ggplot2::aes(x = X_MEDIAN, y = Y_MEDIAN)))
-            assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_segment(data = data.frame(x = 0, y = 0, xend = 1, yend = 1), mapping = ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "black", linetype = "dashed"))
             for(i2 in 1:slurm.loop.nb){
                 # roc curves
                 if(nrow(get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))) > 0){
                     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(data = get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))[order(get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))$x, get(paste0("loop", i2, "_data.roc", i0, ".", data.kind[i0], ".", ana.kind[i1]))$y, decreasing = FALSE), ], mapping = ggplot2::aes(x = x, y = y), color = "grey", alpha = 0.5))
                 }
             }
-            assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(color = "red"))
+            assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_segment(data = data.frame(x = 0, y = 0, xend = 1, yend = 1), mapping = ggplot2::aes(x = x, y = y, xend = xend, yend = yend), color = "black", linetype = "dashed"))
+            assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_line(color = "red", size =0.75))
             assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("ROC CURVE ", data.kind[i0], " AND ", ana.kind[i1], " (n = ", slurm.loop.nb,")\nAUC = ",
-                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$MEDIAN, 3), " (",
-                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$CI95.inf, 3), " - ",
-                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$CI95.sup, 3), ")"
-                    )))
+                                                                                                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$MEDIAN, 3), " (",
+                                                                                                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$CI95.inf, 3), " - ",
+                                                                                                        round(get(paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))$CI95.sup, 3), ")"
+            )))
             assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab("FALSE POSITIVE RATE (1 - SPECIFICITY)"))
             assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("TRUE POSITIVE RATE (SENSITIVITY)"))
             assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
@@ -556,54 +590,25 @@ for(i0 in 1:length(data.kind)){
             if(activate.pdf == TRUE){
                 tempo <- dev.set(pdf.nb) # assign to avoid the message
             }else{
-                windows(5, 5)
+                fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
             }
             print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
         }
-        # fun_export_data(path = path.out, data = paste0("SEE THE ", tempo.name.median, " AND THE ", paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"), " OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-        backup.name <- c(backup.name, tempo.name.median, paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))
-        
-        # confusion matrix
-        fun_export_data(path = path.out, data = paste0("CONFUSION MATRIX (CI95):"), output = log.file)
-        tempo.name <- paste0("final.confmat", i0, ".", data.kind[i0], ".", ana.kind[i1])
-        tempo1 <- apply(get(tempo.name)[1:4], 2, median, na.rm = TRUE)
-        tempo2 <- apply(get(tempo.name)[1:4], 2, quantile, probs = 0.025, na.rm = TRUE)
-        tempo3 <- apply(get(tempo.name)[1:4], 2, quantile, probs = 0.975, na.rm = TRUE)
-        tempo4 <- paste(tempo1, tempo2, sep = " (")
-        tempo4 <- paste(tempo4, tempo3, sep = " - ")
-        tempo4 <- paste0(tempo4, ")")
-        assign(paste0("final.confmat", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"), as.table(matrix(tempo4, ncol = 2, dimnames = list(TRUTH = c("NR", "R"), PREDICTION = c("NR", "R")))))
-        fun_export_data(path = path.out, data = get(paste0("final.confmat", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median")), output = log.file, rownames.kept = TRUE)
-        fun_export_data(path = path.out, data = paste0("SEE THE ", tempo.name.median, " AND THE ", paste0("final.auc", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"), " OBJECT IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-        backup.name <- c(backup.name, tempo.name.median, paste0("final.confmat", i0, ".", data.kind[i0], ".", ana.kind[i1], ".median"))
     }
 }
 
 
-
 ######## 4.6 Plot top feature boxplots
 
 fun_export_data(path = path.out, data = "################ TOP PREDICTOR GENES", output = log.file)
-boxdat <- data.frame(ASDAS = df.nano$response_ASDAS_R_NR, 
-                     Cohort.name = df.nano$cohort_id,
-                     RF.COHORTE = df.nano$training_validation, 
-                     nano.rf.selected.genes
-)
-boxdat_melt <- reshape2::melt(boxdat, id.vars = c("ASDAS", "Cohort.name", "RF.COHORTE"), variable.name = "Gene")
 
 # boxplot
-bar.width <- 0.5
-log.scale <- FALSE
-amplif <- label.size
-facet.nrow = 4
-facet.ncol = 4
-boxdat_melt$Gene <- as.character(boxdat_melt$Gene)
-pages.print.nb <- ceiling(length(unique(boxdat_melt$Gene)) / (facet.nrow * facet.ncol))
+pages.print.nb <- ceiling(length(levels(boxdat_melt$Gene)) / (facet.nrow * facet.ncol))
 increm <- facet.nrow * facet.ncol
 for(i0 in 1:pages.print.nb){
     select <- (increm * i0 - increm + 1):(increm * i0)
-    if(max(select) > length(unique(boxdat_melt$Gene))){
-        select <- (increm * i0 - increm + 1):length(unique(boxdat_melt$Gene))
+    if(max(select) > length(levels(boxdat_melt$Gene))){
+        select <- (increm * i0 - increm + 1):length(levels(boxdat_melt$Gene))
     }
     tempo.gg.name <- "gg.indiv.plot."
     tempo.gg.count <- 0
@@ -611,137 +616,103 @@ for(i0 in 1:pages.print.nb){
         if(i0 == 1){
             fun_export_data(path = path.out, data = paste0("WITH ", analysis.kind, " KIND OF ANALYSIS, BOXPLOTS ARE PROVIDED USING THE TRAINING AND THE VALIDATING COHORTES, AS DEFINED IN THE training_validation COLUMN OF THE df.nano DATA FRAME", path.out), output = log.file)
         }
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = boxdat_melt[boxdat_melt$Gene %in% unique(boxdat_melt$Gene)[select], ], mapping = ggplot2::aes(x=ASDAS, y=value, colour=ASDAS, shape = RF.COHORTE, linetype = RF.COHORTE)))
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = boxdat_melt[boxdat_melt$Gene %in% levels(boxdat_melt$Gene)[select], ], mapping = ggplot2::aes(x=ASDAS, y=value, colour=ASDAS, shape = RF.COHORTE, linetype = RF.COHORTE)))
     }else if(analysis.kind == "full_cross_validation"){
         if(i0 == 1){
-            fun_export_data(path = path.out, data = paste0("WITH ", analysis.kind, " KIND OF ANALYSIS, BOXPLOTS CANNOT BE DIFFERENCIATED BETWEEN THE TRAINING AND THE VALIDATING COHORTES", path.out), output = log.file)
+            fun_export_data(path = path.out, data = paste0("WITH ", analysis.kind, " KIND OF ANALYSIS, BOXPLOTS CANNOT BE DIFFERENCIATED BETWEEN THE TRAINING AND THE VALIDATING COHORTES"), output = log.file)
         }
-        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = boxdat_melt[boxdat_melt$Gene %in% unique(boxdat_melt$Gene)[select], ], mapping = ggplot2::aes(x=ASDAS, y=value, colour=ASDAS)))
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = boxdat_melt[boxdat_melt$Gene %in% levels(boxdat_melt$Gene)[select], ], mapping = ggplot2::aes(x=ASDAS, y=value, colour=ASDAS)))
     }else{
         stop(paste0("\n\n============\n\nERROR 1 IN R CODE\n\n============\n\n"))
     }
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggbeeswarm::geom_beeswarm(dodge.width = 0.75, alpha = 0.7))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_boxplot(outlier.shape = NA, fill = NA))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::facet_wrap(~ Gene, nrow = facet.nrow, ncol = facet.ncol, scales = "fixed", shrink = FALSE)) # scales = "fixed", shrink = FALSE to have same scale range
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::facet_wrap(~ Gene, nrow = facet.nrow, ncol = facet.ncol, scales = "fixed", shrink = FALSE, drop = TRUE)) # scales = "fixed", shrink = FALSE to have same scale range
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle("SELECTED GENES (EACH DOT IS A PATIENT"))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("NANOSTRING VALUE"))
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_bw())
     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
-        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = label.size), 
-        axis.text = ggplot2::element_text(size = label.size), 
-        axis.title = ggplot2::element_text(size = label.size), 
-        legend.text = ggplot2::element_text(size = label.size), 
-        legend.title = ggplot2::element_text(size = label.size), 
-        strip.text = ggplot2::element_text(size = label.size)
+        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif), 
+        axis.text = ggplot2::element_text(size = amplif), 
+        axis.title = ggplot2::element_text(size = amplif), 
+        legend.text = ggplot2::element_text(size = amplif), 
+        legend.title = ggplot2::element_text(size = amplif), 
+        strip.text = ggplot2::element_text(size = amplif)
     ))
     if(activate.pdf == TRUE){
         tempo <- dev.set(pdf.nb) # assign to avoid the message
     }else{
-        windows(5, 5)
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
     }
     print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
 }
-fun_export_data(path = path.out, data = paste0("SEE THE nano.rf.selected.genes OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
 
 ######## 4.7 Frequencies associated to each predictor
 
 fun_export_data(path = path.out, data = "################ FINAL LIST OF PREDICTOR GENES AND FREQUENCIES", output = log.file)
 
-if(nrow(final.gene.list) == 0 | nrow(na.omit(final.gene.list)) == 0){
-  fun_export_data(path = path.out, data = paste0("NO FINAL LIST OF PREDICTOR GENES AND FREQUENCIES (NO ROWS)"), output = log.file)
-  fun_export_data(path = path.out, data = final.gene.list, output = log.file)
+if( ! any(ls() %in% "final.gene.list.median")){
+    fun_export_data(path = path.out, data = paste0("NO final.gene.list.median RESULTS FROM THE RANDOM FOREST VALIDATION"), output = log.file)
+}else if(nrow(final.gene.list.median) == 0 | nrow(na.omit(final.gene.list.median)) == 0){
+    fun_export_data(path = path.out, data = paste0("NO FINAL LIST OF PREDICTOR GENES AND FREQUENCIES (NO ROWS)"), output = log.file)
 }else{
-  tempo1 <- aggregate(final.gene.list$Freq, list(final.gene.list$sfeats), median, na.rm = TRUE)
-  names(tempo1) <- c("GENE", "MEDIAN")
-  tempo2 <- aggregate(final.gene.list$sfeats, list(final.gene.list$sfeats), function(x){length(x[ ! is.na(x)])}) # nb
-  if(identical(tempo1$GENE, tempo2$Group.1)){
-    tempo1 <- data.frame(tempo1, NB = tempo2$x)
-  }else{
-    cat(paste0("\n\n============\n\nERROR: IN THE PLOT OF THE final.gene.list OBJECT, tempo1 AND tempo2 SHOULD HAVE THE SAME GENE COLUMN: \n\n============\n\n"))
-    tempo1
-    tempo2
-    cat("\n\n============\n\n")
-    stop()
-  }
-  final.gene.list.median <- cbind(tempo1,
-                                        CI95.inf = aggregate(final.gene.list$Freq, list(final.gene.list$sfeats), quantile, probs = 0.025, na.rm = TRUE)[, 2],
-                                        CI95.sup = aggregate(final.gene.list$Freq, list(final.gene.list$sfeats), quantile, probs = 0.975, na.rm = TRUE)[, 2],
-                                        stringsAsFactors = FALSE
-  )
-  final.gene.list.median <- final.gene.list.median[order(final.gene.list.median$MEDIAN, final.gene.list.median$NB, decreasing = TRUE), ]
-  fun_export_data(path = path.out, data = "FINAL LIST OF PREDICTOR GENES AND FREQUENCIES: ", output = log.file)
-  fun_export_data(path = path.out, data = final.gene.list.median, output = log.file)
-  
-  # horiz barplot + CI
-  bar.width <- 0.5
-  log.scale <- FALSE
-  amplif <- label.size
-  tempo.gg.name <- "gg.indiv.plot."
-  tempo.gg.count <- 0
-  if(nrow(final.gene.list.median) > 20){
-    tempo1 <- table(final.gene.list.median$MEDIAN, useNA = "no")
-    tempo2 <- tempo1[order(names(tempo1), decreasing = TRUE)]
-    tempo3 <- as.numeric(names(cumsum(tempo2)[cumsum(tempo2) <= 20]))
-    tempo.plot <- final.gene.list.median[round(final.gene.list.median$MEDIAN, 7) %in% round(tempo3, 7), ]
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, -MEDIAN), y = MEDIAN))) # reorder from higher to lower
-    tempo.txt <- paste0("BEWARE: ONLY THE ", nrow(tempo.plot), " MOST IMPORTANT GENES PLOTTED, AMONG ", nrow(final.gene.list.median))
-  }else{
-    tempo.plot <- final.gene.list.median
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data =  tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, -MEDIAN), y = MEDIAN))) # reorder from higher to lower
-    tempo.txt <- ""
-  }
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_bar(stat = "identity", position = "dodge", color = "black", fill = grey(0.8), width = bar.width))
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_errorbar(ggplot2::aes(ymin = CI95.inf, ymax = CI95.sup), position = ggplot2::position_dodge(width = bar.width), color = "black", width = 0))
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("FINAL LIST OF PREDICTOR GENES AND FREQUENCIES (NUMBERS ARE NUMBER OF TIMES THE GENE IS SEEN FOR ", slurm.loop.nb, " LOOPS)")))
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab(""))
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("MEDIAN & 95CI"))
-  #     assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme_classic(base_size = amplif))
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
-    # plot.margin = margin(up.space.mds, right.space.mds, down.space.mds, left.space.mds, "inches"), 
-    plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
-    axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
-    axis.text = ggplot2::element_text(size = amplif * 0.75), 
-    axis.title = ggplot2::element_text(size = amplif), 
-    legend.text = ggplot2::element_text(size = amplif), 
-    legend.title = ggplot2::element_text(size = amplif), 
-    strip.text = ggplot2::element_text(size = amplif), 
-    legend.key = ggplot2::element_blank(), 
-    panel.background = ggplot2::element_rect(fill = "grey95"), 
-    panel.border = ggplot2::element_rect(colour = "black", fill = NA), 
-    panel.grid.major.x = ggplot2::element_line(colour = "grey75"), 
-    panel.grid.major.y = ggplot2::element_line(colour = "grey75"), 
-    panel.grid.minor.x = ggplot2::element_blank(), 
-    panel.grid.minor.y = ggplot2::element_blank(), 
-    strip.background = ggplot2::element_rect(fill = "white", colour = "black")
-  ))
-  coord <- ggplot2::ggplot_build(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))$data # to have the summary statistics of the plot. is interesting: x = coord[[2]]$x, y = coord[[2]]$ymax_final
-  assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[2]]$x, y = coord[[2]]$ymax * 1.05, label = tempo.plot$NB, size = amplif/2, color = "black", vjust = "bottom", hjust = "center", angle = 00)) # beware: no need of order() for labels because coord[[1]]$x set the order
-  if(log.scale == TRUE){
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
-  }else{
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_y_continuous(expand = c(0,0), breaks = scales::extended_breaks(10)))
-    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(ylim = c(0, max(final.gene.list.median$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
-  }
-  if(activate.pdf == TRUE){
-    tempo <- dev.set(pdf.nb) # assign to avoid the message
-  }else{
-    windows(5, 5)
-  }
-  print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
-  fun_export_data(path = path.out, data = paste0("SEE THE final.gene.list AND THE final.gene.list.median OBJECTS IN THE compiled_data.RData FILE PRESENT IN: ", path.out), output = log.file)
-  backup.name <- c(backup.name, "final.gene.list.median")
+    tempo.gg.name <- "gg.indiv.plot."
+    tempo.gg.count <- 0
+    if(nrow(final.gene.list.median) > 20){
+        tempo1 <- table(final.gene.list.median$MEDIAN, useNA = "no")
+        tempo2 <- tempo1[order(names(tempo1), decreasing = TRUE)]
+        tempo3 <- as.numeric(names(cumsum(tempo2)[cumsum(tempo2) <= 20]))
+        tempo.plot <- final.gene.list.median[round(final.gene.list.median$MEDIAN, 7) %in% round(tempo3, 7), ]
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data = tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, -MEDIAN), y = MEDIAN))) # reorder from higher to lower
+        tempo.txt <- paste0("BEWARE: ONLY THE ", nrow(tempo.plot), " MOST IMPORTANT GENES PLOTTED, AMONG ", nrow(final.gene.list.median))
+    }else{
+        tempo.plot <- final.gene.list.median
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggplot(data =  tempo.plot, mapping = ggplot2::aes(x = reorder(GENE, -MEDIAN), y = MEDIAN))) # reorder from higher to lower
+        tempo.txt <- ""
+    }
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_bar(stat = "identity", position = "dodge", color = "black", fill = grey(0.8), width = bar.width))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::geom_errorbar(ggplot2::aes(ymin = CI95.inf, ymax = CI95.sup), position = ggplot2::position_dodge(width = bar.width), color = "black", width = 0))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ggtitle(paste0("FINAL LIST OF PREDICTOR GENES AND FREQUENCIES (NUMBERS ARE NUMBER OF TIMES THE GENE IS SEEN FOR ", slurm.loop.nb, " LOOPS)\n", tempo.txt)))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::xlab(""))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::ylab("MEDIAN & 95CI"))
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), m.gg <- ggplot2::theme(
+        plot.title = ggplot2::element_text(hjust=1, vjust=1, size = amplif * 0.5), 
+        axis.text.x = ggplot2::element_text(angle = 90, hjust = 1),
+        axis.text = ggplot2::element_text(size = amplif * 0.75), 
+        axis.title = ggplot2::element_text(size = amplif), 
+        legend.text = ggplot2::element_text(size = amplif), 
+        legend.title = ggplot2::element_text(size = amplif), 
+        strip.text = ggplot2::element_text(size = amplif), 
+        legend.key = ggplot2::element_blank(), 
+        panel.background = ggplot2::element_rect(fill = "grey95"), 
+        panel.border = ggplot2::element_rect(colour = "black", fill = NA), 
+        panel.grid.major.x = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.major.y = ggplot2::element_line(colour = "grey75"), 
+        panel.grid.minor.x = ggplot2::element_blank(), 
+        panel.grid.minor.y = ggplot2::element_blank(), 
+        strip.background = ggplot2::element_rect(fill = "white", colour = "black")
+    ))
+    coord <- ggplot2::ggplot_build(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))$data # to have the summary statistics of the plot. is interesting: x = coord[[2]]$x, y = coord[[2]]$ymax_final
+    assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1),  ggplot2::annotate(geom = "text", x = coord[[2]]$x, y = coord[[2]]$ymax * 1.05, label = tempo.plot$NB, size = amplif/3, color = "black", vjust = "bottom", hjust = "center", angle = 00)) # beware: no need of order() for labels because coord[[1]]$x set the order
+    if(log.scale == TRUE){
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::annotation_logticks(sides = "l"))
+    }else{
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::scale_y_continuous(expand = c(0,0), breaks = scales::extended_breaks(10)))
+        assign(paste0(tempo.gg.name, tempo.gg.count <- tempo.gg.count + 1), ggplot2::coord_cartesian(ylim = c(0, max(tempo.plot$CI95.sup, na.rm = TRUE) * 1.1))) # fix limits
+    }
+    if(activate.pdf == TRUE){
+        tempo <- dev.set(pdf.nb) # assign to avoid the message
+    }else{
+        fun_open_window(pdf.disp = activate.pdf, width.fun = window.width, height.fun = window.height)
+    }
+    print(eval(parse(text = paste(paste0(tempo.gg.name, 1:tempo.gg.count), collapse = " + "))))
 }
 
 
-
-
-
 ################################ End Main code
 
 ################################ Post Main code
 
-
-
 ############ Pdf window closing
 
 fun_close_specif_window()