Commit e21d8626 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

interm

parent fdbfa047
......@@ -14,8 +14,8 @@ alias R_conf='module load gcc/4.7.4 R/3.5.0 ; Rscript'
# SCRIPTS TO RUN
export r_main_functions_conf=/pasteur/homes/gmillot/Git_versions_to_use/cute_little_R_functions-v4.5.0/cute_little_R_functions.R
# export r_main_functions_conf=https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v4.5.0/cute_little_R_functions.R
export bash_main_functions_conf=/pasteur/homes/gmillot/Git_versions_to_use/little_bash_functions-v1.0.0/little_bash_functions-v1.0.0.sh
# export r_ini_check_conf=/pasteur/homes/gmillot/rogge12231/rogge_12231_ini.R
export r_main_conf=/pasteur/homes/gmillot/Git_versions_to_use/rogge_12231-v1.0.0/rogge_12231_main_analysis.R
export r_compil_conf=/pasteur/homes/gmillot/Git_versions_to_use/rogge_12231-v1.0.0/rogge_12231_data_compilation.R
......@@ -27,15 +27,13 @@ export r_compil_conf=/pasteur/homes/gmillot/Git_versions_to_use/rogge_12231-v1.0
################ locations
PATH_LIB_CONF="/pasteur/homes/gmillot/R/x86_64-pc-linux-gnu-library/3.5/" # absolute path of the library folder. Write "" if not required
PATH_LIB_CONF="/pasteur/homes/gmillot/R/x86_64-pc-linux-gnu-library/3.5/" # absolute path of the library folder. Write "none" if not required
PATH_IN_CONF="/pasteur/homes/gmillot/rogge12231/" # absolute path of the data folder
PATH_OUT_CONF="/pasteur/homes/gmillot/rogge12231/" # absolute path of the output folder
PATH_FUNCTION1_CONF="/pasteur/homes/gmillot/Git_versions_to_use/cute_little_R_functions-v4.5.0/" # Define the absolute pathway of the folder containing functions created by Gael Millot
################ File name
FILE_NAME1_CONF="supplementary_data_file_test.csv" # name of the data file to import
NAME_SOURCE_FILE1_CONF="cute_little_R_functions.R"
################ loop & bootstrap
......
This diff is collapsed.
......@@ -13,7 +13,7 @@ erase.graphs <- TRUE # write TRUE to erase all the graphic windows in R before s
sink(stdout(), type = "message")
script <- commandArgs(trailingOnly = FALSE)[1] # recover script name, e.g., r_341_conf $check_lod_gael_conf
args <- commandArgs(trailingOnly = TRUE) # recover arguments written after the call of the Rscript, ie after r_341_conf $check_lod_gael_conf
tempo.arg.names <- c("path.lib", "path.in", "path.out", "path.function1", "file.name1", "name.source.file1", "ml.bootstrap.nb", "project.name", "activate.pdf", "label.size", "optional.text", "slurm.loop.nb", "analysis.kind", "cross.valid.ratio", "random.seed") # objects names exactly in the same order as in the bash code and recovered in args
tempo.arg.names <- c("path.lib", "path.in", "path.out", "path.function1", "file.name1", "ml.bootstrap.nb", "project.name", "activate.pdf", "label.size", "optional.text", "slurm.loop.nb", "analysis.kind", "cross.valid.ratio", "random.seed") # objects names exactly in the same order as in the bash code and recovered in args
if(length(args) != length(tempo.arg.names)){
tempo.cat <- paste0("\n\n================\n\nERROR: THE NUMBER OF ELEMENTS IN args (", length(args),") IS DIFFERENT FROM THE NUMBER OF ELEMENTS IN tempo.arg.names (", length(tempo.arg.names),")\nargs:", paste0(args, collapse = ","), "\ntempo.arg.names:", paste0(tempo.arg.names, collapse = ","), "\n\n================\n\n")
stop(tempo.cat)
......@@ -30,6 +30,7 @@ for(i in 1:length(tempo.arg.names)){
}
################################ Recording of the initial parameters
param.list <- c(
"script",
"args",
......@@ -45,6 +46,7 @@ param.ini.settings <- NULL
# Build one display line per recorded parameter: "\n" + parameter name + padding spaces + values joined by ","
# param.list holds the names of the objects to record; space.add[i] is the number of padding spaces written after the i-th name (both are defined outside this loop)
for(i in seq_along(param.list)){ # seq_along() instead of 1:length(): no iteration at all if param.list is empty (1:0 would loop over 1 and 0)
    param.ini.settings <- c(
        param.ini.settings,
        paste0(
            "\n",
            param.list[i],
            paste0(rep(" ", space.add[i]), collapse = ""),
            paste0(get(param.list[i]), collapse = ",") # get() recovers the object whose name is stored in param.list[i]
        )
    )
}
################################ End Recording of the initial parameters
################################ DEBUG
......@@ -58,9 +60,8 @@ project.name <-"rogge12231"
path.lib <- "/pasteur/homes/gmillot/softwares/R/x86_64-pc-linux-gnu-library/3.5/" # absolute path of the library folder. Write "none" if not required
path.in <- "/pasteur/homes/gmillot/rogge12231/" # absolute path of the data folder
path.out <- "/pasteur/homes/gmillot/rogge12231/" # absolute path of the output folder
path.function1 <- "/pasteur/homes/gmillot/Git_versions_to_use/cute_little_R_functions-v4.4.0/" # Define the absolute pathway of the folder containing functions created by Gael Millot
path.function1 <- "/pasteur/homes/gmillot/Git_versions_to_use/cute_little_R_functions-v4.4.0/cute_little_R_functions.R" # Define the absolute pathway of the folder containing functions created by Gael Millot
file.name1 <- "supplementary_data_file_test.csv" # name of the data file to import in path.in
name.source.file1 <- "cute_little_R_functions.R"
ml.bootstrap.nb <- 3
activate.pdf = TRUE
label.size <- 6
......@@ -81,15 +82,15 @@ project.name <-"rogge12231"
path.lib <- "C:/Users/Gael/Documents/R/win-library/3.5/" # absolute path of the library folder. Write "none" if not required
path.in <- "C:/Users/Gael/Documents/Hub projects/20190126 Las Rogge 12231/Code VG and VR/" # absolute path of the data folder
path.out <- "C:/Users/Gael/Desktop/" # absolute path of the output folder
path.function1 <- "C:/Users/Gael/Documents/Git_versions_to_use/cute_little_R_functions-v4.5.0/" # Define the absolute pathway of the folder containing functions created by Gael Millot
path.function1 <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v4.5.0/cute_little_R_functions.R"
# path.function1 <- "C:/Users/Gael/Documents/Git_versions_to_use/cute_little_R_functions-v4.5.0/cute_little_R_functions.R" # Define the absolute pathway of the folder containing functions created by Gael Millot
file.name1 <- "supplementary_data_file_test.csv" # name of the data file to import in path.in
name.source.file1 <- "cute_little_R_functions.R"
ml.bootstrap.nb <- 3
activate.pdf = TRUE
label.size <- 6
optional.text <- ""
slurm.loop.nb <- 1
analysis.kind <- "longit"
analysis.kind <- "valid_boot"
cross.valid.ratio <- 0.8
random.seed <- TRUE
'
......@@ -124,7 +125,8 @@ req.package.list <- c(
if(path.lib == "none"){
path.lib <- .libPaths() # .libPaths(new = path.lib) # or .libPaths(new = c(.libPaths(), path.lib))
}else{
.libPaths(new = path.lib)
# .libPaths(new = ) add path to default path
.libPaths(new = sub(x = path.lib, pattern = "/$|\\\\$", replacement = "")) # .libPaths() does not support / at the end of a submitted path. Thus check and replace last / or \\ in path
}
for(i0 in 1:length(req.package.list)){
if( ! req.package.list[i0] %in% rownames(installed.packages(lib.loc = path.lib))){
......@@ -134,16 +136,21 @@ for(i0 in 1:length(req.package.list)){
# suppressPackageStartupMessages(library(req.package.list[i0], quietly = TRUE, character.only = TRUE))
}
}
################################ End Packages verification and import
################################ Functions
if( ! (all(dir.exists(path.function1)) & length(path.function1) == 1)){
cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.out PARAMETER DOES NOT EXISTS: ", paste(path.function1, collapse = " "), "\n\n============\n\n"))
}else if( ! (all(name.source.file1 %in% list.files(path.function1)) & length(name.source.file1) == 1)){
cat(paste0("\n\n============\n\nERROR: name.source.file1 PARAMETER (", paste(name.source.file1, collapse = " "), ") DOES NOT EXIST IN THE DIRECTORY PATH INDICATED IN THE path.function1 PARAMETER: ", path.function1, "\n\n============\n\n"))
if(length(path.function1) != 1){
stop(paste0("\n\n============\n\nERROR: path.function1 PARAMETER MUST BE LENGTH 1: ", paste(path.function1, collapse = " "), "\n\n============\n\n"))
}else if(grepl(x = path.function1, pattern = "^http") & ( ! RCurl::url.exists(path.function1))){
stop(paste0("\n\n============\n\nERROR: HTTP INDICATED IN THE path.function1 PARAMETER DOES NOT EXISTS: ", path.function1, "\n\n============\n\n"))
}else if(( ! grepl(x = path.function1, pattern = "^http")) & ( ! file.exists(path.function1))){
stop(paste0("\n\n============\n\nERROR: FILE INDICATED IN THE path.function1 PARAMETER DOES NOT EXISTS: ", path.function1, "\n\n============\n\n"))
}else{
source(paste0(path.function1, name.source.file1)) # source the fun_ functions used below
source(path.function1) # source the fun_ functions used below
}
################################ End Functions
################################ Main code
......@@ -166,23 +173,30 @@ tempo <- fun_param_check(data = path.lib, class = "character", length = 1) ; eva
# Additional check for path.lib: unless it is the "none" sentinel, it must be an existing directory
# tempo holds the result of the fun_param_check() call made on path.lib just before
# && short-circuits: path.lib is only compared and tested when that call reported no problem
if(tempo$problem == FALSE && path.lib != "none"){
    if( ! dir.exists(path.lib)){
        # the message now names and prints path.lib (it previously reported path.in by mistake)
        cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.lib PARAMETER DOES NOT EXISTS: ", path.lib, "\n\n============\n\n"))
        arg.check <- TRUE # flag the failure
    }
}
# Each parameter below is first checked with fun_param_check() (result stored in tempo, then eval(ee) is run)
# The extra existence test uses && so that it is only evaluated when tempo$problem is FALSE:
# with & the right-hand side was always evaluated, even on a value that had just failed the basic check
# path.in: checked as character of length 1, then must be an existing directory
tempo <- fun_param_check(data = path.in, class = "character", length = 1) ; eval(ee)
if(tempo$problem == FALSE && ! dir.exists(path.in)){
    cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.in PARAMETER DOES NOT EXISTS: ", path.in, "\n\n============\n\n"))
    arg.check <- TRUE # flag the failure
}
# file.name1: checked as character mode of length 1, then must be listed in the path.in directory
tempo <- fun_param_check(data = file.name1, mode = "character", length = 1) ; eval(ee)
if(tempo$problem == FALSE && ! (file.name1 %in% list.files(path.in))){
    cat(paste0("\n\n============\n\nERROR: file.name1 PARAMETER (", file.name1, ") DOES NOT EXIST IN THE DIRECTORY PATH INDICATED IN THE path.in PARAMETER: ", path.in, "\n\n============\n\n"))
    arg.check <- TRUE # flag the failure
}
# path.out: checked as character of length 1, then must be an existing directory
tempo <- fun_param_check(data = path.out, class = "character", length = 1) ; eval(ee)
if(tempo$problem == FALSE && ! dir.exists(path.out)){
    cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.out PARAMETER DOES NOT EXISTS: ", path.out, "\n\n============\n\n"))
    arg.check <- TRUE # flag the failure
}
# Remaining parameters: one fun_param_check() call per parameter, with the expected class / typeof / length / allowed options passed as arguments
# Each result is stored in tempo and eval(ee) is run right after (ee is defined outside this excerpt -- presumably it records tempo for the final argument check; to be confirmed)
# path.function1 fully tested above
tempo <- fun_param_check(data = path.function1, class = "character", length = 1) ; eval(ee)
# integer-like single values: doubles are accepted as integers and neg.values is set to FALSE
tempo <- fun_param_check(data = ml.bootstrap.nb, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
tempo <- fun_param_check(data = project.name, class = "character", length = 1) ; eval(ee)
tempo <- fun_param_check(data = activate.pdf, class = "logical", length = 1) ; eval(ee)
tempo <- fun_param_check(data = label.size, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
tempo <- fun_param_check(data = optional.text, class = "character", length = 1) ; eval(ee)
tempo <- fun_param_check(data = slurm.loop.nb, typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE) ; eval(ee)
# analysis.kind must be one of the three listed options
tempo <- fun_param_check(data = analysis.kind, options = c("longit", "valid_boot", "full_cross_validation"), length = 1) ; eval(ee)
......
......@@ -222,7 +222,6 @@ CONF_PATH=(
"PATH_LIB_CONF"
"PATH_IN_CONF"
"PATH_OUT_CONF"
"PATH_FUNCTION1_CONF"
)
conf_path_Num=$(( ${#CONF_PATH[@]} - 1 )) # number of elements in the CONF_PATH array minus 1, i.e. the index of its last element (not the total number of elements)
LOCAL_USER_VAR+=" CONF_PATH conf_path_Num" # append both names to LOCAL_USER_VAR. Do not forget the space before the variable name
......@@ -230,7 +229,6 @@ LOCAL_USER_VAR+=" CONF_PATH conf_path_Num" # do not forget the space before the
CONF_VAR_CHECK=(
# R PARAMETERS
"FILE_NAME1_CONF"
"NAME_SOURCE_FILE1_CONF"
"ML_BOOTSTRAP_NB_CONF"
"LOOP_NB_CONF"
"R_RANDOM_SEED"
......@@ -353,7 +351,7 @@ while [[ $COUNT < $(($LOOP_NB_CONF + 1)) ]] ; do
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $COUNT $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $COUNT $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --job-name=wait_loop1 --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a ${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/loop${COUNT}_${PROJECT_NAME_CONF}_slurm_jobID.txt # write all th echo from the $PROC alaso into a log file
......@@ -368,7 +366,7 @@ while [[ $COUNT < $(($LOOP_NB_CONF + 1)) ]] ; do
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED" # beware $COUNT replaced by ${SLURM_ARRAY_TASK_ID} because job array
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED" # beware $COUNT replaced by ${SLURM_ARRAY_TASK_ID} because job array
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --array=2-$LOOP_NB_CONF --job-name=wait_loop_all --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a $(for((i = 2 ; i <= $LOOP_NB_CONF ; i++)) ; do echo ${OUTPUT_DIR_PATH_tempo}/loop${i}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt ; done) # tee is dispached in all the dir of the job array
......@@ -380,7 +378,7 @@ while [[ $COUNT < $(($LOOP_NB_CONF + 1)) ]] ; do
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --array=1-$LOOP_NB_CONF --job-name=wait_loop_all --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a $(for((i = 1 ; i <= $LOOP_NB_CONF ; i++)) ; do echo ${OUTPUT_DIR_PATH_tempo}/loop${i}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt ; done) # tee is dispached in all the dir of the job array
......@@ -394,7 +392,7 @@ echo -e '#!/bin/sh
LOCAL_USER_VAR+=" SUP_VAR_tempo" # do not forget the space before the variable name
if [[ $R_ANALYSIS_KIND =~ longit || $LOOP_NB_CONF == 1 ) ]] ; then
if [[ $R_ANALYSIS_KIND =~ longit || $LOOP_NB_CONF == 1 ]] ; then
echo "NO NEED TO COMPILE DATA SINCE NO LOOP PERFORMED\n"
else
OUTPUT_DIR_PATH_FINAL="${OUTPUT_DIR_PATH_tempo}/final_res"
......@@ -404,7 +402,7 @@ else
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_compil_conf} $PATH_LIB_CONF $OUTPUT_DIR_PATH_tempo $OUTPUT_DIR_PATH_FINAL $PATH_FUNCTION1_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $LOOP_NB_CONF $R_ANALYSIS_KIND"
R_PROC="R_conf ${r_compil_conf} $PATH_LIB_CONF $OUTPUT_DIR_PATH_tempo $OUTPUT_DIR_PATH_FINAL $r_main_functions_conf $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $LOOP_NB_CONF $R_ANALYSIS_KIND"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_FINAL}/r_console_messages.txt" # closing brace added after OUTPUT_DIR_PATH_FINAL (the expansion was left unterminated). Alternative: "$R_PROC > ${OUTPUT_DIR_PATH_FINAL}/r_console_messages.txt 2>&1" # to add stderr to stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --job-name=compil --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR | tee -a ${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/loop${COUNT}_${PROJECT_NAME_CONF}_slurm_jobID.txt # write all th echo from the $PROC alaso into a log file
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment