Commit 556acf47 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

interm

parent 6c9ca302
......@@ -47,7 +47,7 @@ R_RANDOM_SEED="TRUE" #♥ if FALSE, set.seed(1) is systematically used at the be
# with discovery set 67 indiv (df.nano$cohort_id != "cohortR") and validation set 9 indiv (df.nano$cohort_id == "cohortR")
# "valid_boot" limma and rf training are run once, but the validation set of 9 indiv (df.nano$cohort_id == "cohortR") is bootstrapped using the LOOP_NB_CONF parameter
# "full_cross_validation" rows of the dataset are randomly split in two (no replacement), according to CROSS_VALID_RATIO, forming the discovery and validation set
R_ANALYSIS_KIND="full_cross_validation"
R_ANALYSIS_KIND="valid_boot"
CROSS_VALID_RATIO=0.8 # proportion (nb indiv randomly selected (wo replacement) for the discovery set) / (total number of indiv)
# -> the validation set is formed by the remaining indiv, with proportion 1 - CROSS_VALID_RATIO
......
This diff is collapsed.
......@@ -121,7 +121,7 @@ req.package.list <- c(
"lubridate",
"RCurl"
)
if(path.lib == "none"){
if(all(path.lib == "none")){
path.lib <- .libPaths() # .libPaths(new = path.lib) # or .libPaths(new = c(.libPaths(), path.lib))
}else{
# .libPaths(new = ) add path to default path
......@@ -174,10 +174,10 @@ tempo <- fun_param_check(data = args, class = "character", length = length(tempo
tempo <- fun_param_check(data = tempo.arg.names, class = "character", length = length(tempo.arg.names)) ; eval(ee)
# imported objects
tempo <- fun_param_check(data = path.lib, class = "character", length = 1) ; eval(ee)
if(tempo$problem == FALSE & path.lib != "none"){
if( ! dir.exists(path.lib)){
cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.in PARAMETER DOES NOT EXISTS: ", path.in, "\n\n============\n\n"))
tempo <- fun_param_check(data = path.lib, class = "character") ; eval(ee)
if(tempo$problem == FALSE & ! all(path.lib == "none")){
if( ! all(dir.exists(path.lib))){
cat(paste0("\n\n============\n\nERROR: DIRECTORY PATH INDICATED IN THE path.lib PARAMETER DOES NOT EXISTS:\n", paste(path.lib, collapse = "\n"), "\n\n============\n\n"))
arg.check <- TRUE
}
}
......@@ -349,7 +349,7 @@ if(any((analysis.kind == "longit" & slurm.loop.nb == 1) | (analysis.kind == "val
fun_export_data(path = path.out, data = "################################ LIMMA ANALYSIS", output = log.file)
fun_export_data(path = path.out, data = 'PARAMETERS USED: topTable(fit, coef = "R - NR", adjust.method = "BH", p.value = 0.05, number = nrow(X))' , output = log.file)
if(nrow(ttab) > 0){
fun_export_data(path = path.out, data = ttab , output = log.file)
fun_export_data(path = path.out, data = ttab , output = log.file, rownames.kept = TRUE)
}else{
fun_export_data(path = path.out, data = "NO GENE LIST RETURNED WITH THE PARAMETERS USED (P VALUES ABOVE 0.05 AFTER CORRECTION FOR INSTANCE)" , output = log.file)
}
......@@ -493,7 +493,9 @@ if(any((analysis.kind == "longit" & slurm.loop.nb == 1) | (analysis.kind == "val
feature_importance <- getFeatureImportance(mod$learner.model$next.model)
df_imp <- data.frame(features = names(feature_importance$res),
importance = t(feature_importance$res), stringsAsFactors = FALSE)
importance.plot <- ggplot2::ggplot(data = df_imp, aes(x = reorder(features, -importance), y=importance)) + geom_bar(stat = "identity") + theme_bw() +
importance.plot <- ggplot2::ggplot(data = df_imp, aes(x = reorder(features, -importance), y=importance)) + geom_bar(stat = "identity") +
theme_bw() +
ggplot2::xlab("") +
theme(
axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),
plot.title = ggplot2::element_text(hjust=1, vjust=1, size = label.size),
......
......@@ -202,8 +202,9 @@ source $CONFIG_FILE
#### check the files and variables necessary for the process .sh file
# TOOLS
for i in "R_conf" ; do
# check alias. Beware: TEST_ALIAS used later to create a tempo alias file for TARS
TEST_ALIAS="R_conf" # put space between strings if several to test
for i in $TEST_ALIAS ; do
if [[ ! $(type -t $i) == "alias" ]] ; then
echo -e "\n### ERROR ### $i ALIAS NOT AVAILABLE IN THE GLOBAL ENVIRONMENT: CHECK IN ${CONFIG_FILE}\n"
usage
......@@ -343,7 +344,39 @@ mkdir ${OUTPUT_DIR_PATH_tempo}
for ((i=1; i<=$LOOP_NB_CONF; i++)); do
mkdir ${OUTPUT_DIR_PATH_tempo}/loop$i
done
SUP_VAR="SCRIPT_USED=$SCRIPT_USED,JOB_ID=$JOB_ID,CONFIG_FILE=$CONFIG_FILE,REMOVE_TMP=$REMOVE_TMP,OUTPUT_DIR_PATH_tempo=$OUTPUT_DIR_PATH_tempo" # all this must be injected into the TARS job
# create the tempo alias file for TARS
ALIAS_FILE="${OUTPUT_DIR_PATH_tempo}/tempo_alias_${JOB_ID}.txt"
> ${ALIAS_FILE} ; # empty file created
for i in $TEST_ALIAS ; do
# echo $i >> ${ALIAS_FILE}
echo -e "alias $(alias $i )\n" >> ${ALIAS_FILE} ; # beware: space before ) is very important
# DANGER: in sh SCRIPT I have to use this : echo -e "alias $(alias $i )\n" >> ${ALIAS_FILE} ;
# But on TARS DIRECTLY typing, I have to use echo -e "$(alias $i )\n" >> ${ALIAS_FILE} ;
done
# CONFIG_FILE=$CONFIG_FILE, # !!!!!!!!!!!!!!!! DANGER: NEVER PUT THE CONFIG_FILE IN SUP_VAR TO SOURCE IT AGAIN, BECAUSE THE CONFIG FILE MAY HAVE BEEN MODIFIED FOR ANOTHER JOB !! PUT BY HAND ALL THE DESIRED VARIABLES IN SUP_VAR
# R_conf, # does not work because it is an alias
# all these in SUP_VAR will be injected into the TARS job
SUP_VAR="JOB_ID=$JOB_ID"
SUP_VAR+=",REMOVE_TMP=$REMOVE_TMP"
# For sbatch script
SUP_VAR+=",OUTPUT_DIR_PATH_tempo=$OUTPUT_DIR_PATH_tempo"
SUP_VAR+=",ALIAS_FILE=$ALIAS_FILE"
# for R script args
SUP_VAR+=",r_main_conf=${r_main_conf}"
SUP_VAR+=",PATH_LIB_CONF=$PATH_LIB_CONF"
SUP_VAR+=",PATH_IN_CONF=$PATH_IN_CONF"
SUP_VAR+=",r_main_functions_conf=$r_main_functions_conf"
SUP_VAR+=",FILE_NAME1_CONF=$FILE_NAME1_CONF"
SUP_VAR+=",ML_BOOTSTRAP_NB_CONF=$ML_BOOTSTRAP_NB_CONF"
SUP_VAR+=",PROJECT_NAME_CONF=$PROJECT_NAME_CONF"
SUP_VAR+=",LABEL_SIZE=$LABEL_SIZE"
SUP_VAR+=",R_OPT_TXT_CONF=$R_OPT_TXT_CONF"
SUP_VAR+=",R_ANALYSIS_KIND=$R_ANALYSIS_KIND"
SUP_VAR+=",CROSS_VALID_RATIO=$CROSS_VALID_RATIO"
SUP_VAR+=",R_RANDOM_SEED=$R_RANDOM_SEED"
LOCAL_USER_VAR+=" OUTPUT_DIR_PATH_tempo SUP_VAR" # do not forget the space before the variable name
COUNT=1
......@@ -352,11 +385,12 @@ while [[ $COUNT -lt $(($LOOP_NB_CONF + 1)) ]] ; do # is less than
if [[ $R_ANALYSIS_KIND =~ valid_boot && $COUNT == "1" ]] ; then
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# source $CONFIG_FILE # !!!!!!!!!!!!!!!! DANGER NEVER SOURCE AGAIN BECAUSE THE CONFIG FILE CAN HAVE BEEN MODIFIED FOR ANOTHER JOB !! PUT BY HAND ALL THE DESIRED VARIABLES IN SUP_VAR
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
echo -e "\nSBATCH CORRESPONDING TO R_ANALYSIS_KIND =~ valid_boot && COUNT == 1\n"
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF $COUNT $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
source $ALIAS_FILE # recover the alias
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF $COUNT $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --job-name=wait_loop1_${JOB_ID} --wait --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo | tee -a ${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/loop${COUNT}_${PROJECT_NAME_CONF}_slurm_jobID.txt # write all th echo from the $PROC alaso into a log file
......@@ -369,10 +403,11 @@ while [[ $COUNT -lt $(($LOOP_NB_CONF + 1)) ]] ; do # is less than
' | sbatch --dependency=singleton -p $DEDICATED_CONF --job-name=wait_loop1_${JOB_ID} --wait --qos $QOS_CONF --time 1 -c 1 --mem-per-cpu 10M --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# source $CONFIG_FILE # !!!!!!!!!!!!!!!! DANGER NEVER SOURCE AGAIN BECAUSE THE CONFIG FILE CAN HAVE BEEN MODIFIED FOR ANOTHER JOB !! PUT BY HAND ALL THE DESIRED VARIABLES IN SUP_VAR
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
echo -e "\nSBATCH CORRESPONDING TO R_ANALYSIS_KIND =~ valid_boot && COUNT > 1\n"
source $ALIAS_FILE # recover the alias
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED" # beware $COUNT replaced by ${SLURM_ARRAY_TASK_ID} because job array
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
......@@ -383,11 +418,12 @@ while [[ $COUNT -lt $(($LOOP_NB_CONF + 1)) ]] ; do # is less than
else
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# source $CONFIG_FILE # !!!!!!!!!!!!!!!! DANGER NEVER SOURCE AGAIN BECAUSE THE CONFIG FILE CAN HAVE BEEN MODIFIED FOR ANOTHER JOB !! PUT BY HAND ALL THE DESIRED VARIABLES IN SUP_VAR
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
echo -e "\nSBATCH CORRESPONDING TO ELSE\n"
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
source $ALIAS_FILE # recover the alias
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $r_main_functions_conf $FILE_NAME1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO $R_RANDOM_SEED"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --array=1-$LOOP_NB_CONF --job-name=wait_loop_all --wait --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo | tee -a $(for((i = 1 ; i <= $LOOP_NB_CONF ; i++)) ; do echo ${OUTPUT_DIR_PATH_tempo}/loop${i}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt ; done) # tee is dispached in all the dir of the job array
......@@ -408,13 +444,16 @@ if [[ $R_ANALYSIS_KIND =~ longit || $LOOP_NB_CONF == "1" ]] ; then
else
OUTPUT_DIR_PATH_FINAL="${OUTPUT_DIR_PATH_tempo}/final_res"
mkdir ${OUTPUT_DIR_PATH_FINAL}
SUP_VAR+=",r_compil_conf=$r_compil_conf"
SUP_VAR+=",OUTPUT_DIR_PATH_FINAL=$OUTPUT_DIR_PATH_FINAL"
SUP_VAR+=",LOOP_NB_CONF=$LOOP_NB_CONF"
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# source $CONFIG_FILE # !!!!!!!!!!!!!!!! DANGER NEVER SOURCE AGAIN BECAUSE THE CONFIG FILE CAN HAVE BEEN MODIFIED FOR ANOTHER JOB !! PUT BY HAND ALL THE DESIRED VARIABLES IN SUP_VAR
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
echo -e "\nSBATCH CORRESPONDING TO COMPIL\n"
R_PROC="R_conf ${r_compil_conf} $PATH_LIB_CONF ${OUTPUT_DIR_PATH_tempo}/ ${OUTPUT_DIR_PATH_FINAL}/ $r_main_functions_conf $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF $LOOP_NB_CONF $R_ANALYSIS_KIND"
source $ALIAS_FILE # recover the alias
R_PROC="R_conf ${r_compil_conf} $PATH_LIB_CONF ${OUTPUT_DIR_PATH_tempo}/ ${OUTPUT_DIR_PATH_FINAL}/ $r_main_functions_conf $PROJECT_NAME_CONF $LABEL_SIZE $R_OPT_TXT_CONF $LOOP_NB_CONF $R_ANALYSIS_KIND"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_FINAL}/r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_FINAL}/r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --job-name=compil --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR | tee -a ${OUTPUT_DIR_PATH_FINAL}/${PROJECT_NAME_CONF}_slurm_jobID.txt # write all th echo from the $PROC alaso into a log file
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment