Commit 62f37620 authored by Gael  MILLOT's avatar Gael MILLOT
Browse files

interm

parent 61c6eb5f
......@@ -8,7 +8,7 @@
# SOFTWARES (LOCALS OR IN MODULES)
shopt -s expand_aliases # make sure that aliases are expanded in the different environments
# R_conf: loads the gcc/4.7.4 and R/3.5.0 modules, then calls Rscript
alias R_conf='module load gcc/4.7.4 R/3.5.0 ; Rscript'
# alternative definitions of R_conf, kept for reference (locally installed R 3.5.2):
# alias R_conf='/pasteur/homes/gmillot/softwares/R/R-3.5.2/lib64/R/bin/Rscript'
# alias R_conf='module load gcc/4.7.4 ; /pasteur/homes/gmillot/softwares/R/R-3.5.2/lib64/R/bin/Rscript'
#_______________________________________________________________________________________________
# SCRIPTS TO RUN
......@@ -41,6 +41,17 @@ NAME_SOURCE_FILE1_CONF="cute_little_R_functions.R"
ML_BOOTSTRAP_NB_CONF=3
LOOP_NB_CONF=3
################ kind of analysis
# R_ANALYSIS_KIND is one of:
# "longit" the whole R script (limma, rf training, rf validation) is run only once (LOOP_NB_CONF is reset to 1 if != 1),
# with a discovery set of 67 indiv (df.nano$cohort_id != "cohortR") and a validation set of 9 indiv (df.nano$cohort_id == "cohortR")
# "valid_boot" limma and rf training are run once, then the validation set of 9 indiv (df.nano$cohort_id == "cohortR") is bootstrapped, the number of bootstraps being set by LOOP_NB_CONF
# "full_cross_validation" the rows of the dataset are randomly split in two (without replacement), according to CROSS_VALID_RATIO, forming the discovery and validation sets
R_ANALYSIS_KIND="longit"
CROSS_VALID_RATIO=0.8 # proportion (nb of indiv randomly selected (without replacement) for the discovery set) / (total number of indiv)
# -> the validation set is formed by the remaining indiv, i.e., a proportion of 1 - CROSS_VALID_RATIO
################ other
PROJECT_NAME_CONF="rogge_12231"
......@@ -48,8 +59,8 @@ PROJECT_NAME_CONF="rogge_12231"
################ graphical parameters
R_PDF_DISPLAY_CONF="TRUE" # will be converted in the R script
LABEL_SIZE=4
R_OPT_TXT_CONF="notxt"
LABEL_SIZE=4 # size of the labels in graphs
R_OPT_TXT_CONF="no.txt" # optional text to add
......
This diff is collapsed.
......@@ -136,56 +136,36 @@ echo -e "JOB ID: $JOB_ID\n"
# usage: print the command-line help of this launcher on stdout.
# Arguments: none
# Outputs:   the help text; the script name shown is the last path component of $0
usage () {
# ${0##*/} strips the directory part of $0 without forking basename, and is not
# subject to word splitting when the script path contains spaces
local script_name="${0##*/}"
cat << EOF
USAGE: ${script_name} -c config_file.conf -K ini_check
USAGE: ${script_name} -c config_file.conf
OPTIONS:
-h help
-c name and path of the config .conf file. Optional if rogge_12231.conf is in /pasteur/homes/gmillot/rogge12231/
-V sbatch --export local variable option. Declare here supplemental variables that will be used in the process.sh file (other than defined in the previous options)
-K --running_operation
'ini_check' - check the initial parameters and files
'running' - run the analysis
Optional (default: perform everything). Separate arguments using comma
-r --remove_tm_files logical option: Remove temporary files ? Only 0 or 1 admitted. Default true (0).
EX1: ${script_name} -p ~/process.sh -K ini_check,running
EX1: ${script_name} -c /pasteur/homes/gmillot/rogge12231/
EOF
}
while getopts ":hc:V:K:r:" OPTION ; do
while getopts ":hc:r:" OPTION ; do
# add : after the option name to specify that something is required (-h has nothing required after)
# the first : before h is to induce getopts switching to "silent error reporting mode" (disable annoying messages).
# Dispatch on the option letter returned by getopts.
# Every option taking a value is first passed to verif_fun, then stored in its variable.
case $OPTION in
    h) usage; exit 1 ;;
    c) verif_fun "$OPTARG" ; CONFIG_FILE=$OPTARG ;;
    V) verif_fun "$OPTARG" ; SUP_VAR=$OPTARG ;;
    K) verif_fun "$OPTARG" ; RUNNING_OP=$OPTARG ;;
    r) verif_fun "$OPTARG" ; REMOVE_TMP=$OPTARG ;;
    # unknown option letter (getopts silent mode: the letter is in OPTARG)
    \?) echo -e "### ERROR ### INVALID OPTION: - $OPTARG\n" ; usage; exit 1;;
    # option given without its required argument: the message goes to stderr, then the help is displayed
    # (echo needs -e to interpret \n, and a ; is required so that usage is called and not printed as a word)
    :) echo -e "### ERROR ### OPTION -$OPTARG REQUIRES AN ARGUMENT\n" >&2 ; usage; exit 1;;
esac
done
shift $((OPTIND-1))
LOCAL_USER_VAR+=" CONFIG_FILE PROCESS_FILE SUP_VAR RUNNING_OP REMOVE_TMP" # do not forget the space before the variable name
LOCAL_USER_VAR+=" CONFIG_FILE REMOVE_TMP" # do not forget the space before the variable name
################ CHECK
# Validate the -K option (RUNNING_OP).
# Empty: every operation is performed. Otherwise the value must be a comma-separated
# list made only of ini_check and/or running (the regex is anchored so that strings
# that merely contain one of these words are rejected).
running_op_regex='^(ini_check|running)(,(ini_check|running))*$'
if [[ -z $RUNNING_OP ]]; then
    RUNNING_OP="ini_check,running"
else
    if [[ ! $RUNNING_OP =~ $running_op_regex ]]; then
        echo -e "\n### ERROR ### -K OPTION WITH INVALID ARGUMENT. ONLY ini_check, running AUTHORIZED: $RUNNING_OP\n"
        usage
        exit 1
    else
        echo -e "FOLLOWING OPERATIONS WILL BE EXECUTED (-K OPTION): $RUNNING_OP\n"
    fi
fi
######## CONF FILE CHECKING
if [[ ! -z $CONFIG_FILE ]] ; then
......@@ -333,49 +313,84 @@ for i in "show_time_fun" ; do
done
echo -e "\n"
################ END CHECK
# check the correct values
################ sbatch PARAMETERS
if [[ ! -z $SUP_VAR ]] ; then
# Refuse a -V content that reuses a variable name (or a current value) already handled by the other options.
# The names are always part of the pattern; the values of CONFIG_FILE and PROCESS_FILE are
# appended only when non-empty, because an empty alternative in an extended regex is
# undefined by POSIX (and may match every string).
reserved_sup_var_regex="CONFIG_FILE|PROCESS_FILE|SUP_VAR|RUNNING_OP|REMOVE_TMP"
if [[ -n $CONFIG_FILE ]] ; then
    reserved_sup_var_regex+="|$CONFIG_FILE"
fi
if [[ -n $PROCESS_FILE ]] ; then
    reserved_sup_var_regex+="|$PROCESS_FILE"
fi
# a space is mandatory after [[ : without it the line is parsed as a pipeline of commands
if [[ $SUP_VAR =~ $reserved_sup_var_regex ]] ; then
    echo -e "\n### ERROR ### VARIABLE(S) $SUP_VAR DECLARED IN -V SHOULD BE OR ALREADY DECLARED IN -d, -o, -c, -V -K -r\n"
    usage
    exit 1
fi
# Validate R_ANALYSIS_KIND: it must be exactly one of the three supported kinds.
# The regex is anchored so that a value that merely contains one of the keywords is rejected.
r_analysis_kind_regex='^(longit|valid_boot|full_cross_validation)$'
if [[ ! $R_ANALYSIS_KIND =~ $r_analysis_kind_regex ]]; then
    echo -e "\n\n### ERROR ### INVALID SETTING FOR R_ANALYSIS_KIND\nONLY longit, valid_boot OR full_cross_validation AUTHORIZED: $R_ANALYSIS_KIND\nCHECK IN $CONFIG_FILE\n\n"
    usage
    exit 1
else
    SUP_VAR="" # emptied here
    echo -e "\n\nFOLLOWING R ANALYSIS WILL BE EXECUTED: $R_ANALYSIS_KIND\n\n"
fi
RUNNING_OP_EXPORT=$(echo $RUNNING_OP | sed 's/,/_._/g') # replace comma by _._ , Important because when imported into sbatch variables, commas are not considered and everything behind is skipped
SUP_VAR="SCRIPT_USED=$SCRIPT_USED,CONFIG_FILE=$CONFIG_FILE,RUNNING_OP=$RUNNING_OP_EXPORT,REMOVE_TMP=$REMOVE_TMP"
# With the longit kind of analysis, a single loop is run:
# any other value of LOOP_NB_CONF is overwritten by 1 and the user is informed.
if [[ $R_ANALYSIS_KIND == *longit* ]] && [[ $LOOP_NB_CONF != 1 ]] ; then
    LOOP_NB_CONF=1
    echo -e "\n\nR_ANALYSIS_KIND PARAMETER SET TO $R_ANALYSIS_KIND: LOOP_NB_CONF PARAMETER RESET TO 1\n\n"
fi
################ END sbatch PARAMETERS
################ END CHECK
################ sbatch LOOP
# Main output directory of the run: output path + project name + job ID
OUTPUT_DIR_PATH_tempo="${PATH_OUT_CONF}${PROJECT_NAME_CONF}_${JOB_ID}"
mkdir ${OUTPUT_DIR_PATH_tempo}
LOCAL_USER_VAR+=" OUTPUT_DIR_PATH_tempo" # do not forget the space before the variable name
# One sbatch submission per loop: each loop gets its own loop$i/ directory,
# and a small script (the single-quoted text below) is piped to sbatch
for i in `seq 1 $LOOP_NB_CONF` ; do
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop$i/"
mkdir ${OUTPUT_DIR_PATH_tempo2}
# variables passed to the job through sbatch --export (comma-separated NAME=value pairs)
SUP_VAR_tempo="$SUP_VAR,OUTPUT_DIR_PATH_tempo2=$OUTPUT_DIR_PATH_tempo2,i=$i"
# the text between single quotes is not expanded here: its variables are resolved inside the job
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $i"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${i}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${i}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF -J job_$i --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo | tee -a ${OUTPUT_DIR_PATH_tempo2}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt # what sbatch prints on stdout is displayed and also appended to a per-loop log file
# tricky part of this sbatch: SUP_VAR_tempo is used by the script piped to sbatch
# Create one loop<i> sub-directory per loop inside the main output directory.
# The path is quoted so that a directory name containing spaces or glob characters is not split.
for ((i=1; i<=LOOP_NB_CONF; i++)); do
    mkdir "${OUTPUT_DIR_PATH_tempo}/loop${i}"
done
# Comma-separated NAME=value list later given to sbatch --export
SUP_VAR="SCRIPT_USED=$SCRIPT_USED,CONFIG_FILE=$CONFIG_FILE,REMOVE_TMP=$REMOVE_TMP,OUTPUT_DIR_PATH_tempo=$OUTPUT_DIR_PATH_tempo" # all this must be injected into the TARS job
LOCAL_USER_VAR+=" OUTPUT_DIR_PATH_tempo SUP_VAR" # do not forget the space before the variable name
# Submission of the R jobs to slurm.
# - valid_boot: loop 1 is submitted alone (sbatch --wait), then loops 2 to LOOP_NB_CONF are submitted as a job array
# - otherwise: loops 1 to LOOP_NB_CONF are submitted as a single job array
# The text between single quotes is the script piped to sbatch: it is not expanded here,
# its variables are resolved inside the job (they come from --export and from the sourced config file).
# All the tests on COUNT use (( )): inside [[ ]], < and > compare strings, not numbers
# (as strings, "2" < "11" is false, which stopped the loop before the job array when LOOP_NB_CONF >= 10).
COUNT=0
while (( COUNT < LOOP_NB_CONF + 1 )) ; do
((COUNT=COUNT + 1))
SUP_VAR_tempo="$SUP_VAR,COUNT=$COUNT"
if [[ $R_ANALYSIS_KIND =~ valid_boot ]] && (( COUNT == 1 )) ; then
# valid_boot, loop 1: single job, named wait_loop1
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF $COUNT $R_ANALYSIS_KIND $CROSS_VALID_RATIO"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${COUNT}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --job-name=wait_loop1 --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a ${OUTPUT_DIR_PATH_tempo}/loop${COUNT}/loop${COUNT}_${PROJECT_NAME_CONF}_slurm_jobID.txt # what sbatch prints on stdout is displayed and also appended to a log file
# tricky part of this sbatch because the SUP_VAR_tempo will be used in the script piped to sbatch
((COUNT=COUNT + 1))
elif [[ $R_ANALYSIS_KIND =~ valid_boot ]] && (( COUNT > 1 )) ; then
# valid_boot, remaining loops: first a small job with the wait_loop1 name and --dependency=singleton, then the job array 2-LOOP_NB_CONF
echo -e '#!/bin/sh
echo "LOOP 1 HAS CORRECTLY BEEN WAIT FOR END\n"
' | sbatch --dependency=singleton --job-name=wait_loop1 --wait -p $DEDICATED_CONF --qos $QOS_CONF --time 1 -c 1 --mem-per-cpu 10M --mail-type END,FAIL --mail-user $MAIL_CONF
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO" # beware $COUNT replaced by ${SLURM_ARRAY_TASK_ID} because job array
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --array=2-$LOOP_NB_CONF --job-name=wait_loop_all --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a $(for((i = 2 ; i <= $LOOP_NB_CONF ; i++)) ; do echo ${OUTPUT_DIR_PATH_tempo}/loop${i}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt ; done) # tee is dispatched in all the dir of the job array
# tricky part of this sbatch because the SUP_VAR_tempo will be used in the script piped to sbatch
COUNT=$((LOOP_NB_CONF + 1)) # everything has been submitted: leave the while loop
else
# other kinds of analysis: a single job array covering loops 1-LOOP_NB_CONF
echo -e '#!/bin/sh
# write the previous line exactly like this, with no comments, otherwise do not work
source $CONFIG_FILE # never forget this because another environment
OUTPUT_DIR_PATH_tempo2="${OUTPUT_DIR_PATH_tempo}/loop${SLURM_ARRAY_TASK_ID}/"
# next line cannot be put outside (which would have been convenient -> put into the SUP_VAR_tempo for display. But SUP_VAR_tempo for sbatch do not like spaces)
R_PROC="R_conf ${r_main_conf} $PATH_LIB_CONF $PATH_IN_CONF ${OUTPUT_DIR_PATH_tempo2} $PATH_FUNCTION1_CONF $FILE_NAME1_CONF $NAME_SOURCE_FILE1_CONF $ML_BOOTSTRAP_NB_CONF $PROJECT_NAME_CONF $R_PDF_DISPLAY_CONF $LABEL_SIZE $R_OPT_TXT_CONF ${SLURM_ARRAY_TASK_ID} $R_ANALYSIS_KIND $CROSS_VALID_RATIO"
R_PROC2="${R_PROC} &> ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt" # or "$R_PROC > ${OUTPUT_DIR_PATH_tempo2}loop${SLURM_ARRAY_TASK_ID}_r_console_messages.txt 2>&1" # to add the estderror in the stdout
eval "$R_PROC2"
' | sbatch -p $DEDICATED_CONF --array=1-$LOOP_NB_CONF --job-name=wait_loop_all --qos $QOS_CONF --time $MAX_RUNNING_TIME_CONF -c $NB_CPU_PER_TASK_CONF --mem-per-cpu $MEM_PER_CPU_CONF --mail-type END,FAIL --mail-user $MAIL_CONF --export $SUP_VAR_tempo --wait | tee -a $(for((i = 1 ; i <= $LOOP_NB_CONF ; i++)) ; do echo ${OUTPUT_DIR_PATH_tempo}/loop${i}/loop${i}_${PROJECT_NAME_CONF}_slurm_jobID.txt ; done) # tee is dispatched in all the dir of the job array
# tricky part of this sbatch because the SUP_VAR_tempo will be used in the script piped to sbatch
COUNT=$((LOOP_NB_CONF + 1)) # everything has been submitted: leave the while loop
fi
done
# Last submission: a minimal job (1 cpu, 10M, time limit of 1) that only prints a message.
# It is submitted with the wait_loop_all job name, --dependency=singleton and --wait.
# NOTE(review): presumably this keeps the launcher blocked until every wait_loop_all job has ended - confirm against the sbatch documentation
echo -e '#!/bin/sh
echo "ALL LOOPS HAVE CORRECTLY BEEN WAIT FOR END\n"
' | sbatch --dependency=singleton --job-name=wait_loop_all --wait -p $DEDICATED_CONF --qos $QOS_CONF --time 1 -c 1 --mem-per-cpu 10M --mail-type END,FAIL --mail-user $MAIL_CONF
LOCAL_USER_VAR+=" SUP_VAR_tempo" # do not forget the space before the variable name
################ END MAIN CODE
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment