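#!/bin/bash
# The line above is the shebang (#! https://en.wikipedia.org/wiki/Shebang_%28Unix%29): it tells the system which program must interpret the script when it is executed. It must stay alone on its line (anything after the interpreter path is passed to the interpreter as an argument). Probably optional here.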
# export allows the variable to be used in subprocesses. Without export, the variable is only available in the current process. Example: ANNOVAR_CONF=/bioinfo/local/build/annovar_20130729 (current process only) instead of export ANNOVAR_CONF=/bioinfo/local/build/annovar_20130729 (also visible to subprocesses)
# _conf: lowercase for aliases and scripts, uppercase for variables
@@ -39,7 +38,7 @@ FILE_NAME1_CONF="supplementary_data_file_test.csv" # name of the data file to im
# Iteration counts and R seed policy.
# ML_BOOTSTRAP_NB_CONF: presumably the number of bootstrap rounds of the machine-learning step -- TODO confirm against the consuming script.
# LOOP_NB_CONF: loop count; the analysis-kind notes in this file say it drives the "valid_boot" bootstrap of the validation set.
ML_BOOTSTRAP_NB_CONF=3
LOOP_NB_CONF=3
# NOTE: a space is required before '#': in Bash a '#' only starts a comment at the beginning of a word,
# so "FALSE"#... would become part of the value and the rest of the line would be parsed as a command.
# NOTE(review): R_RANDOM_SEED is assigned twice; the later assignment ("TRUE") is the effective one.
R_RANDOM_SEED="FALSE" #♥ if FALSE, set.seed(1) is systematically used at the beginning of the R script, otherwise, the seed is random (and saved in the RData output)
R_RANDOM_SEED="TRUE" #♥ if FALSE, set.seed(1) is systematically used at the beginning of the R script, otherwise, the seed is random (and saved in the RData output)
################ kind of analysis
...
...
@@ -48,7 +47,7 @@ R_RANDOM_SEED="FALSE" #♥ if FALSE, set.seed(1) is systematically used at the b
# with discovery set 67 indiv (df.nano$cohort_id != "cohortR") and validation set 9 indiv (df.nano$cohort_id == "cohortR")
# "valid_boot": limma and rf training are run once, but the validation set of 9 indiv (df.nano$cohort_id == "cohortR") is bootstrapped using the LOOP_NB_CONF parameter
# "full_cross_validation": rows of the dataset are randomly split in two (no replacement), according to CROSS_VALID_RATIO, forming the discovery and validation sets
# Kind of analysis run by the R script, and the split ratio used by "full_cross_validation".
# NOTE(review): R_ANALYSIS_KIND is assigned twice; the later assignment ("full_cross_validation") is the effective one.
R_ANALYSIS_KIND="longit"
R_ANALYSIS_KIND="full_cross_validation"
# NOTE: a space is required before '#': without it, Bash keeps '#' in the value (0.8#) and parses the rest of the line as a command.
CROSS_VALID_RATIO=0.8 # proportion (nb indiv randomly selected (wo replacement) for the discovery set) / (total number of indiv)
# -> the validation set is formed by the remaining indiv, with proportion 1 - CROSS_VALID_RATIO
# At the end of the learning procedure, there is one optimal value for the number of genes to include in the model. We use the same strategy as in the learning procedure to select this number of genes and train the random forest algorithm on them. The names of the features are given below.
# echo -e "\nJOB COMMAND EXECUTED:\n$0\n" # to get the line that executes the job but does not work (gives /bioinfo/guests/gmillot/Gael_code/workflow_fastq_gael.sh)
# BEWARE: double __ is a reserved character string to deal with spaces in paths
# `module purge` (Environment Modules / Lmod command) unloads all currently loaded modulefiles, so what follows starts from a clean module state.
module purge
...
...
@@ -46,7 +45,8 @@ function single_path_with_regex_fun { # comes from little_bash_functions-v1.0.0/
# 0: single path detected is valid
# 1: error: $1 not provided
# 2: error: $2 provided or more than one path detected
@@ -71,10 +71,16 @@ function single_path_with_regex_fun { # comes from little_bash_functions-v1.0.0/
return 2
else
shopt-s extglob # -s unable global extention, ie the recognition of special global pattern in path, like [[:digit:]]
if[[!(-d${ARG1_ARR[0]}||-f${ARG1_ARR[0]})]];then
if[[$(echo${ARG1_ARR[0]} | grep-cE'^http')== 1 ]];then# -cE to specify extended and -c to return the number of match (here 0 or one only)
if[[$(wget ${ARG1_ARR[0]}>/dev/null 2>&1 ;echo$?)!= 0 ]];then# check the valid url. wget $url >/dev/null 2>&1 prevent any action and print. echo $? print the result of the last command (0 = success, other number = failure)
echo-e"\n### ERROR ### SPECIFIED URL IN single_path_with_regex_fun DOES NOT EXISTS: ${ARG1_ARR[0]}\n";
shopt-u extglob # -u disable global extention, ie the recognition of special global pattern in path, like [[:digit:]]