diff --git a/wGRR b/wGRR index 03c6101a62f99ef8c0d371150ed680cc4068bbb7..288bac2087caa9b358ef22f6ac62f1bb9426b617 100755 --- a/wGRR +++ b/wGRR @@ -7,7 +7,7 @@ trap 'rm -rf "$tmp"' EXIT export LC_ALL=C SECONDS=0 -readonly VERSION=1.4.2 +readonly VERSION=1.4.3 bold=$(tput bold) green=$(tput setaf 2) @@ -136,6 +136,7 @@ MIDENT=0 MMS_MAX_SEQ_PARAM="" EST_M8_SIZE=0 tmp="wgrrtmp" +ARRAYMAX=100000 ## catch option values while getopts :fTsji:p:o:t:a:m:l:C:I:c:n: option ; do @@ -515,31 +516,36 @@ if [[ $BATCHFLAG == 0 ]] ; then fi else printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Submitting $NJOBS jobs for wGRR calculation" | tee -a ${OUT}.wgrr.log - if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then - printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Limiting the number of simultaneous jobs to $MAXJOBS" | tee -a ${OUT}.wgrr.log - JOBARRAY="1-${NJOBS}%${MAXJOBS}" - else - JOBARRAY="1-${NJOBS}" - fi - if [[ $FAST == 1 ]] ; then - PARTITION=("common,dedicated" "-q" "fast") - fi if [[ $REQMEM == "" ]] ; then printf "%-17s -- %s\n" "[ERROR]" "Something went wrong when estimating the required memory." | tee -a ${OUT}.wgrr.log exit 1 fi - JID=$(sbatch --parsable --wait -p ${PARTITION} --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $AWKEXE $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp") - if [[ `sacct -j $JID | grep "TIMEOUT"` ]] ; then - printf "${bold}${red}%-17s -- %s\n${normal}" "[ERROR]" "Some workers encountered a TIMEOUT." | tee -a ${OUT}.wgrr.log - printf "${bold}${red}%-17s -- %s\n${normal}" "[ERROR]" "Saving the partial files in ${OUT}.wgrr_part directory." | tee -a ${OUT}.wgrr.log - rm -rf "$OUT".wgrr_part - mkdir "$OUT".wgrr_part - mv $tmp/"$OUT".wgrr_part.* "$OUT".wgrr_part/ - exit 1 + if [[ $FAST == 1 ]] ; then + PARTITION=("common,dedicated" "-q" "fast") + fi + + if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then + printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Limiting the number of simultaneous jobs to $MAXJOBS" | tee -a ${OUT}.wgrr.log fi - PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') - printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log - QT=$((QT+PQT)) + + JOBARRAY=($(echo ${NJOBS} | awk -v M=${ARRAYMAX} '{i=0;j=1;while(i+M <= $1){i=i+M;if(c==""){c=j"-"i}else{c=c" "j"-"i};j=i+1}if(i<$1){if(c==""){c=j"-"$1}else{c=c" "j"-"$1}}print c}' )) + for JA in ${JOBARRAY} ; do + if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then + JA="${JA}%${MAXJOBS}" + fi + JID=$(sbatch --parsable --wait -p ${PARTITION} --array=${JA} -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $AWKEXE $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp") + if [[ `sacct -j $JID | grep "TIMEOUT"` ]] ; then + printf "${bold}${red}%-17s -- %s\n${normal}" "[ERROR]" "Some workers encountered a TIMEOUT." | tee -a ${OUT}.wgrr.log + printf "${bold}${red}%-17s -- %s\n${normal}" "[ERROR]" "Saving the partial files in ${OUT}.wgrr_part directory." | tee -a ${OUT}.wgrr.log + rm -rf "$OUT".wgrr_part + mkdir "$OUT".wgrr_part + mv $tmp/"$OUT".wgrr_part.* "$OUT".wgrr_part/ + exit 1 + fi + PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') + printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log + QT=$((QT+PQT)) + done mkdir -p "$OUT".logs mv slurm-"$JID"_*.out "$OUT".logs/ fi