diff --git a/README.md b/README.md index 95ebe3cd824718ed8da11ccf7de71e4a8fe2ebf8..84c22c8506e8e62fbebeb77f952293282498ace6 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ chmod +x wGRR* ## Usage ### On a local machine ```bash -./wGRR -f $fasta [-p $mmseqs2_path -o $output_prefix -t $threads -a $comparisons -T] +./wGRR -i $fasta [-p $mmseqs2_path -o $output_prefix -t $threads -a $comparisons -T -f] ``` ### On an interactive session on Maestro @@ -47,6 +47,8 @@ sbatch -p hubbioit ./wGRR -f test_2.prt -t 30 This will run wGRR on the file test_2.prt on the hubbioit partition. The MMseqs job will be submitted to the cluster's scheduler with 30 CPUs. Then for the actual wGRR calculation, the required amount of jobs (depending on the value passed with the `-a` option) will be submitted to the queue. If 100 jobs (1 CPU each) are necessary, a job array of 100 jobs will be submitted to the scheduler. You can adjust the number of maximum jobs running simultaneously (to avoid using 100% of your partition's CPUs) by using the `-m` option. +If you do not have access to a dedicated partition, or if there are not enough free CPUs on your partition, you can turn on the `-f` flag. The wGRR workers will then be submitted to the common and dedicated machines of Maestro, on the "fast" Quality of Service (QoS). Jobs running on the fast QoS have a higher priority (so the workers will start sooner) but are limited to 2 hours. The `-m` parameter is also less necessary in this mode, because the workers are spread over many different common resources. However, you must make sure that each worker finishes in less than 2 hours; otherwise the run will fail. + ### Mandatory parameter `$fasta` is a fasta file containing all the proteins of all the elements you want to compare. 
The protein names **must** follow the "gembase" convention: ``` diff --git a/wGRR b/wGRR index ae3a77d0225af9fde1a55368209e7320d2134a90..97ef578dea0b7de1d008fb00b14905028f10a3bc 100755 --- a/wGRR +++ b/wGRR @@ -7,7 +7,7 @@ trap 'rm -rf "$tmp"' EXIT export LC_ALL=C SECONDS=0 -readonly VERSION=0.6 +readonly VERSION=0.7 bold=$(tput bold) normal=$(tput sgr0) @@ -334,6 +334,14 @@ else PARTITION=("common,dedicated" "-q" "fast") fi JID=$(sbatch --parsable --wait -p ${PARTITION} --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp") + if [[ -n "$(sacct -j "$JID" | grep "TIMEOUT")" ]] ; then + printf "%-10s -- %s\n" "[ERROR]" "Some workers encountered a TIMEOUT." | tee -a "${OUT}.wgrr.log" + printf "%-10s -- %s\n" "[ERROR]" "Saving the partial files in ${OUT}.wgrr_part directory." | tee -a "${OUT}.wgrr.log" + rm -rf "$OUT".wgrr_part + mkdir "$OUT".wgrr_part + mv "$tmp"/"$OUT".wgrr_part.* "$OUT".wgrr_part/ + exit 1 + fi PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') printf "%-10s -- %s %s %s %s %s\n" "[INFO]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log QT=$((QT+PQT))