From 8e7e191f7c429270704d3ada8f6e92593e7f99f2 Mon Sep 17 00:00:00 2001 From: Julien Guglielmini <julien.guglielmini@pasteur.fr> Date: Thu, 24 Feb 2022 17:20:37 +0100 Subject: [PATCH] bug fixes --- wGRR | 14 +++++--------- wGRR.awk | 4 ++-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/wGRR b/wGRR index fb69f98..9bfb6e8 100755 --- a/wGRR +++ b/wGRR @@ -156,7 +156,7 @@ echo "[INFO] -- wGRR version ${VERSION}" if [[ $BATCHFLAG == 0 ]] ; then echo "[INFO] -- The command is:" - echo "[INFO] -- ./wGRR $@" + echo "[INFO] -- ./wGRR "$@ if [[ $THREADS -gt $(nproc) ]] ; then echo "[ERROR] -- You required $THREADS threads but your machine has $(nproc) cores." @@ -229,7 +229,7 @@ if [[ -f $OUT.m8 ]] ; then else MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT) if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then - MMS_MAX_SEQ_PARAM=$(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}') + MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}')) echo "[INFO] -- Setting MMseqs parameter $MMS_MAX_SEQ_PARAM" fi @@ -295,28 +295,24 @@ if [[ $BATCHFLAG == 0 ]] ; then for i in {1..$NJOBS} ; do echo $i done | xargs -n 1 -P $THREADS sh -c 'arg=$5; NJOBS=$2 ; printf "\r[PROGRESS] -- [%-40s]" $(head -c $((arg*40/NJOBS+1)) < /dev/zero | tr "\0" "=") ; ./wGRR_worker.zsh $0 $1 $NJOBS $arg $3 $4' $ARRAYSIZE $OUT $NJOBS $PRT $tmp + printf "\n" fi else REQMEM="" echo "[INFO] -- Estimating required memory" - M1=$(awk -f wGRR_mem.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT $OUT.allpairs.txt $PRT $OUT.m8) + M1=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.allpairs.txt $PRT $OUT.m8) REQMEM=$(bc -l <<< $(numfmt --from=iec $M1)*($ARRAYSIZE/10) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') echo "[INFO] -- $REQMEM per job required" duration=$SECONDS printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration) echo "[INFO] -- Submitting $NJOBS jobs for wGRR calculation" - if [[ $PARTITION == "common" ]] ; then - FAST_PART="common,dedicated" - else - FAST_PART=$PARTITION - fi if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then echo "[INFO] -- Limiting the number of simultaneous jobs to $MAXJOBS" JOBARRAY="1-${NJOBS}%${MAXJOBS}" else JOBARRAY="1-${NJOBS}" fi - JID=$(sbatch --parsable --wait -p "$FAST_PART" --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp") + JID=$(sbatch --parsable --wait -p "$PARTITION" --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp") PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') echo "[INFO] -- The job $JID has been "$(textifyDuration $PQT) "in queue" QT=$((QT+PQT)) diff --git a/wGRR.awk b/wGRR.awk index c21314a..80492fa 100755 --- a/wGRR.awk +++ b/wGRR.awk @@ -267,7 +267,7 @@ END { if(gi==gj){ lc = length(cluster) - if(MEM){ + if(!MEM){ print gi,gj,sumidR/mp,ncommonR/li,li,lj,sumidR/minprot,ncommonR/((lli+llj)/2),lli,llj,sumidC/lc,length(bbh_clust)/lc,lc,lc } } @@ -278,7 +278,7 @@ END { avg_wgrr3=(part_wgrr3[gj":"gi]+sumidC/lc)/2 split(final_out[gj":"gi],ff,"\t") # Print twice for both directions of the wGRR - if(MEM){ + if(!MEM){ print ff[1],ff[2],avg_wgrr1,ff[4],ff[5],ff[6],avg_wgrr2,ff[8],ff[9],ff[10],avg_wgrr3,ff[12],ff[13],ff[14] print gi,gj,avg_wgrr1,ncommonR/((li+lj)/2),li,lj,avg_wgrr2,ncommonR/((lli+llj)/2),lli,llj,avg_wgrr3,length(bbh_clust)/((lci+lcj)/2),lci,lcj } -- GitLab