From 8e7e191f7c429270704d3ada8f6e92593e7f99f2 Mon Sep 17 00:00:00 2001
From: Julien Guglielmini <julien.guglielmini@pasteur.fr>
Date: Thu, 24 Feb 2022 17:20:37 +0100
Subject: [PATCH] Fix command echo, --max-seqs parameter and memory estimation

---
 wGRR     | 14 +++++---------
 wGRR.awk |  4 ++--
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/wGRR b/wGRR
index fb69f98..9bfb6e8 100755
--- a/wGRR
+++ b/wGRR
@@ -156,7 +156,7 @@ echo "[INFO]      --  wGRR version ${VERSION}"
 
 if [[ $BATCHFLAG == 0 ]] ; then
 	echo "[INFO]      --  The command is:"
-	echo "[INFO]      --  ./wGRR $@"
+	echo "[INFO]      --  ./wGRR "$@
 
 	if [[ $THREADS -gt $(nproc) ]] ; then
 		echo "[ERROR]     --  You required $THREADS threads but your machine has $(nproc) cores."
@@ -229,7 +229,7 @@ if [[ -f $OUT.m8 ]] ; then
 else
 	MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT)
 	if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then
-		MMS_MAX_SEQ_PARAM=$(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}')
+		MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}'))
 		echo "[INFO]      --  Setting MMseqs parameter $MMS_MAX_SEQ_PARAM"
 	fi
 
@@ -295,28 +295,24 @@ if [[ $BATCHFLAG == 0 ]] ; then
 		for i in {1..$NJOBS} ; do
 			echo $i
 		done | xargs -n 1 -P $THREADS sh -c 'arg=$5; NJOBS=$2 ; printf "\r[PROGRESS]  --  [%-40s]"  $(head -c $((arg*40/NJOBS+1)) < /dev/zero | tr "\0" "=") ; ./wGRR_worker.zsh $0 $1 $NJOBS $arg $3 $4' $ARRAYSIZE $OUT $NJOBS $PRT $tmp
+		printf "\n"
 	fi
 else
 	REQMEM=""
 	echo "[INFO]      --  Estimating required memory"
-	M1=$(awk -f wGRR_mem.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT $OUT.allpairs.txt $PRT $OUT.m8)
+	M1=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.allpairs.txt $PRT $OUT.m8)
 	REQMEM=$(bc -l <<< $(numfmt --from=iec $M1)*($ARRAYSIZE/10) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}')
 	echo "[INFO]      --  $REQMEM per job required"
 	duration=$SECONDS
 	printf "%-10s  --  %s\n" "[TIME]" $(textifyDuration $duration)
 	echo "[INFO]      --  Submitting $NJOBS jobs for wGRR calculation"
-	if [[ $PARTITION == "common" ]] ; then
-		FAST_PART="common,dedicated"
-	else
-		FAST_PART=$PARTITION
-	fi
 	if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then
 		echo "[INFO]      --  Limiting the number of simultaneous jobs to $MAXJOBS"
 		JOBARRAY="1-${NJOBS}%${MAXJOBS}"
 	else
 		JOBARRAY="1-${NJOBS}"
 	fi
-	JID=$(sbatch --parsable --wait -p "$FAST_PART" --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp")
+	JID=$(sbatch --parsable --wait -p "$PARTITION" --array="$JOBARRAY" -c 1 -J "wGRR_worker" --mem=$REQMEM --wrap="./wGRR_worker.zsh $ARRAYSIZE $OUT $NJOBS 1 $PRT $tmp")
 	PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}')
 	echo "[INFO]      --  The job $JID has been "$(textifyDuration $PQT) "in queue"
 	QT=$((QT+PQT))
diff --git a/wGRR.awk b/wGRR.awk
index c21314a..80492fa 100755
--- a/wGRR.awk
+++ b/wGRR.awk
@@ -267,7 +267,7 @@ END {
 
 				if(gi==gj){
 					lc = length(cluster)
-					if(MEM){
+					if(!MEM){
 						print gi,gj,sumidR/mp,ncommonR/li,li,lj,sumidR/minprot,ncommonR/((lli+llj)/2),lli,llj,sumidC/lc,length(bbh_clust)/lc,lc,lc
 					}
 				}
@@ -278,7 +278,7 @@ END {
 						avg_wgrr3=(part_wgrr3[gj":"gi]+sumidC/lc)/2
 						split(final_out[gj":"gi],ff,"\t")
 						# Print twice for both directions of the wGRR
-						if(MEM){
+						if(!MEM){
 							print ff[1],ff[2],avg_wgrr1,ff[4],ff[5],ff[6],avg_wgrr2,ff[8],ff[9],ff[10],avg_wgrr3,ff[12],ff[13],ff[14]
 							print gi,gj,avg_wgrr1,ncommonR/((li+lj)/2),li,lj,avg_wgrr2,ncommonR/((lli+llj)/2),lli,llj,avg_wgrr3,length(bbh_clust)/((lci+lcj)/2),lci,lcj
 						}
-- 
GitLab