From 34cb42c47567f8c277b358e96b89c7105ad17bb4 Mon Sep 17 00:00:00 2001 From: jgugliel <julien.guglielmini@pasteur.fr> Date: Mon, 13 Mar 2023 11:26:59 +0100 Subject: [PATCH] A few simplifications --- wGRR | 52 ++++++++++++++++++++++++++----------------------- wGRR_worker.zsh | 4 ++-- 2 files changed, 30 insertions(+), 26 deletions(-) diff --git a/wGRR b/wGRR index 2c75c1c..b4b12fe 100755 --- a/wGRR +++ b/wGRR @@ -246,6 +246,8 @@ if ! echo "" | $AWKEXE '{a[1][2]=3}' &> /dev/null ; then exit 1 fi +alias awk=$AWKEXE + ## Check MMseqs if [[ $MMPATH == "N.O.P.A.T.H" ]] ; then if ! MMSEQS=$(command -v mmseqs) ; then @@ -263,13 +265,15 @@ else fi fi +alias mmseqs=$MMSEQS + if [[ ${IDLIST} != "N.O.L.I.S.T" ]] ; then printf "%-17s -- %s %s\n" "["$(textifyDuration $SECONDS)"]" "Sampling the input file ${PRT} according to the ${IDLIST} file" | tee -a ${OUT}.wgrr.log - $AWKEXE 'NR==FNR{a[$1]++;next}s!=""{print s;s=""}/^>/{k=0;g=substr($1,2);gsub(/_[^_]+$/,"",g);if(g in a){print;k=1;s=""}next}k{s=s""$0}END{if(s!=""){print s}}' ${IDLIST} ${PRT} > ${PRT:t:r}.sample.prt + awk 'NR==FNR{a[$1]++;next}s!=""{print s;s=""}/^>/{k=0;g=substr($1,2);gsub(/_[^_]+$/,"",g);if(g in a){print;k=1;s=""}next}k{s=s""$0}END{if(s!=""){print s}}' ${IDLIST} ${PRT} > ${PRT:t:r}.sample.prt PRT=${PRT:t:r}.sample.prt fi -STATS=($($AWKEXE '/^>/{p++;g=substr($1,2);gsub(/_[^_]+$/,"",g);if(!a[g]++){c++};LNR=NR}{if(NR>LNR+1){n=1}}END{if(n!=1){n=0}print c"\t"p"\t"n"\t"p/c}' $PRT)) +STATS=($(awk '/^>/{p++;g=substr($1,2);gsub(/_[^_]+$/,"",g);if(!a[g]++){c++};LNR=NR}{if(NR>LNR+1){n=1}}END{if(n!=1){n=0}print c"\t"p"\t"n"\t"p/c}' $PRT)) printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "Input file has" $STATS[1] "genomes and a total of" $STATS[2] "proteins" | tee -a ${OUT}.wgrr.log printf "%-17s -- %s %s\n" "["$(textifyDuration $SECONDS)"]" "Mean number of proteins per genome:" $STATS[4] | tee -a ${OUT}.wgrr.log @@ -295,7 +299,7 @@ fi if [[ $STATS[3] -eq 1 ]]; then printf "%-17s -- %s\n" 
"["$(textifyDuration $SECONDS)"]" "Converting fasta file to sequential fasta" | tee -a ${OUT}.wgrr.log OPRT=$PRT - $AWKEXE '!/^>/{s=s$0;next}(s!=""){print s;s=""}{print}END{print s}' $OPRT > $OPRT:t:r_seq.prt + awk '!/^>/{s=s$0;next}(s!=""){print s;s=""}{print}END{print s}' $OPRT > $OPRT:t:r_seq.prt PRT=$OPRT:t:r_seq.prt fi @@ -303,11 +307,11 @@ if [[ -f $OUT.allpairs.txt ]] ; then printf "%-17s -- %s %s%s\n" "["$(textifyDuration $SECONDS)"]" "Using existing file" $OUT ".allpairs.txt" | tee -a ${OUT}.wgrr.log else printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Writing genomes pairs" | tee -a ${OUT}.wgrr.log - $AWKEXE 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[^_]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $PRT > $OUT.allpairs.txt - printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "$(wc -l $OUT.allpairs.txt | $AWKEXE '{print $1}') pairs written" | tee -a ${OUT}.wgrr.log + awk 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[^_]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $PRT > $OUT.allpairs.txt + printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "$(wc -l $OUT.allpairs.txt | awk '{print $1}') pairs written" | tee -a ${OUT}.wgrr.log fi -NPAIRS=$(wc -l $OUT.allpairs.txt | $AWKEXE '{print $1}') +NPAIRS=$(wc -l $OUT.allpairs.txt | awk '{print $1}') if [[ ! -f $OUT.allpairs.txt ]] || [[ $NPAIRS < 1 ]] ; then printf "${red}%-17s -- %s\n${normal}" "[ERROR]" "An error occurred when writing the $OUT.allpairs.txt file." 
| tee -a ${OUT}.wgrr.log exit 1 @@ -335,19 +339,19 @@ fi if [[ $SKIP == 0 || $BATCHFLAG == 1 ]] ; then if [[ $STATS[1] -gt 5 ]] ; then - $AWKEXE '/^>/{g=$1;gsub(/_[0-9]+$/,"",g);a[g]++;if(length(a)>5){exit};print;getline;print}' $PRT > $OUT.testrun.prt + awk '/^>/{g=$1;gsub(/_[0-9]+$/,"",g);a[g]++;if(length(a)>5){exit};print;getline;print}' $PRT > $OUT.testrun.prt else cp $PRT $OUT.testrun.prt fi - $AWKEXE 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[^_]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $OUT.testrun.prt > $OUT.testrun.allpairs.txt + awk 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[^_]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $OUT.testrun.prt > $OUT.testrun.allpairs.txt printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Running MMseqs on a sample file" | tee -a ${OUT}.wgrr.log - $MMSEQS easy-search $OUT.testrun.prt $OUT.testrun.prt $OUT.testrun.m8 $tmp -s 7.5 --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches > $OUT.testrun.mmseqs.search.log + mmseqs easy-search $OUT.testrun.prt $OUT.testrun.prt $OUT.testrun.m8 $tmp -s 7.5 --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches > $OUT.testrun.mmseqs.search.log - M2=$($AWKEXE -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.testrun.allpairs.txt $OUT.testrun.prt $OUT.testrun.m8) - REQMEM=$(bc -l <<< $(numfmt --from=iec $M2)*($ARRAYSIZE*0.15) | numfmt --to=iec | $AWKEXE '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') - REQMEMT=$(bc -l <<< $(numfmt --from=iec $REQMEM)*$THREADS | numfmt --to=iec | $AWKEXE 
'{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') + M2=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.testrun.allpairs.txt $OUT.testrun.prt $OUT.testrun.m8) + REQMEM=$(bc -l <<< $(numfmt --from=iec $M2)*($ARRAYSIZE*0.15) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') + REQMEMT=$(bc -l <<< $(numfmt --from=iec $REQMEM)*$THREADS | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') if [[ $TESTRUN == 1 && $BATCHFLAG == 0 ]] ; then printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "With the current -a parameter (${ARRAYSIZE}) ${NJOBS} workers are required" | tee -a ${OUT}.wgrr.log @@ -383,9 +387,9 @@ rm -rf ${OUT}.bbh_part.*(N) if [[ -f $OUT.m8 ]] ; then printf "%-17s -- %s %s%s\n" "["$(textifyDuration $SECONDS)"]" "Using existing MMseqs output file" $OUT ".m8" | tee -a ${OUT}.wgrr.log else - MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT) + MIDENT=$(awk '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT) if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then - MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | $AWKEXE '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}')) + MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}')) printf "%-17s -- %s %s\n" "["$(textifyDuration $SECONDS)"]" "Setting MMseqs parameter" "$MMS_MAX_SEQ_PARAM" | tee -a ${OUT}.wgrr.log fi if [[ $BATCHFLAG == 1 ]] ; then @@ -393,16 +397,16 @@ else printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Submitting MMseqs search to Maestro" | tee -a ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "The 
command is:" >> ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "sbatch --wait --parsable -o ${OUT}.mmseqs.log -p ${PARTITION} -c ${THREADS} -J \"wGRR_MMSeqs\" --wrap=\"${MMSEQS} easy-search ${PRT} ${PRT} ${OUT}.m8 ${tmp} -s 7.5 --threads ${THREADS} --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches ${MMS_MAX_SEQ_PARAM}" >> ${OUT}.wgrr.log - JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="$MMSEQS easy-search $PRT $PRT ${OUT}.m8 ${tmp} -s 7.5 --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM") - PQT=$(sacct -X -j $JID -o Reserved -n | $AWKEXE '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}') + JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="${MMSEQS} easy-search $PRT $PRT ${OUT}.m8 ${tmp} -s 7.5 --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM") + PQT=$(sacct -X -j $JID -o Reserved -n | awk '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}') printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log QT=$((QT+PQT)) else printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Submitting MMseqs search and linclust to Maestro" | tee -a ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "The command is:" >> ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "sbatch --wait --parsable -o ${OUT}.mmseqs.log -p ${PARTITION} -c ${THREADS} -J \"wGRR_MMSeqs\" --wrap=\"${MMSEQS} easy-search ${PRT} ${PRT} ${OUT}.m8 ${tmp} -s 7.5 --threads ${THREADS} --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches ${MMS_MAX_SEQ_PARAM}
; ${MMSEQS} easy-linclust ${PRT} ${OUT} ${tmp} --threads ${THREADS}" >> ${OUT}.wgrr.log - JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="$MMSEQS easy-search $PRT $PRT ${OUT}.m8 ${tmp} -s 7.5 --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM ; ${MMSEQS} easy-linclust ${PRT} ${OUT} ${tmp} --threads ${THREADS}") - PQT=$(sacct -X -j $JID -o Reserved -n | $AWKEXE '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}') + JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="${MMSEQS} easy-search $PRT $PRT ${OUT}.m8 ${tmp} -s 7.5 --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM ; ${MMSEQS} easy-linclust ${PRT} ${OUT} ${tmp} --threads ${THREADS}") + PQT=$(sacct -X -j $JID -o Reserved -n | awk '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}') printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log QT=$((QT+PQT)) fi @@ -410,13 +414,13 @@ else printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Running MMseqs search" | tee -a ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "The command is:" >> ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "${MMSEQS} easy-search ${PRT} ${PRT} ${OUT}.m8 ${tmp} -s 7.5 --threads ${THREADS} --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches ${MMS_MAX_SEQ_PARAM} > ${OUT}.mmseqs.search.log" >> ${OUT}.wgrr.log - $MMSEQS easy-search $PRT $PRT $OUT.m8 + mmseqs easy-search $PRT $PRT $OUT.m8
$tmp -s 7.5 --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches $MMS_MAX_SEQ_PARAM > $OUT.mmseqs.search.log if [[ $JACCARD == 1 ]] ; then printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Running MMseqs linclust" | tee -a ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "The command is:" >> ${OUT}.wgrr.log printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "${MMSEQS} easy-linclust ${PRT} ${OUT} ${tmp} --threads ${THREADS} > ${OUT}.mmseqs.linclust.log" >> ${OUT}.wgrr.log - $MMSEQS easy-linclust $PRT $OUT $tmp --threads $THREADS > $OUT.mmseqs.linclust.log + mmseqs easy-linclust $PRT $OUT $tmp --threads $THREADS > $OUT.mmseqs.linclust.log fi fi fi @@ -476,7 +480,7 @@ else mv $tmp/"$OUT".wgrr_part.* "$OUT".wgrr_part/ exit 1 fi - PQT=$(sacct -X -j $JID -o Reserved -n | $AWKEXE 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') + PQT=$(sacct -X -j $JID -o Reserved -n | awk 'NR==1{prevt=0}{t=0;n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];if(t<prevt){tt=tt+prevt}prevt=t}END{print tt+t}') printf "%-17s -- %s %s %s %s %s\n" "["$(textifyDuration $SECONDS)"]" "The job" $JID "has been" $(textifyDuration $PQT) "in queue" | tee -a ${OUT}.wgrr.log QT=$((QT+PQT)) mkdir -p "$OUT".logs @@ -484,7 +488,7 @@ else fi printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Sorting results" | tee -a ${OUT}.wgrr.log -sort -m "$tmp"/$OUT.wgrr_part.* | sort -u -k1,1V -k2,2V | $AWKEXE 'BEGIN{print "GenomeA\tGenomeB\twGRR\tSørensen-Dice\tCommon\tNprotA\tNprotB"}1' > $OUT.wgrr.txt +sort -m "$tmp"/$OUT.wgrr_part.* | sort -u -k1,1V -k2,2V | awk 'BEGIN{print "GenomeA\tGenomeB\twGRR\tSørensen-Dice\tCommon\tNprotA\tNprotB"}1' > $OUT.wgrr.txt if [[ ! -f "$OUT".wgrr.txt ]] ; then printf "${red}%-17s -- %s\n${normal}" "[ERROR]" "Failed to sort the wGRR table." 
| tee -a ${OUT}.wgrr.log @@ -501,7 +505,7 @@ if [[ ! -s ${OUT}.bbh.txt ]] ; then printf "%-17s -- %s\n" "[WARNING]" "Failed to produce the BBH output file." | tee -a ${OUT}.wgrr.log fi -NLINES=$(wc -l "$OUT".wgrr.txt | $AWKEXE '{print $1}') +NLINES=$(wc -l "$OUT".wgrr.txt | awk '{print $1}') NWGRR=$((STATS[1]*STATS[1]+1)) if [[ $NLINES -lt $NWGRR ]] ; then printf "${red}%-17s -- %s\n${normal}" "[ERROR]" "An error occurred during wGRR calculation:" | tee -a ${OUT}.wgrr.log @@ -517,7 +521,7 @@ fi if [[ $JACCARD == 1 ]] ; then printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Jaccard index calculation" | tee -a ${OUT}.wgrr.log - $AWKEXE 'BEGIN{c=0;OFS="\t"}NR==FNR{p=substr($1,2);g=p;gsub(/_[^_]++$/,"",g);G[g][p]++;next}FILENAME==ARGV[2]{if(!($1 in A)){A[$1]++;c++}C[$2]=c}FILENAME==ARGV[3]{split("",C1,"");split("",C2,"");common=0;orphan1=0;orphan2=0;for(p in G[$1]){if(!(p in C)){orphan1++}else{C1[C[p]]++};for(q in G[$2]){if(!(q in C)){orphan2++}else{C2[C[q]]++}}}for(x in C1){if(x in C2){common++}}if(!($1 in O)){O[$1]++;print $1,$1,length(C1),length(C1),length(C1),"1"};if(!($2 in O)){O[$2]++;print $2,$2,length(C2),length(C2),length(C2),"1"}val=common/(length(C1)+length(C2)+orphan1+orphan2-common);print $1,$2,length(C1),length(C2),common,val;print $2,$1,length(C2),length(C1),common,val}' <(grep ">" $PRT) ${OUT}_cluster.tsv ${OUT}.allpairs.txt | sort -k1,1V -k2,2V > ${OUT}.jaccard.txt + awk 'BEGIN{c=0;OFS="\t"}NR==FNR{p=substr($1,2);g=p;gsub(/_[^_]++$/,"",g);G[g][p]++;next}FILENAME==ARGV[2]{if(!($1 in A)){A[$1]++;c++}C[$2]=c}FILENAME==ARGV[3]{split("",C1,"");split("",C2,"");common=0;orphan1=0;orphan2=0;for(p in G[$1]){if(!(p in C)){orphan1++}else{C1[C[p]]++};for(q in G[$2]){if(!(q in C)){orphan2++}else{C2[C[q]]++}}}for(x in C1){if(x in C2){common++}}if(!($1 in O)){O[$1]++;print $1,$1,length(C1),length(C1),length(C1),"1"};if(!($2 in O)){O[$2]++;print $2,$2,length(C2),length(C2),length(C2),"1"}val=common/(length(C1)+length(C2)+orphan1+orphan2-common);print 
$1,$2,length(C1),length(C2),common,val;print $2,$1,length(C2),length(C1),common,val}' <(grep ">" $PRT) ${OUT}_cluster.tsv ${OUT}.allpairs.txt | sort -k1,1V -k2,2V > ${OUT}.jaccard.txt fi rm -rf "$OUT".logs diff --git a/wGRR_worker.zsh b/wGRR_worker.zsh index 93349ae..3047985 100755 --- a/wGRR_worker.zsh +++ b/wGRR_worker.zsh @@ -22,7 +22,7 @@ textifyDuration() { echo "$txt" } -AWKEXE=$1 +alias awk=$1 ARRAYSIZE=$2 OUT=$3 NJOBS=$4 @@ -45,6 +45,6 @@ if [[ $STIME != "" ]] ; then printf "\r\033[K%-17s -- [%-50s] %s/%s %s" "[PROGRESS]" $(C=$((arg*50/NJOBS)) ; if [ $C -eq 0 ] ; then printf "=" ; else head -c $C < /dev/zero | tr "\0" "=" ; fi) $arg $NJOBS $(textifyDuration $((CTIME-STIME))) fi -$AWKEXE -v MINP=$MINP -v MAXP=$MAXP -v OBBH=${OUT}.bbh_part.${SLURM_ARRAY_TASK_ID} -f wGRR.awk $OUT.allpairs.txt $PRT $OUT.m8 | sort -k1,1V -k2,2V > $OUTFILE +awk -v MINP=$MINP -v MAXP=$MAXP -v OBBH=${OUT}.bbh_part.${SLURM_ARRAY_TASK_ID} -f wGRR.awk $OUT.allpairs.txt $PRT $OUT.m8 | sort -k1,1V -k2,2V > $OUTFILE exit 0 -- GitLab