@@ -246,6 +246,8 @@ if ! echo "" | $AWKEXE '{a[1][2]=3}' &> /dev/null ; then
exit 1
fi
# Make later bare `awk` calls resolve to the interpreter stored in AWKEXE.
# NOTE(review): this only takes effect where the running shell expands
# aliases and for code parsed after this line - confirm for the target shell.
alias awk=$AWKEXE
## Check MMseqs
if[[$MMPATH=="N.O.P.A.T.H"]];then
if!MMSEQS=$(command-v mmseqs);then
...
...
@@ -263,13 +265,15 @@ else
fi
fi
# Make later bare `mmseqs` calls resolve to the executable stored in MMSEQS.
# NOTE(review): relies on alias expansion being active in the running shell.
alias mmseqs=$MMSEQS
if[[${IDLIST}!="N.O.L.I.S.T"]];then
# Log the sampling step to stdout and append it to the run log.
printf "%-17s -- %s %s\n" "["$(textifyDuration $SECONDS)"]" "Sampling the input file ${PRT} according to the ${IDLIST} file" | tee -a "${OUT}.wgrr.log"
# Keep only the FASTA records of ${PRT} whose genome id is listed in column 1
# of ${IDLIST}. The genome id is the header id (first field without ">")
# with its last "_suffix" removed.
# The pipeline is run once through $AWKEXE, so it does not depend on the
# `awk` alias being expanded; the second, identical invocation only rewrote
# the same output file.
$AWKEXE '
# First file: remember every wanted genome id.
NR == FNR { a[$1]++; next }
# Flush the sequence line buffered on the previous input line.
s != "" { print s; s = "" }
# Header: decide whether the record that starts here is kept.
/^>/ {
  k = 0
  g = substr($1, 2)
  gsub(/_[^_]+$/, "", g)
  if (g in a) { print; k = 1; s = "" }
  next
}
# Sequence line of a kept record: buffer it until the next line arrives.
k { s = s "" $0 }
END { if (s != "") { print s } }
' "${IDLIST}" "${PRT}" > "${PRT:t:r}.sample.prt"
# List every unordered pair of genomes found in ${PRT}, one tab-separated
# pair per line, in order of first appearance. A genome id is the header id
# (first field without ">") with its last "_suffix" removed; a new genome is
# recorded each time the id differs from the one on the previous header.
# NOTE(review): a genome whose records are not contiguous in the file would be
# recorded more than once - confirm the input is grouped by genome.
# Run once through $AWKEXE (independent of the `awk` alias); the duplicated
# invocation only rewrote the same output file.
$AWKEXE '
BEGIN { x = 1 }
/^>/ {
  g = substr($1, 2)
  gsub(/_[^_]+$/, "", g)
  if (FNR == 1) { a[x] = g; ++x; currg = g; next }
  if (g != currg) { a[x] = g; x++; currg = g }
}
END {
  i = 0
  while (++i in a) {
    j = i
    while (++j in a) { print a[i] "\t" a[j] }
  }
}
' "${PRT}" > "${OUT}.allpairs.txt"
# List every unordered pair of genomes found in the test-run protein file,
# one tab-separated pair per line, in order of first appearance. A genome id
# is the header id (first field without ">") with its last "_suffix" removed;
# a new genome is recorded each time the id differs from the previous header.
# Run once through $AWKEXE (independent of the `awk` alias); the duplicated
# invocation only rewrote the same output file.
$AWKEXE '
BEGIN { x = 1 }
/^>/ {
  g = substr($1, 2)
  gsub(/_[^_]+$/, "", g)
  if (FNR == 1) { a[x] = g; ++x; currg = g; next }
  if (g != currg) { a[x] = g; x++; currg = g }
}
END {
  i = 0
  while (++i in a) {
    j = i
    while (++j in a) { print a[i] "\t" a[j] }
  }
}
' "${OUT}.testrun.prt" > "${OUT}.testrun.allpairs.txt"
# Progress message: printed to stdout and appended to the run log.
printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Running MMseqs on a sample file" | tee -a "${OUT}.wgrr.log"
# Progress message reporting the worker count for the current array size.
printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "With the current -a parameter (${ARRAYSIZE}) ${NJOBS} workers are required" | tee -a "${OUT}.wgrr.log"
# Error banner; ${red}/${normal} are expanded inside the format string
# (presumably terminal colour sequences defined elsewhere - confirm).
printf "${red}%-17s -- %s\n${normal}" "[ERROR]" "An error occurred during wGRR calculation:" | tee -a "${OUT}.wgrr.log"
...
...
@@ -517,7 +521,7 @@ fi
if[[$JACCARD== 1 ]];then
# Log the step to stdout and append it to the run log.
printf "%-17s -- %s\n" "["$(textifyDuration $SECONDS)"]" "Jaccard index calculation" | tee -a "${OUT}.wgrr.log"
# Compute a Jaccard index for every genome pair of ${OUT}.allpairs.txt.
# Inputs, in order: FASTA header lines of ${PRT}, ${OUT}_cluster.tsv,
# ${OUT}.allpairs.txt.
# Output columns (tab-separated): genome1, genome2, number of clusters in
# genome1, number of clusters in genome2, number of shared clusters, index.
# Each genome also gets one self line with index 1, and every pair is written
# in both orientations. Rows are version-sorted on columns 1 and 2.
# The program needs arrays of arrays, so it is run through $AWKEXE rather than
# relying on the `awk` alias. It is run once; the duplicated invocation only
# rewrote the same output file.
$AWKEXE '
BEGIN { c = 0; OFS = "\t" }
# Input 1: group protein ids by genome id (protein id minus last "_suffix").
NR == FNR {
  p = substr($1, 2)
  g = p
  gsub(/_[^_]+$/, "", g)
  G[g][p]++
  next
}
# Input 2: number each distinct column-1 value in order of appearance and
# map the column-2 value to that number.
FILENAME == ARGV[2] {
  if (!($1 in A)) { A[$1]++; c++ }
  C[$2] = c
}
# Input 3: one genome pair per line.
FILENAME == ARGV[3] {
  split("", C1, "")
  split("", C2, "")
  common = 0; orphan1 = 0; orphan2 = 0
  # Clusters and unclustered proteins of the first genome.
  for (p in G[$1]) {
    if (!(p in C)) { orphan1++ } else { C1[C[p]]++ }
  }
  # Same for the second genome. This loop must not be nested in the one
  # above, otherwise orphan2 is counted once per protein of the first genome
  # and the index stops being symmetric.
  for (q in G[$2]) {
    if (!(q in C)) { orphan2++ } else { C2[C[q]]++ }
  }
  for (x in C1) { if (x in C2) { common++ } }
  if (!($1 in O)) { O[$1]++; print $1, $1, length(C1), length(C1), length(C1), "1" }
  if (!($2 in O)) { O[$2]++; print $2, $2, length(C2), length(C2), length(C2), "1" }
  val = common / (length(C1) + length(C2) + orphan1 + orphan2 - common)
  print $1, $2, length(C1), length(C2), common, val
  print $2, $1, length(C2), length(C1), common, val
}
' <(grep ">" "$PRT") "${OUT}_cluster.tsv" "${OUT}.allpairs.txt" | sort -k1,1V -k2,2V > "${OUT}.jaccard.txt"