Skip to content
Snippets Groups Projects
Commit 30db0561 authored by Julien  GUGLIELMINI's avatar Julien GUGLIELMINI
Browse files

log changes

parent 5f2c90bf
No related branches found
No related tags found
No related merge requests found
...@@ -226,21 +226,19 @@ if [[ -f $OUT.m8 ]] ; then ...@@ -226,21 +226,19 @@ if [[ -f $OUT.m8 ]] ; then
echo "[INFO] -- Using existing MMseqs output file $OUT.m8" echo "[INFO] -- Using existing MMseqs output file $OUT.m8"
else else
MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT) MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT)
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then
MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}')) MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}'))
echo "[INFO] -- Setting MMseqs parameter $MMS_MAX_SEQ_PARAM" echo "[INFO] -- Setting MMseqs parameter $MMS_MAX_SEQ_PARAM"
fi fi
if [[ $BATCHFLAG == 1 ]] ; then if [[ $BATCHFLAG == 1 ]] ; then
printf "%-10s -- %s\n" "[INFO]" "Submitting MMseqs search to Maestro" printf "%-10s -- %s\n" "[INFO]" "Submitting MMseqs search to Maestro"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="$MMSEQS easy-search $PRT $PRT ${OUT}.m8 ${tmp} --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM") JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="$MMSEQS easy-search $PRT $PRT ${OUT}.m8 ${tmp} --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM")
PQT=$(sacct -X -j $JID -o Reserved -n | awk '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}') PQT=$(sacct -X -j $JID -o Reserved -n | awk '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}')
echo "[INFO] -- The job $JID has been "$(textifyDuration $PQT) "in queue" echo "[INFO] -- The job $JID has been "$(textifyDuration $PQT) "in queue"
QT=$((QT+PQT)) QT=$((QT+PQT))
else else
printf "%-10s -- %s\n" "[INFO]" "Running MMseqs search" printf "%-10s -- %s\n" "[INFO]" "Running MMseqs search"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
$MMSEQS easy-search $PRT $PRT $OUT.m8 $tmp --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches $MMS_MAX_SEQ_PARAM > $OUT.mmseqs.search.log $MMSEQS easy-search $PRT $PRT $OUT.m8 $tmp --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches $MMS_MAX_SEQ_PARAM > $OUT.mmseqs.search.log
fi fi
fi fi
...@@ -254,8 +252,8 @@ duration=$SECONDS ...@@ -254,8 +252,8 @@ duration=$SECONDS
if [[ -f $OUT.allpairs.txt ]] ; then if [[ -f $OUT.allpairs.txt ]] ; then
echo "[INFO] -- Using existing file $OUT.allpairs.txt" echo "[INFO] -- Using existing file $OUT.allpairs.txt"
else else
printf "%-10s -- %s\n" "[INFO]" "Writing genomes pairs"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration) printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
printf "%-10s -- %s\n" "[INFO]" "Writing genomes pairs"
$AWKEXE 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[0-9]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $PRT > $OUT.allpairs.txt $AWKEXE 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[0-9]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $PRT > $OUT.allpairs.txt
printf "%-10s -- %s\n" "[INFO]" "$(wc -l $OUT.allpairs.txt | awk '{print $1}') pairs written" printf "%-10s -- %s\n" "[INFO]" "$(wc -l $OUT.allpairs.txt | awk '{print $1}') pairs written"
fi fi
...@@ -292,13 +290,13 @@ if [[ $BATCHFLAG == 0 ]] ; then ...@@ -292,13 +290,13 @@ if [[ $BATCHFLAG == 0 ]] ; then
printf "\n" printf "\n"
fi fi
else else
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
REQMEM="" REQMEM=""
echo "[INFO] -- Estimating required memory" echo "[INFO] -- Estimating required memory"
M1=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.allpairs.txt $PRT $OUT.m8) M1=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.allpairs.txt $PRT $OUT.m8)
REQMEM=$(bc -l <<< $(numfmt --from=iec $M1)*($ARRAYSIZE/10) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}') REQMEM=$(bc -l <<< $(numfmt --from=iec $M1)*($ARRAYSIZE/10) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}')
echo "[INFO] -- $REQMEM per job required" echo "[INFO] -- $REQMEM per job required"
duration=$SECONDS duration=$SECONDS
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
echo "[INFO] -- Submitting $NJOBS jobs for wGRR calculation" echo "[INFO] -- Submitting $NJOBS jobs for wGRR calculation"
if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then
echo "[INFO] -- Limiting the number of simultaneous jobs to $MAXJOBS" echo "[INFO] -- Limiting the number of simultaneous jobs to $MAXJOBS"
...@@ -315,8 +313,8 @@ else ...@@ -315,8 +313,8 @@ else
fi fi
duration=$SECONDS duration=$SECONDS
printf "%-10s -- %s\n" "[INFO]" "Sorting results"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration) printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
printf "%-10s -- %s\n" "[INFO]" "Sorting results"
sort -m "$tmp"/$OUT.wgrr_part.* | sort -u -k1,1V -k2,2V | $AWKEXE 'BEGIN{print "GenomeA\tGenomeB\twGRR1\tCommon1\tRealLengthA\tRealLengthB\twGRR2\tCommon2\tEffectiveLength1A\tEffectiveLength1B\twGRR3\tCommon3\tEffectiveLength2A\tEffectiveLength2B"}1' > $OUT.wgrr.txt sort -m "$tmp"/$OUT.wgrr_part.* | sort -u -k1,1V -k2,2V | $AWKEXE 'BEGIN{print "GenomeA\tGenomeB\twGRR1\tCommon1\tRealLengthA\tRealLengthB\twGRR2\tCommon2\tEffectiveLength1A\tEffectiveLength1B\twGRR3\tCommon3\tEffectiveLength2A\tEffectiveLength2B"}1' > $OUT.wgrr.txt
if [[ ! -f "$OUT".wgrr.txt ]] ; then if [[ ! -f "$OUT".wgrr.txt ]] ; then
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment