Skip to content
Snippets Groups Projects
Commit 30db0561 authored by Julien  GUGLIELMINI's avatar Julien GUGLIELMINI
Browse files

log changes

parent 5f2c90bf
No related branches found
No related tags found
No related merge requests found
......@@ -226,21 +226,19 @@ if [[ -f $OUT.m8 ]] ; then
echo "[INFO] -- Using existing MMseqs output file $OUT.m8"
else
MIDENT=$($AWKEXE '!/^>/{a[$0]++}END{for(i in a){if(a[i]>m){m=a[i]}}print m}' $PRT)
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
if [[ $((MIDENT*2)) -gt "$MMS_DEF_MAX_SEQS" ]] ; then
MMS_MAX_SEQ_PARAM=($(echo $((MIDENT*2)) | awk '{c=substr($1,2);gsub(/[0-9]/,0,c);print "--max-seqs "substr($1,1,1)+1""c}'))
echo "[INFO] -- Setting MMseqs parameter $MMS_MAX_SEQ_PARAM"
fi
if [[ $BATCHFLAG == 1 ]] ; then
printf "%-10s -- %s\n" "[INFO]" "Submitting MMseqs search to Maestro"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
JID=$(sbatch --wait --parsable -o "$OUT".mmseqs.log -p $PARTITION -c $THREADS -J "wGRR_MMSeqs" --wrap="$MMSEQS easy-search $PRT $PRT ${OUT}.m8 ${tmp} --threads $THREADS --format-output \"query,target,qcov,tcov,fident,evalue,bits\" --add-self-matches $MMS_MAX_SEQ_PARAM")
PQT=$(sacct -X -j $JID -o Reserved -n | awk '{n=split($1,a,"-");if(n>1){t=t+a[1]*86400};split(a[n],b,":");t=t+b[1]*3600+b[2]*60+b[3];print t}')
echo "[INFO] -- The job $JID has been "$(textifyDuration $PQT) "in queue"
QT=$((QT+PQT))
else
printf "%-10s -- %s\n" "[INFO]" "Running MMseqs search"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
$MMSEQS easy-search $PRT $PRT $OUT.m8 $tmp --threads $THREADS --format-output "query,target,qcov,tcov,fident,evalue,bits" --add-self-matches $MMS_MAX_SEQ_PARAM > $OUT.mmseqs.search.log
fi
fi
......@@ -254,8 +252,8 @@ duration=$SECONDS
if [[ -f $OUT.allpairs.txt ]] ; then
echo "[INFO] -- Using existing file $OUT.allpairs.txt"
else
printf "%-10s -- %s\n" "[INFO]" "Writing genomes pairs"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
printf "%-10s -- %s\n" "[INFO]" "Writing genomes pairs"
$AWKEXE 'BEGIN{x=1}/^>/{g=substr($1,2);gsub(/_[0-9]+$/,"",g);if(FNR==1){a[x]=g;++x;currg=g;next}if(g!=currg){a[x]=g;x++;currg=g}}END{i=0;while(++i in a){j=i;while(++j in a){print a[i]"\t"a[j]}}}' $PRT > $OUT.allpairs.txt
printf "%-10s -- %s\n" "[INFO]" "$(wc -l $OUT.allpairs.txt | awk '{print $1}') pairs written"
fi
......@@ -292,13 +290,13 @@ if [[ $BATCHFLAG == 0 ]] ; then
printf "\n"
fi
else
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
REQMEM=""
echo "[INFO] -- Estimating required memory"
M1=$(awk -f wGRR.awk -v MINP=1 -v MAXP=10 -v OUT=$OUT -v MEM=1 $OUT.allpairs.txt $PRT $OUT.m8)
REQMEM=$(bc -l <<< $(numfmt --from=iec $M1)*($ARRAYSIZE/10) | numfmt --to=iec | awk '{U=$0;gsub(/[^A-Za-z]/,"",U);V=$0;gsub(/[A-Za-z]+$/,"",V);split(V,a,".");n=split(a[1],b,"");c=b[1]+1;i=1;while(++i<=n){c=c"0"}print c""U}')
echo "[INFO] -- $REQMEM per job required"
duration=$SECONDS
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
echo "[INFO] -- Submitting $NJOBS jobs for wGRR calculation"
if [[ $MAXJOBS -gt 0 ]] && [[ $MAXJOBS -lt $NJOBS ]] ; then
echo "[INFO] -- Limiting the number of simultaneous jobs to $MAXJOBS"
......@@ -315,8 +313,8 @@ else
fi
duration=$SECONDS
printf "%-10s -- %s\n" "[INFO]" "Sorting results"
printf "%-10s -- %s\n" "[TIME]" $(textifyDuration $duration)
printf "%-10s -- %s\n" "[INFO]" "Sorting results"
sort -m "$tmp"/$OUT.wgrr_part.* | sort -u -k1,1V -k2,2V | $AWKEXE 'BEGIN{print "GenomeA\tGenomeB\twGRR1\tCommon1\tRealLengthA\tRealLengthB\twGRR2\tCommon2\tEffectiveLength1A\tEffectiveLength1B\twGRR3\tCommon3\tEffectiveLength2A\tEffectiveLength2B"}1' > $OUT.wgrr.txt
if [[ ! -f "$OUT".wgrr.txt ]] ; then
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment