Skip to content
Snippets Groups Projects
Commit 6bdc780b authored by Alexis  CRISCUOLO's avatar Alexis CRISCUOLO :black_circle:
Browse files

2.1

parent 7015e593
Branches icy-3.0.0
No related merge requests found
......@@ -4,7 +4,7 @@
# #
# JolyTree: fast distance-based phylogenetic inference from unaligned genome sequences #
# #
COPYRIGHT="Copyright (C) 2017-2020 Institut Pasteur" #
COPYRIGHT="Copyright (C) 2017-2021 Institut Pasteur" #
# #
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU #
# General Public License as published by the Free Software Foundation, either version 3 of the License, or #
......@@ -33,7 +33,13 @@
# = VERSIONS = #
# ============ #
# #
VERSION=2.0.190926ac #
VERSION=2.1.211019ac #
# + commenting out line 576, as some Linux distributions incorrectly interpret "trap [arg] signal_spec" #
# when arg is empty #
# + adding some conditions to deal with some FastME crashes observed when inferring large trees #
# + new option -x to prevent the estimation of branch supports #
# #
# VERSION=2.0.190926ac #
# + new F81/EI transformation formula using gamma shape parameter (option -a = 1.5 by default) #
# + option -f to use the 4 nucleotide frequencies in F81/EI transformation; by default, to deal with #
# multiple contig files, JolyTree sets f(A)=f(T)=0.5*(A+T)/(A+C+G+T) and f(C)=f(G)=0.5*(C+G)/(A+C+G+T) #
......@@ -95,6 +101,10 @@
Criscuolo A (2019) A fast alignment-free bioinformatics procedure to infer accurate
distance-based phylogenetic trees from genome assemblies. RIO. doi:10.3897/rio.5.e36178
Criscuolo A (2020) On the transformation of MinHash-based uncorrected distances into
proper evolutionary distances for phylogenetic inference. F1000Research.
doi:10.12688/f1000research.26930.1
USAGE: JolyTree.sh -i <directory> -b <basename> [options]
OPTIONS:
......@@ -117,6 +127,7 @@
-n no BME tree inference (only pairwise distance estimates)
-r <int> number of steps when performing the ratchet-based BME tree search
(default: 100)
-x no branch support
-t <int> number of threads (default: 2)
EOF
......@@ -249,11 +260,13 @@ INFERTREE=true; # -n (none)
RATCHET=100; # -r (100)
RATCHET_LIMIT=200; # (static)
BRANCH_SUPPORT=true; # -x (none)
NPROC=2; # -t (2)
CHUNK=20; # -h (20)
WAITIME=0.5; # (auto from -t)
while getopts :i:b:s:q:k:c:a:d:r:t:h:nf option
while getopts :i:b:s:q:k:c:a:d:r:t:h:nfx option
do
case $option in
i) DATADIR="$OPTARG" ;;
......@@ -265,6 +278,7 @@ do
a) ALPHA="$($GAWK -v x=$OPTARG 'BEGIN{printf "%.20f", x+0}' | sed 's/0*$//g')" ;;
f) NFQ=4 ;;
n) INFERTREE=false ;;
x) BRANCH_SUPPORT=false ;;
r) RATCHET=$OPTARG ;;
h) CHUNK=$OPTARG ;;
t) NPROC=$OPTARG ;;
......@@ -559,7 +573,7 @@ if ! $INFERTREE ; then exit 0 ; fi
#############################################################################################################
#############################################################################################################
trap INT ;
# trap INT ;
function ctrl_c() {
echo -n " process interrupted: deleting files ... " ;
sleep 5 ;
......@@ -588,8 +602,14 @@ OUTTREE=$BASEFILE.tt;
$FASTME -i $DMAT -o $OUTTREE -s -f 12 -T 1 &> /dev/null ;
tblo=$(grep -B1 "Performed" $BASEFILE.dd_fastme_stat.txt | sed -n 1p | sed 's/.* //g' | sed 's/\.$//g');
[ -z "$tblo" ] && tblo=$(grep -o ":[0-9\.-]*" $OUTTREE | tr -d :- | paste -sd+ | bc -l | sed 's/^\./0./');
echo " step 0 $tblo" >&2 ;
echo "step 0 tbl=$tblo" ;
# Guard against an empty initial tree length (tblo), i.e. when nothing could be
# extracted for step 0 (e.g. after a FastME crash).
if [ -z "$tblo" ]
then
# Use a very large placeholder so that later "$tbl<$tblo" comparisons done with bc
# still receive a numeric operand; report NaN on stderr only (nothing on stdout).
tblo=999999;
echo " step 0 NaN" >&2 ;
else
# Normal case: report the step-0 tree length on stderr (progress) and stdout (log).
echo " step 0 $tblo" >&2 ;
echo "step 0 tbl=$tblo" ;
fi
cp $OUTTREE $BMETREE;
sed -f $TAXFILE $BMETREE > $BMETREE.tmp ; mv $BMETREE.tmp $BMETREE ; # <=> sed -f $TAXFILE -i $BMETREE ;
rm -f $BASEFILE.dd_fastme_stat.txt ;
......@@ -615,7 +635,7 @@ then
END {print" "n;i=0;while(++i<=n){printf lbl[i];j=0;while(++j<=n){printf(" %.8f",d[i][j])}print""}}' $DMAT.$x.c > $DMAT.$x.n ;
### ratchet-search tree search ########################################################################
$EXEC "$FASTME -i $DMAT.$x.n -u $OUTTREE -o $OUTTREE.$x.n -nB -s -T 1 ; sed 's/:-/:/g' $OUTTREE.$x.n > $OUTTREE.$x.m ; $FASTME -i $DMAT.$x.c -u $OUTTREE.$x.m -o $OUTTREE.$x.c -s -T 1 ; rm -f $DMAT.$x.n $DMAT.$x.m $DMAT.$x.n_fastme_stat.txt $OUTTREE.$x.n $OUTTREE.$x.m ;" &> /dev/null &
$EXEC "$FASTME -i $DMAT.$x.n -u $OUTTREE -o $OUTTREE.$x.n -nB -s -T 1 ; sed 's/:-/:/g' $OUTTREE.$x.n > $OUTTREE.$x.m ; $FASTME -i $DMAT.$x.c -u $OUTTREE.$x.m -o $OUTTREE.$x.c -s -T 1 -f 12 ; rm -f $DMAT.$x.n $DMAT.$x.m $DMAT.$x.n_fastme_stat.txt $OUTTREE.$x.n $OUTTREE.$x.m ;" &> /dev/null &
done
while [ $(jobs -r | wc -l) -gt 0 ]; do sleep $WAITIME ; done
......@@ -629,8 +649,9 @@ then
rm -f $DMAT.$x.c_fastme_stat.txt ;
out=" ";
[ -z "$tbl" ] && tbl=$(grep -o ":[0-9\.-]*" $OUTTREE.$x.c | tr -d :- | paste -sd+ | bc | sed 's/^\./0./') && out="+";
[ -z "$tbl" ] && tbl="NaN";
echo -n "$out step $step_prev $tbl" >&2 ;
if [ $(echo "$tbl<$tblo" | bc) -eq 0 ]
if [ "$tbl" == "NaN" ] || [ $(echo "$tbl<$tblo" | bc) -eq 0 ]
then
rm -f $OUTTREE.$x.c ;
echo " (epsilon=$v)" >&2 ;
......@@ -655,13 +676,18 @@ fi
#############################################################################################################
#############################################################################################################
echo -n "estimating branch supports ... " >&2 ;
$REQ $BASEFILE.d $BMETREE $OUTTREE ;
echo "[ok]" >&2 ;
mv $OUTTREE $BMETREE ;
echo "BME tree (tbl=$tblo) with branch supports written into $BMETREE" ;
rm -f $DMAT $TAXFILE $OUTTREE ;
# Final output step: optionally estimate branch supports, then delete working files.
# BRANCH_SUPPORT is true by default and set to false by option -x.
if $BRANCH_SUPPORT
then
# Run $REQ on $BASEFILE.d and $BMETREE; presumably it writes the tree with branch
# supports into $OUTTREE (TODO confirm), which then replaces $BMETREE.
echo -n "estimating branch supports ... " >&2 ;
$REQ $BASEFILE.d $BMETREE $OUTTREE ;
echo "[ok]" >&2 ;
mv $OUTTREE $BMETREE ;
echo "BME tree (tbl=$tblo) with branch supports written into $BMETREE" ;
else
# Option -x: $BMETREE is left untouched and reported without branch supports.
echo "BME tree (tbl=$tblo) written into $BMETREE" ;
fi
# Delete $DMAT, $TAXFILE and $OUTTREE (rm -f does not fail on missing files).
rm -f $DMAT $TAXFILE $OUTTREE ;
exit ;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment