Commit 69575d3f authored by Alexis  CRISCUOLO's avatar Alexis CRISCUOLO

v1.3.190304ac

parent 35f8c05b
......@@ -20,15 +20,17 @@ This will create the executable jar file `REQ.jar` that could be launched with t
java -jar REQ.jar [files]
```
#### Building a native code binary
#### Building a native executable
On computers with the [GNU compiler GCJ](https://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcj/) installed, a binary could also be built. In a command-line window, go to the _src_ directory, and type:
On computers with [GraalVM](https://www.graalvm.org/downloads/) installed, a native executable could also be built. In a command-line window, go to the _src_ directory, and type:
```bash
make
javac REQ.java
native-image -H:Name=REQ -H:-MultiThreaded REQ
rm REQ.class
```
This will create the executable binary file `req` that could be launched with the following command line model:
This will create the native executable `REQ` that could be launched with the following command line model:
```bash
./req [files]
./REQ [files]
```
## Usage
......@@ -56,7 +58,7 @@ The following command line writes into the file _tree.req.t_ the phylogenetic tr
```bash
REQ example/matrix.d example/tree.t tree.req.t -v
```
Because the option -v is set, the verbose mode will output the tree topology in NEWICK format, the list of leaf names, and, for each internal branch, the leaf quadripartition together with the rate of elementary quartets _Re_:
Because the option `-v` is set, the verbose mode will output the tree topology in NEWICK format, the list of leaf names, and, for each internal branch, the leaf quadripartition followed by the rate of elementary quartets _Re_:
```
# (((((((17,18),16),((20,21),19)),(((4,((6,7),5)),(((2,3),1),0)),(8,9))),(10,11)),(12,13)),14,15);
0: P07_621_SLS
......@@ -95,7 +97,7 @@ Because the option -v is set, the verbose mode will output the tree topology in
[5,7,6,4][0,1,3,2][9,8][21,20,19,18,17,16,15,14,13,12,11,10] Re=1.000 (384/384)
[8][9][0,1,3,2,5,7,6,4][21,20,19,18,17,16,15,14,13,12,11,10] Re=1.000 (96/96)
[0,1,3,2,5,7,6,4][9,8][19,21,20,16,18,17][15,14,13,12,11,10] Re=0.453 (261/576)
[19,21,20,16,18,17][9,8,0,1,3,2,5,7,6,4][11,10][15,14,13,12] Re=0.488 (234/480)ù
[19,21,20,16,18,17][9,8,0,1,3,2,5,7,6,4][11,10][15,14,13,12] Re=0.487 (234/480)
[10][11][9,8,0,1,3,2,5,7,6,4,19,21,20,16,18,17][15,14,13,12] Re=1.000 (64/64)
[9,8,0,1,3,2,5,7,6,4,19,21,20,16,18,17][11,10][13,12][15,14] Re=0.594 (76/128)
[12][13][11,10,9,8,0,1,3,2,5,7,6,4,19,21,20,16,18,17][15,14] Re=1.000 (36/36)
......
@article{anisimova2006,
author = {Anisimova, M. and Gascuel, O.},
year = {2006},
title = {Approximate likelihood-ratio test for branches: a fast, accurate, and powerful alternative},
journal = {Systematic Biology},
volume = {55},
number = {4},
pages = {539--552},
doi = {10.1080/10635150600755453},
url = {https://doi.org/10.1080/10635150600755453}
}
@article{anisimova2011,
author = {Anisimova, M. and Gil, M. and Dufayard, J.-F. and Dessimoz, C. and Gascuel, O.},
year = {2011},
title = {Survey of branch support methods demonstrates accuracy, power, and robustness of fast likelihood-based approximation schemes},
journal = {Systematic Biology},
volume = {60},
number = {5},
pages = {685--699},
doi = {10.1093/sysbio/syr041},
url = {https://doi.org/10.1093/sysbio/syr041}
}
@article{bremer1988,
author = {Bremer, K.},
year = {1988},
title = {The limits of amino acid sequence data in angiosperm phylogenetic reconstruction},
journal = {Evolution},
volume = {42},
number = {4},
pages = {795--803},
doi = {10.1111/j.1558-5646.1988.tb02497.x},
url = {https://doi.org/10.1111/j.1558-5646.1988.tb02497.x}
}
@article{bremer1994,
author = {Bremer, K.},
year = {1994},
title = {Branch support and tree stability},
journal = {Cladistics},
volume = {10},
number = {3},
pages = {295--304},
doi = {10.1111/j.1096-0031.1994.tb00179.x},
url = {https://doi.org/10.1111/j.1096-0031.1994.tb00179.x}
}
@inproceedings{buneman1971,
author = {Buneman, P.},
year = {1971},
title = {The recovery of trees from measures of dissimilarity},
booktitle= {Mathematics in Archaeological and Historical Sciences},
editor = {Hodson, F.~R. and Kendall, D.~G. and Tautu, P.},
pages = {387--395},
publisher = {Edinburgh University Press},
address = {Edinburgh},
url = {http://homepages.inf.ed.ac.uk/opb/homepagefiles/phylogeny-scans/manuscripts.pdf}
}
@article{chapus2005,
author = {Chapus, C. and Dufraigne, C. and Edwards, S. and Giron, A. and Fertil, B. and Deschavanne, P.~J.},
year = {2005},
title = {Exploration of phylogenetic data using a global sequence analysis method},
journal = {BMC Evolutionary Biology},
volume = {5},
pages = {63},
doi = {10.1186/1471-2148-5-63},
url = {https://doi.org/10.1186/1471-2148-5-63}
}
@article{cohen2012,
author = {Cohen, E. and Chor, B.},
year = {2012},
title = {Detecting phylogenetic signals in eukaryotic whole genome sequences},
journal = {Journal of Computational Biology},
volume = {19},
number = {8},
pages = {945--956},
doi = {10.1089/cmb.2012.0122},
url = {https://doi.org/10.1089/cmb.2012.0122}
}
@article{desper2002,
author = {Desper, R. and Gascuel, O.},
year = {2002},
title = {Fast and accurate phylogeny reconstruction algorithms based on the minimum-evolution principle},
journal = {Journal of Computational Biology},
volume = {9},
number = {5},
pages = {687--705},
doi = {10.1089/106652702761034136},
url = {https://doi.org/10.1089/106652702761034136}
}
@article{felsenstein1985,
author = {Felsenstein, J.},
year = {1985},
title = {Confidence limits on phylogenies: an approach using the bootstrap},
journal = {Evolution},
volume = {39},
number = {4},
pages = {783--791},
doi = {10.1111/j.1558-5646.1985.tb00420.x},
url = {https://doi.org/10.1111/j.1558-5646.1985.tb00420.x}
}
@article{garcia2018,
author = {Garcia-Hermoso, D. and Criscuolo, A. and Lee, S.~C. and Legrand, M. and Chaouat, M. and Denis, B. and Lafaurie, M. and Rouveau, M. and Soler, C. and Schaal, J.~V. and Mimoun, M. and Mebazaa, A. and Heitman, J. and Dromer, F. and Brisse, S. and Bretagne, S. and Alanio, A.},
year = {2018},
title = {Outbreak of invasive wound mucormycosis in a burn unit due to multiple strains of Mucor circinelloides f. circinelloides resolved by whole-genome sequencing},
journal = {MBio},
volume = {9},
number = {2},
pages = {e00573-18},
doi = {10.1128/mBio.00573-18},
url = {http://mbio.asm.org/content/9/2/e00573-18}
}
@article{gascuel1997,
author = {Gascuel, O.},
year = {1997},
title = {BIONJ: an improved version of the NJ algorithm based on a simple model of sequence data},
journal = {Molecular Biology and Evolution},
volume = {14},
number = {7},
pages = {685--695},
doi = {10.1093/oxfordjournals.molbev.a025808},
url = {https://doi.org/10.1093/oxfordjournals.molbev.a025808}
}
@inproceedings{guenoche2001,
author = {Guénoche, A. and Garreta, H.},
year = {2001},
title = {Can we have confidence in a tree representation?},
booktitle= {Computational Biology},
editor = {Gascuel, O. and Sagot, M.-F.},
pages = {45--56},
publisher = {Springer Berlin Heidelberg},
address = {Berlin, Heidelberg},
doi = {10.1007/3-540-45727-5_5},
url = {http://iml.univ-mrs.fr/editions/biblio/guenoche/QualiTree-1.pdf}
}
@article{henz2005,
author = {Henz, S.~R. and Huson, D.~H. and Auch, A.~F. and {Nieselt-Struwe}, K. and Schuster, S.~C.},
year = {2005},
title = {Whole-genome prokaryotic phylogeny},
journal = {Bioinformatics},
volume = {21},
number = {10},
pages = {2329--2335},
doi = {10.1093/bioinformatics/bth324},
url = {https://doi.org/10.1093/bioinformatics/bth324}
}
@article{hoang2018a,
author = {Hoang, D.~T. and Chernomor, O. and {von Haeseler}, A. and Minh, B.~Q. and Vinh, L.~S.},
year = {2018},
title = {UFBoot2: improving the ultrafast bootstrap approximation},
journal = {Molecular Biology and Evolution},
volume = {35},
number = {2},
pages = {518--522},
doi = {10.1093/molbev/msx281},
url = {https://doi.org/10.1093/molbev/msx281}
}
@article{hoang2018b,
author = {Hoang, D.~T. and Vinh, L.~S. and Flouri, T. and Stamatakis, A. and {von Haeseler}, A. and Minh, B.~Q.},
year = {2018},
title = {MPBoot: fast phylogenetic maximum parsimony tree inference and bootstrap approximation},
journal = {BMC Evolutionary Biology},
volume = {18},
number = {1},
pages = {11},
doi = {10.1186/s12862-018-1131-3},
url = {https://doi.org/10.1186/s12862-018-1131-3}
}
@article{house2014,
author = {House, C.~H. and Pellegrini, M. and {Fitz-Gibbon}, S.~T.},
year = {2014},
title = {Genome-wide gene order distances support clustering the gram-positive bacteria},
journal = {Frontiers in Microbiology},
volume = {5},
pages = {785},
doi = {10.3389/fmicb.2014.00785},
url = {https://doi.org/10.3389/fmicb.2014.00785}
}
@article{krajewski1990,
author = {Krajewski, C. and Dickerman, A.~W.},
year = {1990},
title = {Bootstrap analysis of phylogenetic trees derived from DNA hybridization distances},
journal = {Systematic Zoology},
volume = {39},
number = {4},
pages = {383--390},
doi = {10.2307/2992358},
url = {https://doi.org/10.2307/2992358}
}
@article{lemoine2018,
author = {Lemoine, F. and Domelevo-Entfellner, J.-B. and Wilkinson, E. and Correia, D. and {Davila Felipe}, M. and {De Oliveira}, T. and Gascuel, O.},
year = {2018},
title = {Renewing Felsenstein's phylogenetic bootstrap in the era of big data},
journal = {Nature},
volume = {556},
number = {7702},
pages = {452--456},
doi = {10.1038/s41586-018-0043-0},
url = {http://dx.doi.org/10.1038/s41586-018-0043-0}
}
@article{makarenkov2010,
author = {Makarenkov, V. and Boc, A. and Xie, J. and Peres-Neto, P. and Lapointe, F.-J. and Legendre, P.},
year = {2010},
title = {Weighted bootstrapping: a correction method for assessing the robustness of phylogenetic trees},
journal = {BMC Evolutionary Biology},
volume = {10},
pages = {250},
doi = {10.1186/1471-2148-10-250},
url = {https://doi.org/10.1186/1471-2148-10-250}
}
@article{minh2013,
author = {Minh, B.~Q. and Nguyen, M.~A. and {von Haeseler}, A.},
year = {2013},
title = {Ultrafast approximation for phylogenetic bootstrap},
journal = {Molecular Biology and Evolution},
volume = {30},
number = {5},
pages = {1188--1195},
doi = {10.1093/molbev/mst024},
url = {https://doi.org/10.1093/molbev/mst024}
}
@incollection{pardi2016,
author = {Pardi, F. and Gascuel, O.},
year = {2016},
title = {Distance-based methods in phylogenetics},
booktitle = {Encyclopedia of Evolutionary Biology},
editor = {Kliman, R.},
publisher = {Academic Press},
pages = {458--465},
url = {https://hal-lirmm.ccsd.cnrs.fr/lirmm-01386569}
}
@article{saitou1987,
author = {Saitou, N. and Nei, M.},
year = {1987},
title = {The neighbor-joining method: a new method for reconstructing phylogenetic trees},
journal = {Molecular Biology and Evolution},
volume = {4},
number = {4},
pages = {406--425},
doi = {10.1093/oxfordjournals.molbev.a040454},
url = {https://doi.org/10.1093/oxfordjournals.molbev.a040454}
}
@article{spencer2007,
author = {Spencer, M. and Bryant, D. and Susko, E.},
year = {2007},
title = {Conditioned genome reconstruction: how to avoid choosing the conditioning genome},
journal = {Systematic Biology},
volume = {56},
number = {1},
pages = {25--43},
doi = {10.1080/10635150601156313},
url = {https://doi.org/10.1080/10635150601156313}
}
@article{studier1988,
author = {Studier, J.~A. and Kepler, K.~J.},
year = {1988},
title = {A note on the neighbour-joining method of Saitou and Nei},
journal = {Molecular Biology and Evolution},
volume = {5},
number = {6},
pages = {729--731},
doi = {10.1093/oxfordjournals.molbev.a040527},
url = {https://doi.org/10.1093/oxfordjournals.molbev.a040527}
}
@article{wang2006,
author = {Wang, L.~S. and Warnow, T. and Moret, B.~M. and Jansen, R.~K. and Raubeson, L.~A.},
year = {2006},
title = {Distance-based genome rearrangement phylogeny},
journal = {Journal of Molecular Evolution},
volume = {63},
number = {4},
pages = {473--483},
doi = {10.1007/s00239-005-0216-y},
url = {https://doi.org/10.1007/s00239-005-0216-y}
}
@article{zaretskii1965,
author = {Zaretskii, Z.~A.},
year = {1965},
title = {Constructing a tree on the basis of a set of distances between the hanging vertices [in Russian]},
note = {ПОСТРОЕНИЕ ДЕРЕВА ПО НАБОРУ РАССТОЯНИЙ МЕЖДУ ВИСЯЧИМИ ВЕРШИНАМИ [real title]},
journal = {Uspekhi Matematicheskikh Nauk},
volume = {20},
number = {6},
pages = {94--96},
url = {http://mi.mathnet.ru/eng/umn6134}
}
---
title: 'REQ: assessing branch supports of a distance-based phylogenetic tree with the rate of elementary quartets'
tags:
- phylogenetics
- branch support
- evolutionary distances
- quartets
- Java
authors:
- name: Alexis Criscuolo
orcid: 0000-0002-8212-5215
affiliation: 1
affiliations:
- name: Institut Pasteur – Bioinformatics and Biostatistics Hub – C3BI, USR 3756 IP CNRS – 25-28 Rue du Docteur Roux, 75015 Paris, France
index: 1
date: 13 July 2018
bibliography: paper.bib
---
# Summary
*REQ* is a program for quickly estimating a confidence value at each branch of a distance-based phylogenetic tree. Branch support assessment is commonly based on bootstrap procedures [@felsenstein1985; @makarenkov2010; @lemoine2018]. Unfortunately, as they are based on numerous resampling of aligned characters, such procedures require long running times, despite some recent advances [@minh2013; @hoang2018a; @hoang2018b]. In fact, direct branch support methods were already developed for character-based approaches that optimize maximum-parsimony or maximum-likelihood criteria, in order to achieve faster running times [@bremer1988; @bremer1994; @anisimova2006; @anisimova2011]. However, to our knowledge, no practical implementation of direct branch support methods is currently available for distance-based approaches.
Distance-based approaches proceed in two steps: a pairwise evolutionary distance is estimated between each pair of (biological) objects, and, next, an algorithm is used to infer the tree with branch lengths that best fits the evolutionary distance matrix [@pardi2016]. Because of their speed, distance-based methods are widely used for inferring phylogenetic trees. Moreover, as such algorithms only need a distance matrix, they allow phylogenetic analyses to be carried out from a wide range of data types, e.g. DNA-DNA hybridization experiments [@krajewski1990], gene orders [@wang2006; @house2014], gene content [@spencer2007], or unaligned genome sequences [@chapus2005; @henz2005; @cohen2012; @garcia2018]. Nevertheless, in such cases, standard bootstrap-based methods cannot be used for estimating branch confidence values.
In order to fill this void, the program *REQ* was developed. This tool estimates the rate of elementary quartets (REQ) for each branch of a given phylogenetic tree from the associated distance matrix, as described by @guenoche2001. This method simply computes the proportion of four-leaf subtrees (i.e. quartets) induced by every internal branch that are supported by the four-point condition applied to the six corresponding pairwise evolutionary distances [@zaretskii1965; @buneman1971]. Therefore, this measure is not based on random sampling (unlike bootstrap-based confidence supports). The closer this measure is to 1, the more the corresponding branch is fully supported by the pairwise evolutionary distances.
The program *REQ* is available on [GitLab](https://gitlab.pasteur.fr/GIPhy/REQ) under the [licence GNU GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html). Implemented in Java, *REQ* could be used on every operating system with a simple command line. *REQ* only needs two input files: a distance matrix file in either PHYLIP lower-triangular or square format, and a phylogenetic tree file in NEWICK format created from the distance matrix by any standard phylogenetic tree reconstruction method, e.g. neighbor-joining [@saitou1987; @studier1988], BioNJ [@gascuel1997], FastME [@desper2002]. Although computing the REQ value for every branch of a phylogenetic tree on *n* leaves requires $O(n^5)$ time complexity, *REQ* running time is quite fast (e.g. ~5 seconds with *n* = 500 on a standard computer) and could therefore be used with large phylogenetic trees.
# References
# Makefile building the REQ program from REQ.java with the GNU compiler GCJ.
# Compiler command.
GCJ=gcj
# Flags passed to the compiler by the REQ rule (Java source level, optimisation and warning switches).
GCJFLAGS=-fsource=1.6 -march=native -msse2 -O3 -minline-all-stringops -fomit-frame-pointer -momit-leaf-frame-pointer -fstrict-aliasing -fno-store-check -fno-bounds-check -funroll-all-loops -Wall
# Extra floating-point flags; defined here but not referenced by the REQ rule.
OTHERFLAGS=-funsafe-math-optimizations -ffast-math
# Name of the main class; also the base name of the source file ($(MAIN).java).
MAIN=REQ
# Name of the output file produced by the rule.
EXEC=req
# Target REQ: depends on REQ.java and compiles it into $(EXEC) with $(MAIN) as entry point.
REQ: REQ.java
$(GCJ) $(GCJFLAGS) --main=$(MAIN) $(MAIN).java -o $(EXEC)
......@@ -3,7 +3,7 @@
REQ: estimating the rate of elementary quartets (REQ) for each
branch of a phylogenetic tree from a distance matrix
Copyright (C) 2017,2018 Alexis Criscuolo
Copyright (C) 2017,2018,2019 Alexis Criscuolo
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
......@@ -35,12 +35,13 @@ import java.text.*;
public class REQ {
// constants
final static String VERSION = "v1.2.180713ac";
final static String VERSION = "v1.3.190304ac";
final static int INF = Integer.MAX_VALUE;
// io
static BufferedReader in;
static BufferedWriter out;
static File dfile, tfile;
static boolean verbose;
static NumberFormat df;
......@@ -64,6 +65,8 @@ public class REQ {
//### man ############################################################################################################################################################################
if ( args.length < 3 ) {
System.out.println("");
System.out.println(" REQ " + VERSION);
System.out.println("");
System.out.println(" USAGE: REQ <dfile> <tfile> <outfile> [-v]");
System.out.println("");
......@@ -81,21 +84,23 @@ public class REQ {
df = NumberFormat.getNumberInstance(Locale.US); df.setGroupingUsed(false); df.setMaximumFractionDigits(3); df.setMinimumFractionDigits(3);
verbose = ( (args.length > 3) && args[3].equals("-v") ) ? true : false;
if ( verbose ) System.out.println("REQ " + VERSION);
if ( ! (dfile = new File(args[0])).exists() ) { System.err.println("distance matrix file does not exist: " + args[0]); System.exit(1); }
if ( ! (tfile = new File(args[1])).exists() ) { System.err.println("tree file does not exist: " + args[1]); System.exit(1); }
//### reading distance matrix dm #####################################################################################################################################################
in = new BufferedReader(new FileReader(new File(args[0])));
in = new BufferedReader(new FileReader(dfile));
while ( true ) try { if ( (line=in.readLine().trim()).length() != 0 ) break; } catch ( NullPointerException e ) { System.out.println("matrix file is empty"); System.exit(1); }
try { n = Integer.parseInt(line); } catch ( NumberFormatException e ) { System.out.println("matrix file is incorrectly formatted"); System.exit(1); }
try { n = Integer.parseInt(line); } catch ( NumberFormatException e ) { System.out.println("distance matrix file is incorrectly formatted: " + args[0]); System.exit(1); }
if ( n > 32760 ) { System.out.println("too many taxa (>32760)"); System.exit(1); }
lbl = new ArrayList<String>(n); dm = new float[n][]; i = -1;
while ( true ) {
try { line = in.readLine().trim(); } catch ( NullPointerException e ) { in.close(); break; }
split = line.split("\\s+"); lbl.add(split[0]); dm[++i] = new float[i]; j = 0; while ( ++j <= i ) dm[i][--j] = Float.parseFloat(split[++j]);
}
if ( ++i != n ) { System.out.println("matrix file is incorrectly formatted"); System.exit(1); }
if ( ++i != n ) { System.out.println("distance matrix file is incorrectly formatted: " + args[0]); System.exit(1); }
//### reading phylogenetic tree nwk ##################################################################################################################################################
nwk = new StringBuilder(""); in = new BufferedReader(new FileReader(new File(args[1])));
nwk = new StringBuilder(""); in = new BufferedReader(new FileReader(tfile));
while ( true ) try { nwk = nwk.append(in.readLine().trim()); } catch ( NullPointerException e ) { in.close(); break; }
tr = new StringBuilder(nwk.toString());
......@@ -129,7 +134,7 @@ public class REQ {
if ( u == sup ) {
last = tro.lastIndexOf(")"); tr = tro.insert(last, ')'); v = apc(tr, apc(tr, last)); tr = tr.insert(++v, '(');
++last; //# NOTE: closing parenthesis at index 'last' should not be considered for REQ calculations
//if ( verbose ) System.out.println("# " + tr.toString());
/*if ( verbose ) System.out.println("# " + tr.toString());*/
sup = 0; --u; continue;
}
//# parsing every internal branch e at index u in order to obtain lbl(STa) lbl(STb) | lbl(STc) lbl(T)-lbl(STa U STb U STc) ########
......@@ -151,7 +156,7 @@ public class REQ {
}
}
//# storing re value inside are #####################################################################################################
are.add(Double.valueOf(re=up/(dn=((double)sta.length)*((double)stb.length)*stc.length*std.length)));
are.add(Double.valueOf(re=up/(dn=((double)sta.length)*((double)stb.length)*((double)stc.length)*((double)std.length))));
if ( verbose ) System.out.println((Arrays.toString(sta) + Arrays.toString(stb) + Arrays.toString(stc) + Arrays.toString(std)).replaceAll(" ","") + " Re=" + df.format(re) + " (" + ((long)up) + "/" + ((long)dn) + ")");
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment