diff --git a/README.md b/README.md index 944963819332272c08db5a77685e10a03d274613..1a9c39aeceb7a63cabb054a9628fa18b6129c4f0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Concatenate -_Concatenate_ is a command line program written in [Java](https://docs.oracle.com/javase/8/docs/technotes/guides/language/index.html) that allows building a supermatrix of characters from a set of multiple sequence alignments (MSA). +_Concatenate_ is a command line program written in [Java](https://docs.oracle.com/javase/8/docs/technotes/guides/language/index.html) to build a supermatrix of characters from a set of multiple sequence alignments (MSA) in FASTA or PHYLIP format. ## Compilation and execution @@ -8,6 +8,10 @@ The source code of _Concatenate_ is inside the _src_ directory and could be comp #### Building an executable jar file +Clone this repository with the following command line: +```bash +git clone https://gitlab.pasteur.fr/GIPhy/Concatenate.git +``` On computers with [Oracle JDK](http://www.oracle.com/technetwork/java/javase/downloads/index.html) (6 or higher) installed, a Java executable jar file could be created. In a command-line window, go to the _src_ directory and type: ```bash javac Concatenate.java @@ -15,37 +19,51 @@ echo Main-Class: Concatenate > MANIFEST.MF jar -cmvf MANIFEST.MF Concatenate.jar Concatenate.class rm MANIFEST.MF Concatenate.class ``` -This will create the executable jar file `Concatenate.jar` that could be launched with the following command line model: +This will create the executable jar file `Concatenate.jar` that could be run with the following command line model: ```bash java -jar Concatenate.jar [options] ``` #### Building a native code binary -On computers with the [GNU compiler GCJ](https://gcc.gnu.org/onlinedocs/gcc-4.2.4/gcj/) installed, a binary could also be built. 
In a command-line window, go to the _src_ directory, and type: +Clone this repository with the following command line: ```bash -make +git clone https://gitlab.pasteur.fr/GIPhy/Concatenate.git +``` +On computers with [GraalVM](https://www.graalvm.org/downloads/) installed, a native executable can be built. In a command-line window, go to the _src_ directory, and type: +```bash +javac Concatenate.java +native-image Concatenate Concatenate +rm Concatenate.class ``` -This will create the executable binary file `concatenate` that could be launched with the following command line model: +This will create the native executable `Concatenate` that can be run with the following command line model: ```bash -./concatenate [options] +./Concatenate [options] ``` + ## Usage Launch _Concatenate_ without option to read the following documentation: ``` --i <infile> To indicate the input file that contains containing every MSA file name (one - per line). If the character % is set before a file name, the corresponding - MSA will be not used to build the supermatrix of characters. Each MSA file - should contain one MSA (DNA, RNA, amino acid, O1, ...) in either FASTA or - PHYLIP format. Relative/absolute path is allowed for each MSA file (default: - datafiles.txt). --o <outfile> To set the output file name (default: supermatrix.phy or supermatrix.fasta) --f To write the supermatrix of characters in FASTA format (default: PHYLIP) --l <integer> To set the maximum length of the written sequences (default: infinity); when - option -f is not set, this option leads to a PHYLIP-interleaved ouput file + Concatenate + + USAGE: Concatenate [-i <infile>] [-o <outfile>] [-f] [-l <integer>] + + where options are: + + -i <infile> to indicate the input file that contains containing every MSA file name (one + per line). If the character % is set before a file name, the corresponding + MSA will be not used to build the supermatrix of characters. 
Each MSA file + should contain one MSA (DNA, RNA, amino acid, O1, ...) in either FASTA or + PHYLIP format. Relative/absolute path is allowed for each MSA file (default: + datafiles.txt). + -o <outfile> To set the output file name (default: supermatrix.phy or supermatrix.fasta) + -f To write the supermatrix of characters in FASTA format (default: PHYLIP) + -l <integer> To set the maximum length of the written sequences (default: infinity); when + option -f is not set, this option leads to a PHYLIP-interleaved ouput file + ``` ## Notes diff --git a/src/Concatenate.java b/src/Concatenate.java index 8300d000775309211b2ae91519f9b28793a11fa9..856b1b7a1c575ede0850af95d044a7298fcff7e1 100644 --- a/src/Concatenate.java +++ b/src/Concatenate.java @@ -1,31 +1,30 @@ /* - #################################################################### - Concatenate: building a supermatrix of characters by concatenating - a set of multiple sequence alignment + ######################################################################################################## - Copyright (C) 2018 Alexis Criscuolo - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or + Concatenate: building a supermatrix of characters by concatenating a set of multiple sequence alignment + + Copyright (C) 2018-2020 Institut Pasteur + + This program is free software: you can redistribute it and/or modify it under the terms of the GNU + General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
+ This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. - You should have received a copy of the GNU General Public License - along with this program. If not, see <http://www.gnu.org/licenses/>. - - Contact: - Institut Pasteur - Bioinformatics and Biostatistics Hub - C3BI, USR 3756 IP CNRS - Paris, FRANCE + You should have received a copy of the GNU General Public License along with this program. If not, see + <http://www.gnu.org/licenses/>. + + Contact: + Alexis Criscuolo alexis.criscuolo@pasteur.fr + Genome Informatics & Phylogenetics (GIPhy) giphy.pasteur.fr + Bioinformatics and Biostatistics Hub research.pasteur.fr/team/hub-giphy + USR 3756 IP CNRS research.pasteur.fr/team/bioinformatics-and-biostatistics-hub + Dpt. Biologie Computationnelle research.pasteur.fr/department/computational-biology + Institut Pasteur, Paris, FRANCE research.pasteur.fr - alexis.criscuolo@pasteur.fr - #################################################################### + ######################################################################################################## */ import java.io.*; @@ -34,8 +33,9 @@ import java.util.*; public class Concatenate { // constants + final static String VERSION = "0.1b.201024ac"; final static String MISSING = 
"????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????????"; // one thousand unknown character states - final static String BLANK = " "; // one thousand blank characters + final static String BLANK = " "; // one thousand blank characters final static String NO_FILE = "n.o.f.i.l.e"; // options @@ -70,11 +70,13 @@ public class Concatenate { // ########################### if ( args.length < 2 ) { System.out.println(""); - System.out.println(" USAGE: Concatenate [options]"); + System.out.println(" Concatenate v." + VERSION + " Copyright (C) 2019-2020 Institut Pasteur"); + System.out.println(""); + System.out.println(" USAGE: Concatenate [-i <infile>] [-o <outfile>] [-f] [-l <integer>]"); System.out.println(""); System.out.println(" where options are:"); System.out.println(""); - System.out.println(" -i <infile> To indicate the input file that contains containing every MSA file name (one "); + System.out.println(" -i <infile> to indicate the input file that contains containing every MSA file name (one"); System.out.println(" per line). 
If the character % is set before a file name, the corresponding"); System.out.println(" MSA will be not used to build the supermatrix of characters. Each MSA file"); System.out.println(" should contain one MSA (DNA, RNA, amino acid, O1, ...) in either FASTA or"); diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 5f1dc3945706c60deef43737cc3dd2d82ebdbbff..0000000000000000000000000000000000000000 --- a/src/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -GCJ=gcj -GCJFLAGS=-fsource=1.6 -march=native -msse2 -O3 -minline-all-stringops -fomit-frame-pointer -momit-leaf-frame-pointer -fstrict-aliasing -fno-store-check -fno-bounds-check -funroll-all-loops -Wall -OTHERFLAGS=-funsafe-math-optimizations -ffast-math -MAIN=Concatenate -EXEC=concatenate - -Concatenate: Concatenate.java - $(GCJ) $(GCJFLAGS) --main=$(MAIN) $(MAIN).java -o $(EXEC)