Commit e2574715 authored by Alexis CRISCUOLO

0.1.1.1

parent d5599559
This diff is collapsed.
/*
########################################################################################################
FASTA2MSA: combining consensus sequences into a multiple sequence alignment
Copyright (C) 2020 Institut Pasteur
This program is part of the package SAM2MSA.
This program is free software: you can redistribute it and/or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see
<http://www.gnu.org/licenses/>.
Contact:
Alexis Criscuolo alexis.criscuolo@pasteur.fr
Genome Informatics & Phylogenetics (GIPhy) giphy.pasteur.fr
Bioinformatics and Biostatistics Hub research.pasteur.fr/team/hub-giphy
USR 3756 IP CNRS research.pasteur.fr/team/bioinformatics-and-biostatistics-hub
Dpt. Biologie Computationnelle research.pasteur.fr/department/computational-biology
Institut Pasteur, Paris, FRANCE research.pasteur.fr
########################################################################################################
*/
import java.io.*;
import java.util.*;
import java.util.regex.*;
// ~/WorkingStation/Software/graalvm-ce-java11-19.3.1/bin/native-image -H:+NativeArchitecture FASTA2MSA FASTA2MSA
public class FASTA2MSA {
//### constants ################################################################
final static String VERSION = "0.1.200312ac";
final static String VERSION = "0.1.200711c";
final static String NOTHING = "N.o./.T.h.I.n.G";
final static String STDIN = "-";
final static String BLANK = " ";
......@@ -81,7 +109,7 @@ public class FASTA2MSA {
//##########################################################################################################
if ( args.length < 2 ) {
System.out.println("");
System.out.println(" FASTA2MSA v." + VERSION);
System.out.println(" FASTA2MSA v." + VERSION + " Copyright (C) 2020 Institut Pasteur");
System.out.println("");
System.out.println(" FASTA2MSA combines the different consensus sequences estimated by SAM2MAP or");
System.out.println(" MAP2FASTA into a multiple sequence alignment (MSA) in FASTA format.");
......@@ -106,6 +134,7 @@ public class FASTA2MSA {
System.out.println(" character containing at least one forbidden character state will");
System.out.println(" be discarded (default: \"\")");
System.out.println(" -V when set, only variable characters will be selected (default: not");
System.out.println(" set)");
System.out.println(" -s NUMBER discarding all characters before the specified position (default:");
System.out.println(" not set)");
System.out.println(" -e NUMBER discarding all characters after the specified position (default:");
......
import java.io.*;
import java.util.*;
/*
########################################################################################################
// ~/WorkingStation/Software/graalvm-ce-java11-19.3.1/bin/native-image -H:+NativeArchitecture MAP2FASTA MAP2FASTA
MAP2FASTA: inferring a consensus sequence from a MAP file
Copyright (C) 2020 Institut Pasteur
This program is part of the package SAM2MSA.
This program is free software: you can redistribute it and/or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see
<http://www.gnu.org/licenses/>.
Contact:
Alexis Criscuolo alexis.criscuolo@pasteur.fr
Genome Informatics & Phylogenetics (GIPhy) giphy.pasteur.fr
Bioinformatics and Biostatistics Hub research.pasteur.fr/team/hub-giphy
USR 3756 IP CNRS research.pasteur.fr/team/bioinformatics-and-biostatistics-hub
Dpt. Biologie Computationnelle research.pasteur.fr/department/computational-biology
Institut Pasteur, Paris, FRANCE research.pasteur.fr
########################################################################################################
*/
import java.io.*;
import java.util.*;
public class MAP2FASTA {
//### constants ################################################################
final static String VERSION = "0.1.200312ac";
final static String VERSION = "0.1.200711c";
final static String NOTHING = "N.o./.T.h.I.n.G";
final static String STDIN = "-";
final static int MIN_COV = 10; // default minimum coverage
......@@ -91,50 +119,20 @@ public class MAP2FASTA {
//##########################################################################################################
if ( args.length < 2 ) {
System.out.println("");
System.out.println(" MAP2FASTA v." + VERSION);
System.out.println(" MAP2FASTA v." + VERSION + " Copyright (C) 2020 Institut Pasteur");
System.out.println("");
System.out.println(" SAM2MAP infers a consensus sequence from read alignments against a reference");
System.out.println(" sequence in SAM format. The inferred consensus sequence has always the same size");
System.out.println(" as the reference one. At each position, the inferred character state is the");
System.out.println(" the majority-rule one within the aligned reads (option -f). For each position,");
System.out.println(" the inferred character state is associated to a map code:");
System.out.println(" U under-covered position (options -p or -c)");
System.out.println(" u position neighboring map code 'U'");
System.out.println(" O over-covered position (options -p or -C)");
System.out.println(" o position neighboring map code 'O'");
System.out.println(" S strand-biased position (option -s)");
System.out.println(" X position within SNP-rich or SNP-poor regions (options -x and -w)");
System.out.println(" M unbiased position");
System.out.println(" Inferred character states associated to the map code 'U' are always '?'.");
System.out.println(" Inferred character states that differ from the reference ones and associated to");
System.out.println(" the map codes 'u', 'O', 'o' or 'S' can be replaced by 'X' or not (option -m).");
System.out.println(" Inferred character states associated to the map code 'X' are replaced by 'x'");
System.out.println(" (options -x and -w).");
System.out.println(" The main output file is a map file that summarizes the read alignments against");
System.out.println(" the reference sequence. The inferred sequence is also written in FASTA format.");
System.out.println(" MAP2FASTA infers a consensus sequence from a MAP file.");
System.out.println("");
System.out.println(" USAGE: SAM2MAP [-i SAMFILE] [-r REFFILE] [-o BASENAME] ...");
System.out.println(" USAGE: MAP2FASTA [-i MAPFILE] ...");
System.out.println("");
System.out.println(" GENERAL OPTIONS:");
System.out.println("");
System.out.println(" -i FILE read alignment in SAM format; set \"-\" to read from standard input");
System.out.println(" -i FILE tab-delimited MAP file; set \"-\" to read from standard input");
System.out.println(" (mandatory)");
System.out.println(" -r FILE reference sequence(s) in FASTA format; should contain at least");
System.out.println(" one sequence used for the read alignment (mandatory)");
System.out.println(" -o BASENAME basename for output files (mandatory)");
System.out.println(" -n STRING name of the inferred sequence; when set, a unique sequence will");
System.out.println(" be written in a FASTA file with the specified name in the header");
System.out.println(" be written in a FASTA file with the specified name in the header");
System.out.println(" -v verbose mode");
System.out.println("");
System.out.println(" READ ALIGNMENT:");
System.out.println("");
System.out.println(" -Q INTEGER minimum allowed Phred score; sequenced bases associated to a");
System.out.println(" Phred score smaller than the specified threshold are not");
System.out.println(" considered (default: 20)");
System.out.println(" -q INTEGER minimum allowed mapping Phred score; aligned reads associated to");
System.out.println(" a Phred score smaller than the specified threshold are not");
System.out.println(" considered (default: 20)");
System.out.println("");
System.out.println(" READ COVERAGE:");
System.out.println("");
System.out.println(" -p NUMBER p-value to determine the coverage depth confidence interval;");
......@@ -144,15 +142,15 @@ public class MAP2FASTA {
System.out.println(" respectively (default: 0.005)");
System.out.println(" -c INTEGER coverage depth lower bound; if the number of aligned reads is");
System.out.println(" smaller than this threshold, the corresponding position is");
System.out.println(" considered as under-covered and associated to the map code 'U'");
System.out.println(" (default: estimated from the data via option -p, but at leat 10)");
System.out.println(" considered as under-covered and associated with the map code 'U'");
System.out.println(" (default: estimated from the data via option -p, but at least 10)");
System.out.println(" -C INTEGER coverage depth upper bound; if the number of aligned reads is");
System.out.println(" larger than this threshold, the corresponding position is");
System.out.println(" considered as over-covered and associated to the map code 'O'");
System.out.println(" considered as over-covered and associated with the map code 'O'");
System.out.println(" (default: estimated from the data via option -p)");
System.out.println(" -s INTEGER minimum number of reads for each strand to trust a position; if");
System.out.println(" if a position does not verify this condition, it is considered");
System.out.println(" as strand-biased and associated to the map code 'S' (default: 5)");
System.out.println(" a position does not verify this condition, it is considered as");
System.out.println(" strand-biased and associated with the map code 'S' (default: 5)");
System.out.println("");
System.out.println(" CHARACTER STATE INFERENCE:");
System.out.println("");
......
/*
########################################################################################################
SAM2MAP: inferring a consensus sequence from SAM-formatted read alignments
Copyright (C) 2020 Institut Pasteur
This program is part of the package SAM2MSA.
This program is free software: you can redistribute it and/or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License along with this program. If not, see
<http://www.gnu.org/licenses/>.
Contact:
Alexis Criscuolo alexis.criscuolo@pasteur.fr
Genome Informatics & Phylogenetics (GIPhy) giphy.pasteur.fr
Bioinformatics and Biostatistics Hub research.pasteur.fr/team/hub-giphy
USR 3756 IP CNRS research.pasteur.fr/team/bioinformatics-and-biostatistics-hub
Dpt. Biologie Computationnelle research.pasteur.fr/department/computational-biology
Institut Pasteur, Paris, FRANCE research.pasteur.fr
########################################################################################################
*/
import java.io.*;
import java.util.*;
// minimap2 -ax sr -t4 ref.fa r1.fq r2.fq | SAM2MAP -i - -r ref.fa -o out -n tax
public class SAM2MAP {
//### constants ################################################################
final static String VERSION = "0.1.200312ac";
final static String VERSION = "0.1.200711c";
final static String NOTHING = "N.o./.T.h.I.n.G";
final static String STDIN = "-";
final static int MIN_COV = 10; // default minimum coverage
......@@ -96,13 +125,13 @@ public class SAM2MAP {
//##########################################################################################################
if ( args.length < 2 ) {
System.out.println("");
System.out.println(" SAM2MAP v." + VERSION);
System.out.println(" SAM2MAP v." + VERSION + " Copyright (C) 2020 Institut Pasteur");
System.out.println("");
System.out.println(" SAM2MAP infers a consensus sequence from read alignments against a reference");
System.out.println(" sequence in SAM format. The inferred consensus sequence has always the same size");
System.out.println(" as the reference one. At each position, the inferred character state is the");
System.out.println(" the majority-rule one within the aligned reads (option -f). For each position,");
System.out.println(" the inferred character state is associated to a map code:");
System.out.println(" SAM2MAP infers a consensus sequence from SAM-formatted read alignments against a");
System.out.println(" reference. The inferred consensus sequence has always the same size as the");
System.out.println(" reference one. At each position, the inferred character state is the majority-");
System.out.println(" rule one within the aligned bases (option -f). For each position, the inferred");
System.out.println(" character state is associated with one of the following map code:");
System.out.println(" U under-covered position (options -p or -c)");
System.out.println(" u position neighboring map code 'U'");
System.out.println(" O over-covered position (options -p or -C)");
......@@ -110,11 +139,11 @@ public class SAM2MAP {
System.out.println(" S strand-biased position (option -s)");
System.out.println(" X position within SNP-rich or SNP-poor regions (options -x and -w)");
System.out.println(" M unbiased position");
System.out.println(" Inferred character states associated to the map code 'U' are always '?'.");
System.out.println(" Inferred character states that differ from the reference ones and associated to");
System.out.println(" the map codes 'u', 'O', 'o' or 'S' can be replaced by 'X' or not (option -m).");
System.out.println(" Inferred character states associated to the map code 'X' are replaced by 'x'");
System.out.println(" (options -x and -w).");
System.out.println(" Inferred character states associated with the map code 'U' are always '?'.");
System.out.println(" Inferred character states that differ from the reference ones and associated");
System.out.println(" with the map codes 'u', 'O', 'o' or 'S' can be replaced by 'X' or not (option");
System.out.println(" -m). Inferred character states associated with the map code 'X' are replaced by");
System.out.println(" 'x' (options -x and -w).");
System.out.println(" The main output file is a map file that summarizes the read alignments against");
System.out.println(" the reference sequence. The inferred sequence is also written in FASTA format.");
System.out.println("");
......@@ -133,11 +162,11 @@ public class SAM2MAP {
System.out.println("");
System.out.println(" READ ALIGNMENT:");
System.out.println("");
System.out.println(" -Q INTEGER minimum allowed Phred score; sequenced bases associated to a");
System.out.println(" -q INTEGER minimum allowed Phred score; sequenced bases associated with a");
System.out.println(" Phred score smaller than the specified threshold are not");
System.out.println(" considered (default: 20)");
System.out.println(" -q INTEGER minimum allowed mapping Phred score; aligned reads associated to");
System.out.println(" a Phred score smaller than the specified threshold are not");
System.out.println(" -Q INTEGER minimum allowed mapping Phred score; aligned reads associated");
System.out.println(" with a Phred score smaller than the specified threshold are not");
System.out.println(" considered (default: 20)");
System.out.println("");
System.out.println(" READ COVERAGE:");
......@@ -149,15 +178,15 @@ public class SAM2MAP {
System.out.println(" respectively (default: 0.005)");
System.out.println(" -c INTEGER coverage depth lower bound; if the number of aligned reads is");
System.out.println(" smaller than this threshold, the corresponding position is");
System.out.println(" considered as under-covered and associated to the map code 'U'");
System.out.println(" (default: estimated from the data via option -p, but at leat 10)");
System.out.println(" considered as under-covered and associated with the map code 'U'");
System.out.println(" (default: estimated from the data via option -p, but at least 10)");
System.out.println(" -C INTEGER coverage depth upper bound; if the number of aligned reads is");
System.out.println(" larger than this threshold, the corresponding position is");
System.out.println(" considered as over-covered and associated to the map code 'O'");
System.out.println(" considered as over-covered and associated with the map code 'O'");
System.out.println(" (default: estimated from the data via option -p)");
System.out.println(" -s INTEGER minimum number of reads for each strand to trust a position; if");
System.out.println(" if a position does not verify this condition, it is considered");
System.out.println(" as strand-biased and associated to the map code 'S' (default: 5)");
System.out.println(" a position does not verify this condition, it is considered as");
System.out.println(" strand-biased and associated with the map code 'S' (default: 5)");
System.out.println("");
System.out.println(" CHARACTER STATE INFERENCE:");
System.out.println("");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment