Skip to content
Snippets Groups Projects
Commit 6bac946f authored by Alexis  CRISCUOLO's avatar Alexis CRISCUOLO :black_circle:
Browse files

1.1

parent 8d6e3948
No related branches found
No related tags found
No related merge requests found
...@@ -31,7 +31,10 @@ ...@@ -31,7 +31,10 @@
# = VERSIONS = # # = VERSIONS = #
# ============ # # ============ #
# # # #
VERSION=1.0; # VERSION=1.1; #
# + SSUdb inside a dedicated directory ($SSUDB_DIR) #
# #
# VERSION=1.0; #
# # # #
############################################################################################################## ##############################################################################################################
...@@ -131,7 +134,7 @@ ...@@ -131,7 +134,7 @@
# # # #
PUNZIP="$PIGZ_BIN --decompress"; PUNZIP="$PIGZ_BIN --decompress";
# # # #
# -- agrep ------------------------------------------------------------------------------------------------ # # -- zgrep ------------------------------------------------------------------------------------------------ #
# # # #
ZGREP="$ZGREP_BIN"; ZGREP="$ZGREP_BIN";
# # # #
...@@ -162,10 +165,12 @@ mandoc() { ...@@ -162,10 +165,12 @@ mandoc() {
USAGE: ASSU [options] <infile> [<infile> ...] USAGE: ASSU [options] <infile> [<infile> ...]
OPTIONS: OPTIONS:
-d <file> SSU databank file (default: SSUdb.gz in the same directory as ASSU.sh) -d <file> SSU databank file (default: db/SSUdb.gz in the same directory as ASSU)
-p <string> restricts the SSU databank to the specified pattern (default: none) -p <string> restricts the SSU databank to the specified (extended regex) pattern
-o <string> output SSU sequence file name (default: ssu.fasta) (default: none)
-O <string> writes the selected reads into the specified file name (default: none) -o <string> output FASTA-formatted SSU sequence file name (default: ssu.fasta)
-O <string> writes the selected reads into the specified FASTQ-formatted file name
(default: none)
-l <int> minimum sequence length (default: 1000) -l <int> minimum sequence length (default: 1000)
-L <int> minimum read length (default: AUTO) -L <int> minimum read length (default: AUTO)
-Q <int> minimum base Phred quality value (default: 20) -Q <int> minimum base Phred quality value (default: 20)
...@@ -174,7 +179,7 @@ mandoc() { ...@@ -174,7 +179,7 @@ mandoc() {
-F <float> minimum proportion of the majority base to infer that base (default: 0.8) -F <float> minimum proportion of the majority base to infer that base (default: 0.8)
-A <float> minimum ratio of the alternative base(s) to the majority one to add that -A <float> minimum ratio of the alternative base(s) to the majority one to add that
base(s) to the consensus (default: 0.2) base(s) to the consensus (default: 0.2)
-N set N when multiple bases at a position (default: not set) -N set N when multiple bases at a consensus position (default: not set)
-w <dir> path to the tmp directory (default: \$TMPDIR, otherwise /tmp) -w <dir> path to the tmp directory (default: \$TMPDIR, otherwise /tmp)
-t <int> thread numbers (default: 2) -t <int> thread numbers (default: 2)
-v verbose mode -v verbose mode
...@@ -183,8 +188,9 @@ mandoc() { ...@@ -183,8 +188,9 @@ mandoc() {
-h prints this help and exit -h prints this help and exit
EXAMPLES: EXAMPLES:
ASSU -t 24 -o 16s.fasta fwd.fastq.gz rev.fastq.gz sgl.fastq.bz2 ASSU -t 24 -o 16s.fasta fwd.fastq.gz rev.fastq.gz sgl.fastq.gz
ASSU -d SSUdb.gz -O 16s.fastq -p Clostridium -L 75 -v *.fastq ASSU -d SSUdb.gz -O 16s.fastq -p "Devosia limi" -L 75 -v *.fastq
ASSU -p "Citrobacter|Escherichia|Shigella" -N -v hts.fastq.bz2
EOF EOF
} }
...@@ -212,11 +218,11 @@ chrono() { ...@@ -212,11 +218,11 @@ chrono() {
} }
# # # #
# -- dcontrol --------------------------------------------------------------------------------------------- # # -- dcontrol --------------------------------------------------------------------------------------------- #
# >> controls required and optional dependancies # # >> controls required and optional (non-coreutils) dependancies #
# # # #
dcontrol() { dcontrol() {
fail=false; fail=false;
for binexe in bc fold grep $ZGREP_BIN sed $GAWK_BIN $BWAMEM2_BIN $SAMTOOLS_BIN for binexe in bc grep $ZGREP_BIN sed $GAWK_BIN $BWAMEM2_BIN $SAMTOOLS_BIN
do do
if [ ! $(command -v $binexe) ]; then echo "[ERROR] $binexe not found" >&2 ; fail=true; fi if [ ! $(command -v $binexe) ]; then echo "[ERROR] $binexe not found" >&2 ; fail=true; fi
done done
...@@ -233,7 +239,7 @@ dcontrol() { ...@@ -233,7 +239,7 @@ dcontrol() {
} }
# # # #
# -- dcheck ----------------------------------------------------------------------------------------------- # # -- dcheck ----------------------------------------------------------------------------------------------- #
# >> checks dependancies and exit # # >> checks (non-coreutils) dependancies and exit #
# # # #
dcheck() { dcheck() {
echo "Checking required dependencies ..." ; echo "Checking required dependencies ..." ;
...@@ -243,12 +249,6 @@ dcheck() { ...@@ -243,12 +249,6 @@ dcheck() {
if [ ! $(command -v $binexe) ]; then echo -e "\e[31m[fail]\e[0m"; if [ ! $(command -v $binexe) ]; then echo -e "\e[31m[fail]\e[0m";
else echo -e -n "$(which $binexe)\t\t\e[32m[ok]\e[0m\t\t"; bc -v | head -1 ; else echo -e -n "$(which $binexe)\t\t\e[32m[ok]\e[0m\t\t"; bc -v | head -1 ;
fi fi
## fold ############################
echo -e -n "> \e[1mfold\e[0m >1.0 mandatory\t" ; binexe=fold;
echo -e -n "$binexe \t\t" ;
if [ ! $(command -v $binexe) ]; then echo -e "\e[31m[fail]\e[0m";
else echo -e -n "$(which $binexe)\t\t\e[32m[ok]\e[0m\t\t"; fold --version | head -1 ;
fi
## grep ############################ ## grep ############################
echo -e -n "> \e[1mgrep\e[0m >1.0 mandatory\t" ; binexe=grep; echo -e -n "> \e[1mgrep\e[0m >1.0 mandatory\t" ; binexe=grep;
echo -e -n "$binexe \t\t" ; echo -e -n "$binexe \t\t" ;
...@@ -345,24 +345,26 @@ dispdb() { ...@@ -345,24 +345,26 @@ dispdb() {
if [ $# -lt 1 ]; then mandoc ; exit 1 ; fi if [ $# -lt 1 ]; then mandoc ; exit 1 ; fi
SSUDB=$PWD/SSUdb.gz; # SSU databank -d SSUDB_DIR=${SSUDB_DIR:-$PWD/db}
OUTFILE=ssu.fasta; # FASTA outfile -o SSUDB=$SSUDB_DIR/SSUdb.gz; # SSU databank -d
OUTFQ="$NA"; # FASTQ outfile -O OUTFILE=ssu.fasta; # FASTA outfile -o
PATTERN="$NA"; # pattern -p OUTFQ="$NA"; # FASTQ outfile -O
SLGT=1000; # min seq lgt -l PATTERN="$NA"; # pattern -p
RLGT="00"; # min read lgt -L SLGT=1000; # min seq lgt -l
BQ=20; # min Phred -Q RLGT="00"; # min read lgt -L
MQ=20; # min map Phread -M BQ=20; # min Phred -Q
COV=50; # min cov depth -D MQ=20; # min map Phread -M
FQ=0.8; # min freq -F COV=50; # min cov depth -D
FH=0.2; # min het -A FQ=0.8; # min freq -F
AMB=true; # only N -N FH=0.2; # min het -A
DISPDB=false; # display SSUdb -s AMB=true; # only N -N
TMP_DIR=${TMPDIR:-/tmp}; # tmp directory -w DISPDB=false; # display SSUdb -s
NTHREADS=2; # no. threads -t TMP_DIR=${TMPDIR:-/tmp}; # tmp directory -w
VERBOSE=false; # verbose mode -v NTHREADS=2; # no. threads -t
VERBOSE=false; # verbose mode -v
while getopts d:p:o:O:l:L:Q:M:D:F:A:w:t:Nvcsh option DEBUG=false; # debug mode -X
while getopts d:p:o:O:l:L:Q:M:D:F:A:w:t:NvcshX option
do do
case $option in case $option in
d) SSUDB="$OPTARG" ;; d) SSUDB="$OPTARG" ;;
...@@ -382,6 +384,7 @@ do ...@@ -382,6 +384,7 @@ do
v) VERBOSE=true ;; v) VERBOSE=true ;;
c) dcheck ;; c) dcheck ;;
s) DISPDB=true ;; s) DISPDB=true ;;
X) DEBUG=true ;;
h) mandoc ; exit 0 ;; h) mandoc ; exit 0 ;;
\?) mandoc ; exit 1 ;; \?) mandoc ; exit 1 ;;
esac esac
...@@ -391,11 +394,24 @@ shift "$(( $OPTIND - 1 ))" ...@@ -391,11 +394,24 @@ shift "$(( $OPTIND - 1 ))"
NFILE=$#; # no. input file(s) NFILE=$#; # no. input file(s)
FQLIST="$@"; # specified input file(s) FQLIST="$@"; # specified input file(s)
$DEBUG && VERBOSE=true;
echo "# ASSU v$VERSION" ; echo "# ASSU v$VERSION" ;
echo "# $COPYRIGHT" ; echo "# $COPYRIGHT" ;
echo "+ https://gitlab.pasteur.fr/GIPhy/ASSU" ; echo "+ https://gitlab.pasteur.fr/GIPhy/ASSU" ;
$VERBOSE && echo "> Syst: $MACHTYPE" ; if $VERBOSE
$VERBOSE && echo "> Bash: $BASH_VERSION" ; then
echo "> Syst: $MACHTYPE" ;
echo "> Bash: $BASH_VERSION" ;
echo "> SSUdb: $SSUDB" ;
if [ -s ${SSUDB%.*}.version.txt ]
then
$BAWK '(NR==1){d=$2}
(NR==2){n=$1}
END {print"> SSUdb v"d" ("n" sequences)"}' ${SSUDB%.*}.version.txt
fi
fi
[ ! -e $SSUDB ] && echoxit "[ERROR] databank not found (option -d): $SSUDB" ; [ ! -e $SSUDB ] && echoxit "[ERROR] databank not found (option -d): $SSUDB" ;
[ -d $SSUDB ] && echoxit "[ERROR] not a file (option -d): $SSUDB" ; [ -d $SSUDB ] && echoxit "[ERROR] not a file (option -d): $SSUDB" ;
...@@ -547,30 +563,20 @@ MESSAGE="examining SSU databank" ; ...@@ -547,30 +563,20 @@ MESSAGE="examining SSU databank" ;
echo -n "$(chrono) $MESSAGE ." ; echo -n "$(chrono) $MESSAGE ." ;
## indexing SSU databank ####################################################### ## indexing SSU databank #######################################################
if [ "$PATTERN" == "$NA" ] if [ "$PATTERN" != "$NA" ] && [ $($ZGREP -c -E "$PATTERN" $SSUDB) -eq 0 ]
then then
$BWA_INDEX -p $SSU_IDX $SSUDB &>/dev/null ; echo ".. [ok]" ;
else echo "> no selected sequence with the specified pattern: $PATTERN" ;
$ZGREP -A1 -F "$PATTERN" $SSUDB | grep -v -e "^--$" > $REF_SEQ ; finalize ;
exit ;
if [ ! -s $REF_SEQ ]
then
echo ".. [ok]" ;
echo "> no selected sequence with the specified pattern: $PATTERN" ;
finalize ;
exit ;
fi
$BWA_INDEX -p $SSU_IDX $REF_SEQ &>/dev/null ;
rm -f $REF_SEQ ;
fi fi
$BWA_INDEX -p $SSU_IDX $SSUDB &>/dev/null ;
echo -n "." ; echo -n "." ;
## aligning HTS reads ########################################################## ## aligning HTS reads ##########################################################
touch $SAM ; touch $SAM ;
[ $NTHREADS -gt 24 ] && DSRCopt="-t24" || DSRCopt="-t$NTHREADS" ; [ $NTHREADS -gt 24 ] && DSRCopt="-t24" || DSRCopt="-t$NTHREADS" ;
# BWAopt="-t $NTHREADS -k 21 -w 50 -D 1.0 -r 2.0 -T $(( $RLGT - 20 ))";
BWAopt="-t $NTHREADS -k 21 -w 50"; BWAopt="-t $NTHREADS -k 21 -w 50";
for fin in $FQLIST for fin in $FQLIST
do do
...@@ -588,18 +594,36 @@ done ...@@ -588,18 +594,36 @@ done
echo " [ok]" ; echo " [ok]" ;
## getting ref ################################################################# ## getting ref #################################################################
## maximizing the no. hits if $DEBUG
# accn=$($TAWK '{++c[$3]}END{for(a in c)if(c[a]>1)print c[a]" "a}' $SAM | sort -n | tail -1 | $BAWK '{print$2}'); then
## maximizing the avg score among the two most hitted references (as longer sequences often wins when using the basic criterion above) echo -e "no.hits\t\tsum.scores\tavg.score\tinfo" ;
accn=$($TAWK '{print$3"\t"$14}' $SAM | $TAWK '{print$3"\t"$14}' $SAM |
sed 's/AS:i://g' | sed 's/AS:i://g' |
$TAWK ' {m[$1]+=$2;++n[$1]} $TAWK ' {m[$1]+=$2;++n[$1]}
END{for(a in m)if(n[a]>10)print n[a]" "(m[a]/n[a])" "a}' | END{for(a in m)if(n[a]>10)print n[a]"\t"m[a]"\t"(m[a]/n[a])"\t"a}' |
sort -rn | head -2 | sort -rn | head |
sort -rgk2 | head -1 | $BAWK '{print$3}'); while read x y z a ; do echo -e "$x\t\t$y\t\t$z\t\t$($ZGREP -m1 -F $a $SSUDB)" ; done | tr -d '>' ;
fi
if [ "$PATTERN" == "$NA" ]
then
# maximizing the sum of alignment scores (field 14; AS:i:)
accn=$($TAWK '{print$3"\t"$14}' $SAM | sed 's/AS:i://g' |
$TAWK '{s[$1]+=$2}END{for(a in s)print s[a]"\t"a}' |
sort -rg | $TAWK '(NR==1){print$2;exit}');
else
# selecting accession(s) using $PATTERN
$ZGREP -E "$PATTERN" $SSUDB | $BAWK '{print$1}' | tr -d '>' > $TMP1 ;
# maximizing the sum of alignment scores (AS:i:) among the selected reference sequences
accn=$($TAWK '{print$3"\t"$14}' $SAM | sed 's/AS:i://g' |
$TAWK '{s[$1]+=$2}END{for(a in s)print s[a]"\t"a}' |
grep -F -f $TMP1 |
sort -rg | $TAWK '(NR==1){print$2;exit}');
fi
if [ -z "$accn" ] if [ -z "$accn" ]
then then
$VERBOSE && [ "$PATTERN" != "$NA" ] && echo "> selection pattern: $PATTERN" ;
echo "> no SSU found" ; echo "> no SSU found" ;
finalize ; finalize ;
echo "$(chrono) exit" ; echo "$(chrono) exit" ;
...@@ -634,7 +658,6 @@ MESSAGE="building SSU sequence" ; ...@@ -634,7 +658,6 @@ MESSAGE="building SSU sequence" ;
echo -n "$(chrono) $MESSAGE ." ; echo -n "$(chrono) $MESSAGE ." ;
## getting HTS reads ########################################################### ## getting HTS reads ###########################################################
# $TAWK -v l=$RLGT '(length($10)>=l){print$10"\t"$11"\t"$12}' $SAM | sed 's/NM:i://g' | $TAWK '($3<10){print$1"\t"$2}' | # NOTE: <10 mismatches against the best hit
$TAWK -v l=$RLGT '(length($10)>=l){print$10"\t"$11}' $SAM | $TAWK -v l=$RLGT '(length($10)>=l){print$10"\t"$11}' $SAM |
sort -u | $TAWK '{print"@SSU:"NR; sort -u | $TAWK '{print"@SSU:"NR;
print$1; print$1;
...@@ -676,16 +699,16 @@ $BWA_MEM $BWAopt $REF_IDX $FQTMP 2>/dev/null | ...@@ -676,16 +699,16 @@ $BWA_MEM $BWAopt $REF_IDX $FQTMP 2>/dev/null |
echo -n "." ; echo -n "." ;
## building consensus ########################################################## ## building consensus ##########################################################
# STCopt="--threads $NTHREADS --mode bayesian --ambig --min-MQ 2 --P-het 1.0e-100 --het-scale 1.0"; # STCopt="--threads $NTHREADS --mode bayesian --ambig --min-MQ 2 --P-het 1.0e-10 --het-scale 1.0";
STCopt="--threads $NTHREADS --mode simple --min-MQ $MQ --min-BQ $BQ --min-depth $COV --use-qual --call-fract $FQ --het-fract $FH"; STCopt="--threads $NTHREADS --mode simple --min-MQ $MQ --min-BQ $BQ --min-depth $COV --use-qual --call-fract $FQ --het-fract $FH";
$AMB && STCopt="$STCopt --ambig"; $AMB && STCopt="$STCopt --ambig";
SEQ=$($SAMTOOLS_CONSENSUS $STCopt $BAM 2>/dev/null | SEQ=$($SAMTOOLS_CONSENSUS $STCopt $BAM 2>/dev/null |
grep -v "^>" | tr -d '\n' | grep -v "^>" | tr -d '\n' |
sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | # trimming low
sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | # quality ends
sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | # of the SSU
sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;' | # sequence (if
sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;'); sed -r 's/^[ACGT]{0,5}[acgtRrYySsWwKkMmBbDdHhVvNn]+//;s/[acgtRrYySsWwKkMmBbDdHhVvNn]+[ACGT]{0,5}$//;'); # any)
echo " [ok]" ; echo " [ok]" ;
...@@ -717,14 +740,6 @@ then ...@@ -717,14 +740,6 @@ then
while(++i<=w) switch(c[i]){ case "A": case "C": case "G": case "T": printf" "; continue; while(++i<=w) switch(c[i]){ case "A": case "C": case "G": case "T": printf" "; continue;
default: printf"*"; continue; } default: printf"*"; continue; }
print""; }' ; print""; }' ;
# for b in $(sed 's/./& /g' <<<$"$SEQ")
# do
# case $b in
# A|C|G|T) echo -n "$b" ;;
# *) echo -n -e "\e[31m$b\e[0m" ;;
# esac
# done
# echo ;
fi fi
if [ $l -lt $SLGT ] if [ $l -lt $SLGT ]
...@@ -744,7 +759,7 @@ then ...@@ -744,7 +759,7 @@ then
exit ; exit ;
fi fi
echo ">SSU_covr_$DEPTH""_lgt_$l""_amb_$a" > $OUTFILE ; echo ">contigSSU_covr_$DEPTH""_lgt_$l""_amb_$a" > $OUTFILE ;
echo "$SEQ" >> $OUTFILE ; echo "$SEQ" >> $OUTFILE ;
if [ "$OUTFQ" == "$NA" ] if [ "$OUTFQ" == "$NA" ]
then then
...@@ -763,3 +778,4 @@ echo "$(chrono) exit" ; ...@@ -763,3 +778,4 @@ echo "$(chrono) exit" ;
exit ; exit ;
...@@ -4,9 +4,9 @@ ...@@ -4,9 +4,9 @@
# ASSU # ASSU
_ASSU_ (ASSembling SSU) is a command line tool written in [Bash](https://www.gnu.org/software/bash/) to carry out the reference-guided assembly of small subunit (SSU) 16S ribosomal ribonucleic acid (rRNA) using short high-throughput sequencing (HTS) reads derived from whole genome sequencing of bacteria or archaea strains. _ASSU_ (ASSembling SSU) is a command line tool written in [Bash](https://www.gnu.org/software/bash/) to carry out the reference-guided assembly of small subunit (SSU) 16S ribosomal ribonucleic acid (rRNA) using short high-throughput sequencing (HTS) reads derived from whole genome) sequencing of bacteria or archaea strains.
This tool was developed to compensate the failure of several _de novo_ assembly programs to assemble a non-fragmented SSU segment when the sequenced genome contains different 16S rRNA copies with sequence variation, especially when using short HTS reads. This tool was developed to compensate the failure of several _de novo_ assembly programs to assemble (at least one) non-fragmented SSU segment when the sequenced genome contains different 16S rRNA copies with sequence variation, especially when using short HTS reads.
...@@ -14,21 +14,17 @@ This tool was developed to compensate the failure of several _de novo_ assembly ...@@ -14,21 +14,17 @@ This tool was developed to compensate the failure of several _de novo_ assembly
You will need to install the required programs and tools listed in the following tables, or to verify that they are already installed with the required version. You will need to install the required programs and tools listed in the following tables, or to verify that they are already installed with the required version.
##### Mandatory dependencies
<div align="center"> <div align="center">
| program | package | version | sources | | program | package | version | sources |
|:----------------------------------------------------- |:-----------------------------------------------:| ------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------- | |:----------------------------------------------------- |:-----------------------------------------------:| ------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------- |
| [_gawk_](https://www.gnu.org/software/gawk/) | - | > 4.0.0 | [ftp.gnu.org/gnu/gawk](http://ftp.gnu.org/gnu/gawk/) |
| _gunzip_<br>_zgrep_ | [_gzip_](https://www.gnu.org/software/gzip/) | > 1.0 | [ftp.gnu.org/gnu/gzip](https://ftp.gnu.org/gnu/gzip/) |
| [_bwa-mem2_](https://github.com/bwa-mem2/bwa-mem2) | - | &ge; 2.2.1 | [gitlab.pasteur.fr/GIPhy/contig_info](https://gitlab.pasteur.fr/GIPhy/contig_info) | | [_bwa-mem2_](https://github.com/bwa-mem2/bwa-mem2) | - | &ge; 2.2.1 | [gitlab.pasteur.fr/GIPhy/contig_info](https://gitlab.pasteur.fr/GIPhy/contig_info) |
| [_samtools_](http://www.htslib.org/) | - | &ge; 1.18 | [github.com/samtools/samtools](https://github.com/samtools/samtools)<br>[sourceforge.net/projects/samtools](https://sourceforge.net/projects/samtools) | | [_samtools_](http://www.htslib.org/) | - | &ge; 1.18 | [github.com/samtools/samtools](https://github.com/samtools/samtools)<br>[sourceforge.net/projects/samtools](https://sourceforge.net/projects/samtools) |
</div> </div>
##### Optional dependencies ##### Optional programs
<div align="center"> <div align="center">
...@@ -41,6 +37,22 @@ You will need to install the required programs and tools listed in the following ...@@ -41,6 +37,22 @@ You will need to install the required programs and tools listed in the following
</div> </div>
##### Standard GNU packages and utilities
<div align="center">
| program | package | version | sources |
|:------------------------------------------------------- |:-----------------------------------------------------:| ------------:|:------------------------------------------------------------------ |
| _echo_<br>_head_<br>_fold_<br>_paste_<br>_tail_<br>_tr_ | [coreutils](https://www.gnu.org/software/coreutils/) | > 8.0 | [ftp.gnu.org/gnu/coreutils](https://ftp.gnu.org/gnu/coreutils) |
| _gunzip_<br>_zgrep_ | [gzip](https://www.gnu.org/software/gzip/) | > 1.0 | [ftp.gnu.org/gnu/gzip](https://ftp.gnu.org/gnu/gzip/) |
| [_bc_](https://www.gnu.org/software/bc) | - | > 1.0 | [ftp.gnu.org/gnu/bc](https://ftp.gnu.org/gnu/bc) |
| [_gawk_](https://www.gnu.org/software/gawk) | - | > 4.0.0 | [ftp.gnu.org/gnu/gawk](http://ftp.gnu.org/gnu/gawk/) |
| [_grep_](https://www.gnu.org/software/grep) | - | > 2.0 | [ftp.gnu.org/gnu/bc](https://ftp.gnu.org/gnu/grep) |
| [_sed_](https://www.gnu.org/software/sed) | - | > 4.2 | [ftp.gnu.org/gnu/bc](https://ftp.gnu.org/gnu/sed) |
</div>
## Installation and execution ## Installation and execution
**A.** Clone this repository with the following command line: **A.** Clone this repository with the following command line:
...@@ -86,7 +98,7 @@ For each required program, the table below reports the corresponding variable as ...@@ -86,7 +98,7 @@ For each required program, the table below reports the corresponding variable as
./ASSU.sh [options] <infile> [<infile> ...] ./ASSU.sh [options] <infile> [<infile> ...]
``` ```
**F.** _ASSU_ also requires a databank of reference SSU sequences. By default, a version of this databank is provided as a file named `SSUdb.gz` (see details in `SSUdb.version.txt`). However, a more recent version can be quickly built using the provided script `makeSSUdb.sh` with the following command line: **F.** _ASSU_ also requires a databank of reference SSU sequences. By default, a version of this databank is provided inside the directory _db/_ as a file named `SSUdb.gz` (see details in `SSUdb.version.txt`). However, a more recent version can be quickly built using the provided script `makeSSUdb.sh` with the following command line:
```bash ```bash
./makeSSUdb.sh ./makeSSUdb.sh
...@@ -103,10 +115,12 @@ Run _ASSU_ without option to read the following documentation: ...@@ -103,10 +115,12 @@ Run _ASSU_ without option to read the following documentation:
USAGE: ASSU [options] <infile> [<infile> ...] USAGE: ASSU [options] <infile> [<infile> ...]
OPTIONS: OPTIONS:
-d <file> SSU databank file (default: SSUdb.gz in the same directory as ASSU.sh) -d <file> SSU databank file (default: db/SSUdb.gz in the same directory as ASSU)
-p <string> restricts the SSU databank to the specified pattern (default: none) -p <string> restricts the SSU databank to the specified (extended regex) pattern
-o <string> output SSU sequence file name (default: ssu.fasta) (default: none)
-O <string> writes the selected reads into the specified file name (default: none) -o <string> output FASTA-formatted SSU sequence file name (default: ssu.fasta)
-O <string> writes the selected reads into the specified FASTQ-formatted file name
(default: none)
-l <int> minimum sequence length (default: 1000) -l <int> minimum sequence length (default: 1000)
-L <int> minimum read length (default: AUTO) -L <int> minimum read length (default: AUTO)
-Q <int> minimum base Phred quality value (default: 20) -Q <int> minimum base Phred quality value (default: 20)
...@@ -115,7 +129,7 @@ Run _ASSU_ without option to read the following documentation: ...@@ -115,7 +129,7 @@ Run _ASSU_ without option to read the following documentation:
-F <float> minimum proportion of the majority base to infer that base (default: 0.8) -F <float> minimum proportion of the majority base to infer that base (default: 0.8)
-A <float> minimum ratio of the alternative base(s) to the majority one to add that -A <float> minimum ratio of the alternative base(s) to the majority one to add that
base(s) to the consensus (default: 0.2) base(s) to the consensus (default: 0.2)
-N set N when multiple bases at a position (default: not set) -N set N when multiple bases at a consensus position (default: not set)
-w <dir> path to the tmp directory (default: $TMPDIR, otherwise /tmp) -w <dir> path to the tmp directory (default: $TMPDIR, otherwise /tmp)
-t <int> thread numbers (default: 2) -t <int> thread numbers (default: 2)
-v verbose mode -v verbose mode
...@@ -124,28 +138,31 @@ Run _ASSU_ without option to read the following documentation: ...@@ -124,28 +138,31 @@ Run _ASSU_ without option to read the following documentation:
-h prints this help and exit -h prints this help and exit
EXAMPLES: EXAMPLES:
ASSU -t 24 -o 16s.fasta fwd.fastq.gz rev.fastq.gz sgl.fastq.bz2 ASSU -t 24 -o 16s.fasta fwd.fastq.gz rev.fastq.gz sgl.fastq.gz
ASSU -d SSUdb.gz -O 16s.fastq -p Clostridium -L 75 -v *.fastq ASSU -d SSUdb.gz -O 16s.fastq -p "Devosia limi" -L 75 -v *.fastq
ASSU -p "Citrobacter|Escherichia|Shigella" -N -v hts.fastq.bz2
``` ```
## Notes ## Notes
* In brief, _ASSU_ first quickly aligns the specified HTS reads against all the reference sequences available in the SSU databank using _bwa-mem2_. This first step enables to determine the most suited reference sequence (called model), as well as the subset of HTS reads that arise from SSU genome region(s). Next, every HTS read from the subset is accurately aligned against the model sequence, and the resulting alignments are processed by _samtools_ to build a final (consensus) sequence. * In brief, _ASSU_ first quickly aligns the specified HTS reads against all the reference sequences available in the SSU databank using _bwa-mem2_. This first step enables to determine the most suited reference sequence (called model), as well as the subset of HTS reads that arise from SSU genome regions. Next, every HTS read from the subset is accurately aligned against the model sequence, and the resulting alignments are processed by _samtools_ to build a final (consensus) sequence.
* _ASSU_ requires at least one HTS read file. Input file(s) should be in FASTQ format and can be compressed using [_gzip_](https://www.gnu.org/software/gzip/), [_bzip2_](https://sourceware.org/bzip2/) or [_DSRC_](https://github.com/refresh-bio/DSRC) (Roguski and Deorowicz 2014). Note that input files compressed using [_bzip2_](https://sourceware.org/bzip2/) or [_DSRC_](https://github.com/refresh-bio/DSRC) require the associated decompression tool to be read (see [Dependencies](#dependencies)). * _ASSU_ requires at least one HTS read file. Input file(s) should be in FASTQ format and can be compressed using [_gzip_](https://www.gnu.org/software/gzip/), [_bzip2_](https://sourceware.org/bzip2/) or [_DSRC_](https://github.com/refresh-bio/DSRC) (Roguski and Deorowicz 2014). Note that input files compressed using [_bzip2_](https://sourceware.org/bzip2/) or [_DSRC_](https://github.com/refresh-bio/DSRC) require the associated decompression tool to be read (see [Dependencies](#dependencies)).
* _ASSU_ is not working with long HTS reads, as _bwa-mem2_ is not developed to align HTS reads on significantly shorter reference sequences. The source code of _ASSU_ can be easily modified (on request) to deal with such a case, but long HTS reads generally lead to complete SSU segments via _de novo_ assembly. * _ASSU_ is not working with long HTS reads, as _bwa-mem2_ is not developed to align HTS reads on significantly shorter reference sequences. The source code of _ASSU_ can be easily modified (on request) to deal with such a case, but long HTS reads generally lead to complete SSU segments via _de novo_ assembly.
* By default, _ASSU_ expects that the SSU databank file `SSUdb.gz` is located in the same directory. However, an alternative SSU databank file (e.g. different version, different file name) can be specified using option `-d`. The content of the specified SSU databank can be summarized using option `-s`. * By default, _ASSU_ expects that the SSU databank file `SSUdb.gz` is located in the directory `db/`. However, an alternative SSU databank file (e.g. different version, different file name) can be specified using option `-d`. The content of the specified SSU databank can be summarized using option `-s`.
* The running time of _ASSU_ is very dependent on the size of the input files, but faster running times can be obtained using multiple threads (option `-t`) and/or a temporary directory located on a hard drive with high speed (option `-w`). Faster running times can be also observed by restricting the reference SSU sequences using a specified pattern (e.g. sequence accession, genus name; option `-p`). * The running time of _ASSU_ is very dependent on the size of the input files, but faster running times can be obtained using multiple threads (option `-t`) and/or a temporary directory located on a hard drive with high speed (option `-w`).
* The assembled sequence is written in FASTA format into an output file (option `-o`; default name: `ssu.fasta`). Optionally, the selected HTS reads can be written in FASTQ format into a specified output file (option `-O`). * The assembled sequence is written in FASTA format into an output file (option `-o`; default name: `ssu.fasta`). Optionally, the selected HTS reads can be written in FASTQ format into a specified output file (option `-O`).
* The selection of the model sequence can be oriented/forced by using the option `-p` to set a(n extended-regex) pattern (e.g. accessions, genus, species). It is recommended to specify the pattern between quotation marks.
* As the assembled SSU sequence is often the consensus of several copies with sequence variation within the sequenced genome (e.g. Větrovský and Baldrian 2013), it may contain ambiguous positions resulting from the consensus of different sequenced bases at those positions. In such cases, degenerated nucleotides are used to represent the consensus of different character states (see e.g. [Table 1](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2865858/table/T1/) in Johnson 2010), or lowercase characters when a deletion (i.e. gap) is involved in the consensus. Note that every degenerated nucleotide can be replaced by the character state `N` using option `-N`. * As the assembled SSU sequence is often the consensus of several copies with sequence variation within the sequenced genome (e.g. Větrovský and Baldrian 2013), it may contain ambiguous positions resulting from the consensus of different sequenced bases at those positions. In such cases, degenerated nucleotides are used to represent the consensus of different character states (see e.g. [Table 1](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2865858/table/T1/) in Johnson 2010), or lowercase characters when a deletion (i.e. gap) is involved in the consensus. Note that every degenerated nucleotide can be replaced by the character state `N` using option `-N`.
* The (number of) ambiguous positions can be slightly modified by considering shorter HTS reads (option `-L`), putative sequencing errors (option `-Q`), weak alignments (option `-M`) or low coverage depth (option `-D`). The consensus definition can be modified by tuning the two options `-F` and `-A` (corresponding to the options `--call-fract` and `--het-fract` of [samtools consensus](http://www.htslib.org/doc/samtools-consensus.html), respectively). * The (number of) ambiguous positions can be slightly modified by considering shorter HTS reads (option `-L`), putative sequencing errors (option `-Q`), weak alignments (option `-M`), low coverage depth (option `-D`) or alternative model sequence (option `-p`). The consensus definition can be modified by tuning the two options `-F` and `-A`, corresponding to the options `--call-fract` and `--het-fract` of [samtools consensus](http://www.htslib.org/doc/samtools-consensus.html) (mode simple), respectively.
* No output file is written in several situations: <br> &emsp; &bullet; insufficient coverage depth (default: at least 50&times;; option `-D`), <br> &emsp; &bullet; too short assembled SSU sequence (default: at least 1,000 bps; option `-l`), <br> &emsp; &bullet; too many ambiguous positions (i.e. more than 5%). * No output file is written in several situations: <br> &emsp; &bullet; insufficient coverage depth (default: at least 50&times;; option `-D`), <br> &emsp; &bullet; too short assembled SSU sequence (default: at least 1,000 bps; option `-l`), <br> &emsp; &bullet; too many ambiguous positions (i.e. more than 5%).
...@@ -172,23 +189,25 @@ Use the following command line to run _ASSU_ on these two FASTQ files using 12 t ...@@ -172,23 +189,25 @@ Use the following command line to run _ASSU_ on these two FASTQ files using 12 t
./ASSU.sh -t 12 -o FWSEC0011.ssu.fasta -v SRR7896249_*.fastq.gz ./ASSU.sh -t 12 -o FWSEC0011.ssu.fasta -v SRR7896249_*.fastq.gz
``` ```
Note that the SSU databank used for this assembly is the version 2024-02-16 (20,404 sequences). Note that the SSU databank used for this assembly is the version 2024-02-18 (20,404 sequences).
As the verbose mode was set (option `-v`), this command line leads to the following output: As the verbose mode was set (option `-v`), this command line leads to the following output:
``` ```
# ASSU v1.0 # ASSU v1.1
# Copyright (C) 2024 Institut Pasteur # Copyright (C) 2024 Institut Pasteur
+ https://gitlab.pasteur.fr/GIPhy/ASSU + https://gitlab.pasteur.fr/GIPhy/ASSU
> Syst: x86_64-redhat-linux-gnu > Syst: x86_64-redhat-linux-gnu
> Bash: 4.4.20(1)-release > Bash: 4.4.20(1)-release
> SSUdb: /local/bin//ASSU/db/SSUdb.gz
> SSUdb v2024-02-18 (20404 sequences)
[00:00] checking input files ... [ok] [00:00] checking input files ... [ok]
+ SRR7896249_1.fastq.gz + SRR7896249_1.fastq.gz
+ SRR7896249_2.fastq.gz + SRR7896249_2.fastq.gz
[00:00] creating tmp directory .... [ok] [00:00] creating tmp directory .... [ok]
> TMP_DIR=/tmp/ASSU.f2PzH4mWpX > TMP_DIR=/tmp/ASSU.uYf5cUoa6R
[00:00] examining SSU databank ...... [ok] [00:01] examining SSU databank ...... [ok]
> model: Bacteria | Escherichia fergusonii | NR_074902.1 | 1542 bps > model: Bacteria | Escherichia fergusonii | NR_074902.1 | 1542 bps
[00:24] building SSU sequence .... [ok] [00:27] building SSU sequence .... [ok]
> 3016 selected reads (903953 bases; lgt > 269) > 3016 selected reads (903953 bases; lgt > 269)
> coverage depth: 586x > coverage depth: 586x
> 1543 bps (ambiguous bases: 14) > 1543 bps (ambiguous bases: 14)
...@@ -226,9 +245,9 @@ As the verbose mode was set (option `-v`), this command line leads to the follow ...@@ -226,9 +245,9 @@ As the verbose mode was set (option `-v`), this command line leads to the follow
1501 ACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA 1501 ACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA
[00:27] writing output file ... [ok] [00:30] writing output file ... [ok]
+ FASTA: FWSEC0011.ssu.fasta + FASTA: FWSEC0011.ssu.fasta
[00:27] exit [00:30] exit
``` ```
The SSU sequence [NR_074902.1](https://www.ncbi.nlm.nih.gov/nuccore/NR_074902.1/) was selected as a model to carry out the reference-guided assembly using 3,016 HTS reads, leading to an assembled (consensus) sequence of length 1,543 bps (coverage depth: 586&times;) written into the FASTA file _FWSEC0011.ssu.fasta_. The SSU sequence [NR_074902.1](https://www.ncbi.nlm.nih.gov/nuccore/NR_074902.1/) was selected as a model to carry out the reference-guided assembly using 3,016 HTS reads, leading to an assembled (consensus) sequence of length 1,543 bps (coverage depth: 586&times;) written into the FASTA file _FWSEC0011.ssu.fasta_.
...@@ -455,22 +474,24 @@ As the HTS reads arise from an _E. coli_ genome, _ASSU_ can also be run by using ...@@ -455,22 +474,24 @@ As the HTS reads arise from an _E. coli_ genome, _ASSU_ can also be run by using
This command line leads to the following output: This command line leads to the following output:
``` ```
# ASSU v1.0 # ASSU v1.1
# Copyright (C) 2024 Institut Pasteur # Copyright (C) 2024 Institut Pasteur
+ https://gitlab.pasteur.fr/GIPhy/ASSU + https://gitlab.pasteur.fr/GIPhy/ASSU
> Syst: x86_64-redhat-linux-gnu > Syst: x86_64-redhat-linux-gnu
> Bash: 4.4.20(1)-release > Bash: 4.4.20(1)-release
> SSUdb: /local/bin/ASSU/db/SSUdb.gz
> SSUdb v2024-02-18 (20404 sequences)
[00:00] checking input files ... [ok] [00:00] checking input files ... [ok]
+ SRR7896249_1.fastq.gz + SRR7896249_1.fastq.gz
+ SRR7896249_2.fastq.gz + SRR7896249_2.fastq.gz
[00:00] creating tmp directory .... [ok] [00:00] creating tmp directory .... [ok]
> TMP_DIR=/tmp/ASSU.lc4DYy80KV > TMP_DIR=/tmp/ASSU.TxJar4O6ZP
[00:00] examining SSU databank ...... [ok] [00:00] examining SSU databank ...... [ok]
> selection pattern: Escherichia coli > selection pattern: Escherichia coli
> model: Bacteria | Escherichia coli | NR_114042.1 | 1467 bps > model: Bacteria | Escherichia coli | NR_114042.1 | 1467 bps
[00:15] building SSU sequence .... [ok] [00:25] building SSU sequence .... [ok]
> 2868 selected reads (859565 bases; lgt > 269) > 3016 selected reads (903953 bases; lgt > 269)
> coverage depth: 585x > coverage depth: 616x
> 1468 bps (ambiguous bases: 14) > 1468 bps (ambiguous bases: 14)
10 20 30 40 50 60 70 80 90 100 10 20 30 40 50 60 70 80 90 100
| | | | | | | | | | | | | | | | | | | |
...@@ -504,9 +525,9 @@ This command line leads to the following output: ...@@ -504,9 +525,9 @@ This command line leads to the following output:
1401 CAAAAGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAG 1401 CAAAAGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAG
[00:18] writing output file ... [ok] [00:29] writing output file ... [ok]
+ FASTA: ssu.fasta + FASTA: ssu.fasta
[00:18] exit [00:29] exit
``` ```
As expected, _ASSU_ assembles a similar 16S rRNA sequence using _E. coli_ as a model (e.g. same ambiguous positions). As expected, _ASSU_ assembles a similar 16S rRNA sequence using _E. coli_ as a model (e.g. same ambiguous positions).
......
File deleted
SSUdb.gz 2024-02-16 05:36:17
20404 sequences
built using makeSSUdb v1.0
File added
SSUdb.gz 2024-02-18 08:49:06
20404 sequences
built using makeSSUdb v1.1
...@@ -31,7 +31,10 @@ ...@@ -31,7 +31,10 @@
# = VERSIONS = # # = VERSIONS = #
# ============ # # ============ #
# # # #
VERSION=1.0; # VERSION=1.1; #
# + best compression ratio #
# #
# VERSION=1.0; #
# # # #
############################################################################################################## ##############################################################################################################
...@@ -150,7 +153,7 @@ echo "... [ok]" ; ...@@ -150,7 +153,7 @@ echo "... [ok]" ;
echo -n "compressing .." ; echo -n "compressing .." ;
n=$(grep -c "^>" $TMP_DIR/ssu.db); n=$(grep -c "^>" $TMP_DIR/ssu.db);
echo -n "." ; echo -n "." ;
$GZIP -c $TMP_DIR/ssu.db > SSUdb.gz ; $GZIP --best -c $TMP_DIR/ssu.db > SSUdb.gz ;
echo -n "." ; echo -n "." ;
{ echo -n "SSUdb.gz " ; { echo -n "SSUdb.gz " ;
date +"%Y-%m-%d %H:%M:%S" ; date +"%Y-%m-%d %H:%M:%S" ;
......
>SSU_covr_257_lgt_1543_amb_14 >contigSSU_covr_586_lgt_1543_amb_14
AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGRAARCAGCTTGCTGYTTYGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAACGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTWGTWGGTGGGGTAACGGCTCACCWAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGGAGTAAAGTTAATACCTTTGCTCATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTTGGAGGTTGTGCCCTTGAGGCGTGGCTTCCGGAGCTAACGCGTTAAGTCGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACRGAASTTTYCAGAGATGaGAWTgGTGCCTTCGGGAACYGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATCCTTTGTTGCCAGCGGTCCGGCCGGGAACTCAAAGGAGACTGCCAGTGATAAACTGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACACACGTGCTACAATGGCGCATACAAAGAGAAGCGACCTCGCGAGAGCAAGCGGACCTCATAAAGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGTGGATCAGAATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA AAATTGAAGAGTTTGATCATGGCTCAGATTGAACGCTGGCGGCAGGCCTAACACATGCAAGTCGAACGGTAACAGRAARCAGCTTGCTGYTTYGCTGACGAGTGGCGGACGGGTGAGTAATGTCTGGGAAACTGCCTGATGGAGGGGGATAACTACTGGAAACGGTAGCTAATACCGCATAACGTCGCAAGACCAAAGAGGGGGACCTTCGGGCCTCTTGCCATCGGATGTGCCCAGATGGGATTAGCTWGTWGGTGGGGTAACGGCTCACCWAGGCGACGATCCCTAGCTGGTCTGAGAGGATGACCAGCCACACTGGAACTGAGACACGGTCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGGAGTAAAGTTAATACCTTTGCTCATTGACGTTACCCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACGAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCGTAAACGATGTCGACTTGGAGGTTGTGCCCTTGAGGCGTGGCTTCCGGAGCTAACGCGTTAAGTCGACCGCCTGGGGAGTACGGCCGCAAGGTTAAAACTCAAATGAATTGACGGGGGCCCGCACAAGCGGTGGAGCATGTGGTTTAATTCGATGCAACGCGAAGAACCTTACCTGGTCTTGACATCCACRGAASTTTYCAGAGATGaGAWTgGTGCCTTCGGGAACYGTGAGACAGGTGCTGCATGGCTGTCGTCAGCTCGTGTTGTGAAATGTTGGGTTAAGTCCCGCAACGAGCGCAACCCTTATCCTTTGTTGCCAGCGGTCCGGCCGGGAACTCAAAGGAGACTGCCAGTGATAAACTGGAGGAAGGTGGGGATGACGTCAAGTCATCATGGCCCTTACGACCAGGGCTACACACGTGCTACAATGGCGCATACAAAGAGAAGCGACCTCGCGAGAGCAAGCGGACCTCATAAAGTGCGTCGTAGTCCGGATTGGAGTCTGCAACTCGACTCCATGAAGTCGGAATCGCTAGTAATCGTGGATCAGAATGCCACGGTGAATACGTTCCCGGGCCTTGTACACACCGCCCGTCACACCATGGGAGTGGGTTGCAAAAGAAGTAGGTAGCTTAACCTTCGGGAGGGCGCTTACCACTTTGTGATTCATGACTGGGGTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTA
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment