diff --git a/ASSU.sh b/ASSU.sh index 3f7a7328d22573135c0c548d8cd4ae7bd1e8a96c..291cbf88b44164a4c2510c822768e479a854fc25 100755 --- a/ASSU.sh +++ b/ASSU.sh @@ -33,6 +33,8 @@ # # VERSION=1.1; # # + SSUdb inside a dedicated directory ($SSUDB_DIR) # +# + modified model sequence selection criterion # +# + updated verbose (option -v) # # # # VERSION=1.0; # # # @@ -316,22 +318,14 @@ dcheck() { # >> displays the content of the specified SSU databank # # # dispdb() { -# $ZGREP "^>" $1 | -# tr -d '>' | -# $BAWK ' {if($2=="A"){k="Archaea";++a} -# else {k="Bacteria";++b} -# if($3=="Candidatus")print k" | "$1" | "$3" "$4" "$5; -# else print k" | "$1" | "$3" "$4} -# END{print"# Archaea: "a; -# print"# Bacteria: "b}' ; $GUNZIP -c $1 2>/dev/null | tr -d '>' | paste - - | tr '\t' ' ' | $BAWK ' {if($2=="A"){k="Archaea";++a} else {k="Bacteria";++b} - if($3=="Candidatus")print k" | "$1" | "length($NF)" bps | "$3" "$4" "$5; + if($3=="Candidatus")print k" | "$1" | "length($NF)" bps | "$3" "$4" "$5; else print k" | "$1" | "length($NF)" bps | "$3" "$4} - END{print"# Archaea: "a; - print"# Bacteria: "b}' ; + END{print"# Archaea: "a; + print"# Bacteria: "b}' ; } # # ############################################################################################################## @@ -524,7 +518,7 @@ then case $fext in fq|fastq) sed -n 2~4p $f | $BAWK '(FNR>10000){exit}{print length()}' ;; gz) if $PZ ; then $PUNZIP -c -p $NTHREADS $f 2>/dev/null | sed -n 2~4p | $BAWK '(FNR>10000){exit}{print length()}' ; - else $GUNZIP -c $f 2>/dev/null | sed -n 2~4p | $BAWK '(FNR>10000){exit}{print length()}' ; fi ;; + else $GUNZIP -c $f 2>/dev/null | sed -n 2~4p | $BAWK '(FNR>10000){exit}{print length()}' ; fi ;; bz|bz2) $BUNZIP -c $f 2>/dev/null | sed -n 2~4p | $BAWK '(FNR>10000){exit}{print length()}' ;; dsrc|dsrc2) $DUNZIP $DSRCopt $f 2>/dev/null | sed -n 2~4p | $BAWK '(FNR>10000){exit}{print length()}' ;; esac @@ -616,9 +610,9 @@ else $ZGREP -E "$PATTERN" $SSUDB | $BAWK '{print$1}' | tr -d '>' > $TMP1 ; # maximizing the sum of alignment scores (AS:i:) among the selected reference sequences accn=$($TAWK '{print$3"\t"$14}' $SAM | sed 's/AS:i://g' | - $TAWK '{s[$1]+=$2}END{for(a in s)print s[a]"\t"a}' | - grep -F -f $TMP1 | - sort -rg | $TAWK '(NR==1){print$2;exit}'); + $TAWK '{s[$1]+=$2}END{for(a in s)print s[a]"\t"a}' | + grep -F -f $TMP1 | + sort -rg | $TAWK '(NR==1){print$2;exit}'); fi if [ -z "$accn" ]