diff --git a/wGRR b/wGRR index 9d104f7acf0059f13a4f05d47584c6e4355c388e..01e5f6844954a2a906ab91992cb663a88489eff7 100755 --- a/wGRR +++ b/wGRR @@ -7,7 +7,7 @@ trap 'rm -rf "$tmp"' EXIT export LC_ALL=C SECONDS=0 -readonly VERSION=0.7 +readonly VERSION=0.8 bold=$(tput bold) normal=$(tput sgr0) @@ -21,8 +21,9 @@ display_usage() { echo "${bold}MANDATORY PARAMETERS:${normal}" echo " -i <string> fasta file with all proteins of all the compared genetic elements." echo " All identifiers must be formatted as:" - echo " >element_identifier_XXX" - echo " where XXX is a serie of numbers unique for each protein of each element." + echo " >elementId_XXX" + echo " where elementID is a string unique for each genetic element" + echo " and XXX is a serie of characters unique for each protein of each element." echo "${bold}OPTIONS:${normal}" echo " -p <string> Path to the mmseqs executable." echo " e.g. /usr/local/bin" diff --git a/wGRR.awk b/wGRR.awk index 52bec67edb0e9c054d49bb6d11c9a1253a053d28..94c0c714876d3e4b537803c7b6597c6fc3ae5edb 100755 --- a/wGRR.awk +++ b/wGRR.awk @@ -66,7 +66,7 @@ FILENAME==ARGV[3] { cov1=$3 cov2=$4 fid=$5 - evalue=$6 +# evalue=$6 bits=$7 # Fix for some evalues below awk precision @@ -77,7 +77,8 @@ FILENAME==ARGV[3] { } } - if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){ +# if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){ + if(cov1<=COV || cov2<=COV || fid<=ID){ next } @@ -265,6 +266,16 @@ END { lcj=lj_singleton+length(cluster) lci<lcj ? lc=lci : lc=lcj + if(mp==0){ + print "bad mp for genomes "gi" "gj + } + if(minprot==0){ + print "bad minprot for genomes"gi" "gj + } + if(lc==0){ + print "bad lc for genomes "gi" "gj + } + if(gi==gj){ lc = length(cluster) if(!MEM){