From fa07acf10d9d2803718cb88cf08f6aa677aa59eb Mon Sep 17 00:00:00 2001 From: jgugliel <julien.guglielmini@pasteur.fr> Date: Mon, 28 Mar 2022 10:52:15 +0200 Subject: [PATCH] Removed evalue filtering because some self hits might have higher e-values and the other filters seem to be enough. --- wGRR | 7 ++++--- wGRR.awk | 15 +++++++++++++-- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/wGRR b/wGRR index 9d104f7..01e5f68 100755 --- a/wGRR +++ b/wGRR @@ -7,7 +7,7 @@ trap 'rm -rf "$tmp"' EXIT export LC_ALL=C SECONDS=0 -readonly VERSION=0.7 +readonly VERSION=0.8 bold=$(tput bold) normal=$(tput sgr0) @@ -21,8 +21,9 @@ display_usage() { echo "${bold}MANDATORY PARAMETERS:${normal}" echo " -i <string> fasta file with all proteins of all the compared genetic elements." echo " All identifiers must be formatted as:" - echo " >element_identifier_XXX" - echo " where XXX is a serie of numbers unique for each protein of each element." + echo " >elementId_XXX" + echo " where elementID is a string unique for each genetic element" + echo " and XXX is a serie of characters unique for each protein of each element." echo "${bold}OPTIONS:${normal}" echo " -p <string> Path to the mmseqs executable." echo " e.g. /usr/local/bin" diff --git a/wGRR.awk b/wGRR.awk index 52bec67..94c0c71 100755 --- a/wGRR.awk +++ b/wGRR.awk @@ -66,7 +66,7 @@ FILENAME==ARGV[3] { cov1=$3 cov2=$4 fid=$5 - evalue=$6 +# evalue=$6 bits=$7 # Fix for some evalues below awk precision @@ -77,7 +77,8 @@ FILENAME==ARGV[3] { } } - if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){ +# if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){ + if(cov1<=COV || cov2<=COV || fid<=ID){ next } @@ -265,6 +266,16 @@ END { lcj=lj_singleton+length(cluster) lci<lcj ? lc=lci : lc=lcj + if(mp==0){ + print "bad mp for genomes "gi" "gj + } + if(minprot==0){ + print "bad minprot for genomes"gi" "gj + } + if(lc==0){ + print "bad lc for genomes "gi" "gj + } + if(gi==gj){ lc = length(cluster) if(!MEM){ -- GitLab