From fa07acf10d9d2803718cb88cf08f6aa677aa59eb Mon Sep 17 00:00:00 2001
From: jgugliel <julien.guglielmini@pasteur.fr>
Date: Mon, 28 Mar 2022 10:52:15 +0200
Subject: [PATCH] Removed evalue filtering because some self hits might have
 higher e-values and the other filters seem to be enough.

---
 wGRR     |  7 ++++---
 wGRR.awk | 15 +++++++++++++--
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/wGRR b/wGRR
index 9d104f7..01e5f68 100755
--- a/wGRR
+++ b/wGRR
@@ -7,7 +7,7 @@ trap 'rm -rf "$tmp"' EXIT
 export LC_ALL=C
 SECONDS=0
 
-readonly VERSION=0.7
+readonly VERSION=0.8
 
 bold=$(tput bold)
 normal=$(tput sgr0)
@@ -21,8 +21,9 @@ display_usage() {
 	echo "${bold}MANDATORY PARAMETERS:${normal}"
 	echo "   -i <string>       fasta file with all proteins of all the compared genetic elements."
 	echo "                        All identifiers must be formatted as:"
-	echo "                          >element_identifier_XXX"
-	echo "                     	  where XXX is a serie of numbers unique for each protein of each element."
+	echo "                          >elementId_XXX"
+	echo "                     	  where elementId is a string unique for each genetic element"
+	echo "                     	  and XXX is a series of characters unique for each protein of each element."
 	echo "${bold}OPTIONS:${normal}"
 	echo "   -p <string>       Path to the mmseqs executable."
 	echo "                        e.g. /usr/local/bin"
diff --git a/wGRR.awk b/wGRR.awk
index 52bec67..94c0c71 100755
--- a/wGRR.awk
+++ b/wGRR.awk
@@ -66,7 +66,7 @@ FILENAME==ARGV[3] {
 	cov1=$3
 	cov2=$4
 	fid=$5
-	evalue=$6
+#	evalue=$6
 	bits=$7
 
 	# Fix for some evalues below awk precision
@@ -77,7 +77,8 @@ FILENAME==ARGV[3] {
 		}
 	}
 
-	if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){
+#	if(cov1<=COV || cov2<=COV || fid<=ID || evalue>=EVAL){
+	if(cov1<=COV || cov2<=COV || fid<=ID){
 		next
 	}
 
@@ -265,6 +266,16 @@ END {
 				lcj=lj_singleton+length(cluster)
 				lci<lcj ? lc=lci : lc=lcj
 
+				if(mp==0){
+					print "bad mp for genomes "gi" "gj
+				}
+				if(minprot==0){
+					print "bad minprot for genomes "gi" "gj
+				}
+				if(lc==0){
+					print "bad lc for genomes "gi" "gj
+				}
+
 				if(gi==gj){
 					lc = length(cluster)
 					if(!MEM){
-- 
GitLab