Skip to content
Snippets Groups Projects
Commit f489a96b authored by Thomas  BIGOT's avatar Thomas BIGOT
Browse files

adding hmmerAnnotationDecode

parent 014d2473
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python
import re,ast
def getClusterInfo(clusterID,baseURL):
result = dict()
url = baseURL + clusterID.split(".")[0] + "_annotations.txt"
f = open(url)
annotations = f.read()
m = re.search('FAMILIES\s(.*)',annotations)
result["families"] = ast.literal_eval(m.group(1))
m = re.search('LENGTH\s(.*)',annotations)
result["length"] = int(m.group(1))
result["id"] = clusterID
result["functions"] = []
listSeq = False
f.seek(0)
for line in f:
if line.startswith("FASTA SEQUENCE TITLES:"):
listSeq = True
elif listSeq:
result["functions"].append(line.split("|")[-1].strip())
result["nbSeq"] = len(result["functions"])
return(result)
def readHmmer(pathIn,pathOut,vfamDir,verbose=False):
f = open(pathIn)
out = open(pathOut,"w")
out.write("subject,e-value,hmmfrom,hmmto,alifrom,alito,clusterID,clusterLength,families,functions,clusterNbSeq\n")
currClusterInfo = {}
currHits = []
inContigResult = False
for line in f:
if line.startswith("Query:"):
currHits = []
m = re.search(r'Query:\s+([0-9a-zA-Z_\.]+)\s+',line)
fam = m.group(1)
currClusterInfo = getClusterInfo(fam,vfamDir)
print(fam)
elif line.startswith(">>"):
inContigResult = True
currSubject = line[3:-1].strip()
elif inContigResult:
if line.startswith("\n"):
inContigResult = False
else:
if (line.startswith(" #") or line.startswith(" --")):
continue
subjectData = (line.strip().split())
# # score bias c-Evalue i-Evalue hmmfrom hmm to alifrom ali to envfrom env to acc
newLine = currSubject+ "," + subjectData[5]+ "," + subjectData[6]+ "," + subjectData[7]+ "," + subjectData[9]+ "," + subjectData[10]+ "," + currClusterInfo["id"] + "," + str(currClusterInfo["length"])+ "," + str(currClusterInfo["families"]).replace(","," ") + "," + str(currClusterInfo["functions"]).replace(","," ") + "," + str(currClusterInfo["nbSeq"]) + "\n"
out.write(newLine)
if(verbose):
print(newLine)
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("hmmerResults", help="file generated by hmmer")
parser.add_argument("vfamDir", help="directory containing the annnotation info")
parser.add_argument("output", help="csv-file to produce, containing the hmmer results annotated with vfam information")
parser.add_argument("-v","--verbose", help="print info on screen")
args = parser.parse_args()
readHmmer(args.hmmerResults,args.output,args.vfamDir,args.verbose)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment