Commit 1d26bd02 authored by fabrice's avatar fabrice
Browse files

Bug Fixe: remove duplicates in generated contact files

parent 64042ace
......@@ -3,6 +3,7 @@
"""
from __future__ import absolute_import, division, print_function
import sys
import json
import logging
from .base import get_filename
......@@ -73,9 +74,14 @@ class AriaEcContactMap(object):
outdir=self.settings.outdir)
self.allresmap[fo.filetype] = fo.mapdict
try:
refmap = self.refmap["contactmap"]
except TypeError:
logger.error("First contact map should be a valid file")
sys.exit(1)
nb_c = int(len(self.protein.aa_sequence.sequence) *
self.settings.setup.config.get("n_factor"))
refmap = self.refmap["contactmap"]
plotparams = {k: self.settings.contactmap.config.get(k, None)
for k in ('size_fig', 'plot_ext', 'plot_dpi')}
......@@ -86,7 +92,9 @@ class AriaEcContactMap(object):
if mapt == self.reftype:
refmap.write_contacts("_".join((self.outprefix, mapt)),
outdir=outdir)
outdir=outdir,
scoremap=self.refmap.get("scoremap",
None))
if self.settings.contactmap.config.get("save_fig"):
refmap.saveplot(outdir=outdir,
outprefix="_".join((self.outprefix, mapt)),
......@@ -121,6 +129,7 @@ class AriaEcContactMap(object):
# TODO: only one function for output files
# Write contact list in txt file
cmpmap.write_contacts("_".join((self.outprefix, mapt)),
scoremap=scoremap,
outdir=outdir)
# Write cmp stats
cmpmap.compare_contactmap(refmap, cmplist, prefix,
......
No preview for this file type
......@@ -216,9 +216,8 @@ class ProteinMap(Map):
def contactset(self, human_idx=False):
# Remove duplicate in contact_list
if self.contact_list():
return sorted([(x, y) for x, y in set(sorted(x for x in
self.contact_list(
human_idx)))])
return sorted(set((tuple(sorted((x, y)))
for x, y in self.contact_list(human_idx))))
else:
return None
......@@ -316,7 +315,7 @@ class ProteinMap(Map):
plt.plot(allfpr, alltpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic %s vs. %s' % (
......@@ -334,9 +333,9 @@ class ProteinMap(Map):
plt.plot(allrec, allprec, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.05])
plt.ylim([0.0, 1.0])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format(
plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format(
aver_prec, map1name, map2name))
plt.legend(loc="lower left")
plt.savefig(plotpath)
......@@ -430,7 +429,8 @@ class ProteinMap(Map):
dmin)
print(msg, file=outfile)
def write_contacts(self, filename, outdir="", human_idx=True):
def write_contacts(self, filename, outdir="", human_idx=True,
scoremap=None):
filepath = "%s/%s.contact.txt" % (outdir, filename)
logger.info("Generate contact file (%s)" % filepath)
with open(filepath, 'w') as outfile:
......@@ -439,8 +439,13 @@ class ProteinMap(Map):
# for contact in sorted(contacts):
for contact in self.contactset():
contact = sorted([contact[0] + offset, contact[1] + offset])
print("%d %d" % (contact[0], contact[1]), file=outfile)
if scoremap is not None:
score = scoremap.iat[(int(contact[0]) - offset,
int(contact[1]) - offset)]
print("%d %d %.4f" % (contact[0], contact[1], score),
file=outfile)
else:
print("%d %d" % (contact[0], contact[1]), file=outfile)
class ResAtmMap(ProteinMap):
"""
......
No preview for this file type
......@@ -153,12 +153,31 @@ class MapFile(RegexFile):
'(?P<hbscore>-?\d+\.?\d*)'),
"score_field": "hbscore"
},
"default": {
"default_1": {
"regex": re.compile(
'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
'(?P<resn1>\w+)\s+'
'(?P<resn2>\w+)\s+'
'(?P<score>[\w\d\.\+\-]+)'),
"score_field": "score"
},
"default_2": {
"regex": re.compile(
'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
'(?P<score>[\w\d\.\+\-]+)'),
"score_field": "score"
},
"default_3": {
"regex": re.compile(
'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'
'(?P<resn1>\w+)\s+'
'(?P<resn2>\w+)\s+'),
"score_field": ""
"score_field": None
},
"default_4": {
"regex": re.compile(
'^\s*(?P<res1_nb>\d+)\s+(?P<res2_nb>\d+)\s+'),
"score_field": None
}
}
check_type = True
......@@ -210,6 +229,11 @@ class MapFile(RegexFile):
# Check first and second line of file
for index, line in enumerate(infile):
match = self.types[self.filetype].get("regex").match(line)
# TODO: DRY rule !!
def1match = self.types["default_1"]["regex"].match(line)
def2match = self.types["default_2"]["regex"].match(line)
def3match = self.types["default_3"]["regex"].match(line)
def4match = self.types["default_4"]["regex"].match(line)
if match:
logger.debug("Format type correct")
return [
......@@ -217,6 +241,34 @@ class MapFile(RegexFile):
self.filetype,
self.types[self.filetype].get("score_field")
]
elif def1match:
logger.debug("Format type correct")
return [
self.types["default_1"].get("regex"),
self.filetype,
self.types["default_1"].get("score_field")
]
elif def2match:
logger.debug("Format type correct")
return [
self.types["default_2"].get("regex"),
self.filetype,
self.types["default_2"].get("score_field")
]
elif def3match:
logger.debug("Format type correct")
return [
self.types["default_3"].get("regex"),
self.filetype,
self.types["default_3"].get("score_field")
]
elif def4match:
logger.debug("Format type correct")
return [
self.types["default_4"].get("regex"),
self.filetype,
self.types["default_4"].get("score_field")
]
if index > 2:
logger.error("Error reading %s file." % self.filetype)
# Remove contact file
......
No preview for this file type
......@@ -167,7 +167,8 @@ class AriaEcSetup:
for maptype in self.allresmap:
self.allresmap[maptype].get("contactmap").write_contacts(
"_".join((self.outprefix, maptype, "filtered")),
self.settings.infra.get("others"))
self.settings.infra.get("others"), scoremap=self.allresmap[
maptype].get("scoremap"))
if self.allresmap[maptype]["alldistmap"] is not None:
self.allresmap[maptype]["alldistmap"].to_csv(
"%s/%s.distmap.csv" % (self.settings.infra.get("others"),
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment