Commit 8e4a6f6d authored by fabrice's avatar fabrice

Bug fixe: error when scoremap doesn't exist for report file (contactmap section)

parent c783cfa1
No preview for this file type
No preview for this file type
......@@ -57,7 +57,7 @@ ca_upper_bound: 7.0
cb_upper_bound: 7.0
; ---------------------------- Filter parameters ----------------------------- #
; n_factor : Number of EC selected: n * n_factor (n: sequence length)
; contactfilter : all or combinaison of pos, cons, cys, ssclash, nd
; contactfilter : all or combinaison of pos, cons, cys, ssclash, net_deconv
; separated by "+" character [pos]. If empty, use only position
; filter (avoid short range restraints)
n_factor: 1.0
......
......@@ -136,8 +136,8 @@ class AriaEcContactMap(object):
distmap=self.refmap["distmap"],
human_idx=True,
outdir=outdir)
refmap.report(cmpmap, scoremap, outprefix=prefix, outdir=outdir,
plotdir=self.settings.infra.get("graphics", outdir))
refmap.report(cmpmap, scoremap=scoremap, outprefix=prefix,
outdir=outdir, plotdir=self.settings.infra.get("graphics", outdir))
# Contact map comparison plot
# TODO: elementwise error with compare method
refmap.compareplot(cmpmap, outprefix=prefix,
......
No preview for this file type
"""
Network deconvolution tool
"""
from __future__ import absolute_import, division, print_function
import numpy as np
def nd(mat, beta=0.99, alpha=1, control=0):
def net_deconv(npmat, beta=0.99, alpha=1, control=0):
"""
This is a python implementation/translation of network deconvolution by
MIT-KELLIS LAB
......@@ -31,15 +35,15 @@ def nd(mat, beta=0.99, alpha=1, control=0):
DESCRIPTION:
USAGE:
mat_nd = ND(mat)
mat_nd = ND(mat,beta)
mat_nd = ND(mat,beta,alpha,control)
mat_nd = ND(npmat)
mat_nd = ND(npmat,beta)
mat_nd = ND(npmat,beta,alpha,control)
INPUT ARGUMENTS:
:type mat: np.matrix
:param mat: Input matrix, if it is a square matrix, the program assumes
it is a relevance matrix where mat(i,j) represents the
:type npmat: np.matrix
:param npmat: Input matrix, if it is a square matrix, the program assumes
it is a relevance matrix where npmat(i,j) represents the
similarity content between nodes i and j. Elements of
matrix should be non-negative.
optional parameters:
......@@ -85,15 +89,15 @@ def nd(mat, beta=0.99, alpha=1, control=0):
diagonal values are filtered
'''
# n = mat.shape[0]
np.fill_diagonal(mat, 0)
# n = npmat.shape[0]
np.fill_diagonal(npmat, 0)
'''
Thresholding the input matrix
'''
y = stat.mquantiles(mat[:], prob=[1 - alpha])
th = mat >= y
mat_th = mat * th
y = stat.mquantiles(npmat[:], prob=[1 - alpha])
th = npmat >= y
mat_th = npmat * th
'''
making the matrix symetric if already not
......@@ -129,10 +133,10 @@ def nd(mat, beta=0.99, alpha=1, control=0):
if control == 0:
ind_edges = (mat_th > 0) * 1.0
ind_nonedges = (mat_th == 0) * 1.0
m1 = np.max(np.max(mat * ind_nonedges))
m1 = np.max(np.max(npmat * ind_nonedges))
m2 = np.min(np.min(mat_new1))
mat_new2 = (mat_new1 + np.max(m1 - m2, 0)) * ind_edges + (
mat * ind_nonedges)
npmat * ind_nonedges)
else:
m2 = np.min(np.min(mat_new1))
mat_new2 = (mat_new1 + np.max(-m2, 0))
......
No preview for this file type
......@@ -293,52 +293,63 @@ class ProteinMap(Map):
if save_fig:
self.saveplot(**kwargs)
def report(self, cmpmap, scoremap, outprefix="", outdir="", plotdir="",
def report(self, cmpmap, scoremap=None, outprefix="", outdir="", plotdir="",
plot_ext="pdf"):
reportpath = "%s/%s.report" % (outdir, outprefix)
logger.info("Generate report file (%s)" % reportpath)
with open(reportpath, 'w') as reportf:
y_true = list(self.values.astype(int).flat)
y_pred = list(cmpmap.values.astype(int).flat)
y_scores = list(scoremap.values.astype(float).flat)
map1name = self.desc
map2name = cmpmap.desc
# ROC plot
allfpr, alltpr, rocthresholds = skm.roc_curve(y_true, y_scores,
pos_label=1)
roc_auc = skm.roc_auc_score(y_true, y_scores)
plotpath = os.path.join(plotdir, "%s.roc.%s" % (outprefix,
plot_ext))
plt.figure()
plt.plot(allfpr, alltpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic %s vs. %s' % (
map1name, map2name))
plt.legend(loc="lower right")
plt.savefig(plotpath)
allprec, allrec, prthresholds = skm.precision_recall_curve(
y_true, y_scores)
aver_prec = skm.average_precision_score(y_true, y_scores)
plotpath = os.path.join(plotdir, "%s.precall.%s" % (outprefix,
allfpr = None
alltpr = None
rocthresholds = None
roc_auc = None
allprec = None
allrec = None
prthresholds = None
aver_prec = None
if scoremap is not None:
# If prediction scores given, compute roc and precall curves
y_scores = list(scoremap.values.astype(float).flat)
# ROC plot
allfpr, alltpr, rocthresholds = skm.roc_curve(y_true, y_scores,
pos_label=1)
roc_auc = skm.roc_auc_score(y_true, y_scores)
plotpath = os.path.join(plotdir, "%s.roc.%s" % (outprefix,
plot_ext))
# Precision recall curve
plt.clf()
plt.plot(allrec, allprec, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.0])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format(
aver_prec, map1name, map2name))
plt.legend(loc="lower left")
plt.savefig(plotpath)
plt.figure()
plt.plot(allfpr, alltpr, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], 'k--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver operating characteristic %s vs. %s' % (
map1name, map2name))
plt.legend(loc="lower right")
plt.savefig(plotpath)
allprec, allrec, prthresholds = skm.precision_recall_curve(
y_true, y_scores)
aver_prec = skm.average_precision_score(y_true, y_scores)
plotpath = os.path.join(plotdir, "%s.precall.%s" % (outprefix,
plot_ext))
# Precision recall curve
plt.clf()
plt.plot(allrec, allprec, label='Precision-Recall curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.ylim([0.0, 1.0])
plt.xlim([0.0, 1.0])
plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format(
aver_prec, map1name, map2name))
plt.legend(loc="lower left")
plt.savefig(plotpath)
msg = """\
## Report {map1name} vs. {map2name}
......@@ -387,21 +398,20 @@ class ProteinMap(Map):
## False Positive Rate (1 - Specificity) values:
## {allfpr}
## Score tresholds ({map2name}):
## {rocthres}
""".format(map1name=map1name, map2name=map2name,
date=datetime.date.today().strftime("%A %d. %B %Y"),
outdir=outdir, accuracy=skm.accuracy_score(y_true, y_pred),
precision=skm.precision_score(y_true, y_pred),
recall=skm.recall_score(y_true, y_pred),
mcc=skm.matthews_corrcoef(y_true, y_pred),
f1s=skm.f1_score(y_true, y_pred),
f2s=skm.fbeta_score(y_true, y_pred, 2),
f05s=skm.fbeta_score(y_true, y_pred, 0.5),
hamm=skm.hamming_loss(y_true, y_pred),
hin=skm.hinge_loss(y_true, y_pred), aver_prec=aver_prec,
roc_auc=roc_auc, allprec=allprec, allrec=allrec,
prthres=prthresholds, alltpr=alltpr, allfpr=allfpr,
rocthres=rocthresholds)
## {rocthres}""".format(map1name=map1name, map2name=map2name,
date=datetime.date.today().strftime("%A %d. %B %Y"),
outdir=outdir, accuracy=skm.accuracy_score(y_true, y_pred),
precision=skm.precision_score(y_true, y_pred),
recall=skm.recall_score(y_true, y_pred),
mcc=skm.matthews_corrcoef(y_true, y_pred),
f1s=skm.f1_score(y_true, y_pred),
f2s=skm.fbeta_score(y_true, y_pred, 2),
f05s=skm.fbeta_score(y_true, y_pred, 0.5),
hamm=skm.hamming_loss(y_true, y_pred),
hin=skm.hinge_loss(y_true, y_pred), aver_prec=aver_prec,
roc_auc=roc_auc, allprec=allprec, allrec=allrec,
prthres=prthresholds, alltpr=alltpr, allfpr=allfpr,
rocthres=rocthresholds)
logger.debug("\n" + msg)
reportf.write(msg)
......@@ -511,11 +521,11 @@ class ResAtmMap(ProteinMap):
elif seqidx:
res_list = [
"%03d-%s" % (seqidx[i], aa) for i, aa in enumerate(seq)
for _ in (filter(self.heavy_reg.match, iupac_aa[aa].keys()))]
for _ in filter(self.heavy_reg.match, iupac_aa[aa].keys())]
else:
res_list = ["%03d-%s" % (i + 1, aa) for i, aa in enumerate(seq) for
_
in (filter(self.heavy_reg.match, iupac_aa[aa].keys()))]
in filter(self.heavy_reg.match, iupac_aa[aa].keys())]
# TODO: Inutile de repeter a chaque fois le calcul des listes
# d'atomes lourd pour chaque residu -> Deplacer au niveau de l'init
atm_list = [atm for aa in seq for atm in filter(self.heavy_reg.match,
......@@ -1154,13 +1164,11 @@ class MapFilter:
for clash_t in sorted(clash_dict.keys()):
if clash_dict[clash_t] and raw_contact in clash_dict[clash_t]:
clash = meta_clash[clash_t]["flag"]
meta_clash[clash_t]["msg"] += '''\
meta_clash[clash_t]["msg"] += """\
{clash_type} flag at pair {pair_nb} : res {res1} and res {res2} {clash_desc}
'''.format(
clash_type=clash, pair_nb=icontact + 1,
""".format(clash_type=clash, pair_nb=icontact + 1,
clash_desc=desc_dict.get(raw_contact, ''),
res1=contact[0],
res2=contact[1])
res1=contact[0], res2=contact[1])
break
if clashlist and str(clash) != str(clashlist[icontact]):
if clash == "0":
......@@ -1169,9 +1177,7 @@ class MapFilter:
else:
op = "added"
ctype = clash
meta_clash[clash_t]["warn"] += '''\
/!\ Clash: {clash_desc} {clash} flag for contact {res_pos} ({res1}, {res2})
'''.format(
meta_clash[clash_t]["warn"] += r"/!\ Clash: {clash_desc} {clash} flag for contact {res_pos} ({res1}, {res2})".format(
clash_desc=op, clash=ctype, res_pos=icontact + 1,
res1=contact[0], res2=contact[1])
......@@ -1180,10 +1186,9 @@ class MapFilter:
for flt in ("nd", "pos", "cons", "ssclash", "cys"):
out.write('''
{filter_desc}
{hd}
'''.format(
filter_desc=meta_clash[flt]["desc"].capitalize(),
{hd}'''.format(filter_desc=meta_clash[flt]["desc"].capitalize(),
hd="=" * len(meta_clash[flt]["desc"])))
if meta_clash[flt].get("warn"):
out.write(meta_clash[flt]["warn"])
out.write(meta_clash[flt]["msg"])
No preview for this file type
No preview for this file type
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment