diff --git a/ariaec/__init__.pyc b/ariaec/__init__.pyc index aaad1100aa43704a4c4d15ca44c9edcec7c634b0..0a688692da7f9ad1b88e6a73d21eb46c831377cc 100644 Binary files a/ariaec/__init__.pyc and b/ariaec/__init__.pyc differ diff --git a/ariaec/base.pyc b/ariaec/base.pyc index 6ba57492ff0c8c8321e0c1a639df8d983482e33f..a2225329ee2801ba5b4989df43d92d0b97f98486 100644 Binary files a/ariaec/base.pyc and b/ariaec/base.pyc differ diff --git a/ariaec/conf/aria_ec.ini b/ariaec/conf/aria_ec.ini index 499ce52536b5e113c5ac0756b05966dac9a19fa6..264cccbd6cea436a6d9cf12f401b53f661eb933b 100644 --- a/ariaec/conf/aria_ec.ini +++ b/ariaec/conf/aria_ec.ini @@ -57,7 +57,7 @@ ca_upper_bound: 7.0 cb_upper_bound: 7.0 ; ---------------------------- Filter parameters ----------------------------- # ; n_factor : Number of EC selected: n * n_factor (n: sequence length) -; contactfilter : all or combinaison of pos, cons, cys, ssclash, nd +; contactfilter : all or combinaison of pos, cons, cys, ssclash, net_deconv ; separated by "+" character [pos]. If empty, use only position ; filter (avoid short range restraints) n_factor: 1.0 diff --git a/ariaec/maplot.py b/ariaec/maplot.py index 0308b0367cb899bdc3f7a1c9dc0197bde8dd2533..18dee19188c7109298ea9b12d713aaea9e1273c5 100644 --- a/ariaec/maplot.py +++ b/ariaec/maplot.py @@ -136,8 +136,8 @@ class AriaEcContactMap(object): distmap=self.refmap["distmap"], human_idx=True, outdir=outdir) - refmap.report(cmpmap, scoremap, outprefix=prefix, outdir=outdir, - plotdir=self.settings.infra.get("graphics", outdir)) + refmap.report(cmpmap, scoremap=scoremap, outprefix=prefix, + outdir=outdir, plotdir=self.settings.infra.get("graphics", outdir)) # Contact map comparison plot # TODO: elementwise error with compare method refmap.compareplot(cmpmap, outprefix=prefix, diff --git a/ariaec/maplot.pyc b/ariaec/maplot.pyc index b63d9ba6c1e1ba9f523b0d23b592afe556bd97ce..3474cdebf8089529afd56bbcf930a6ec6981bd15 100644 Binary files a/ariaec/maplot.pyc and b/ariaec/maplot.pyc differ diff --git a/ariaec/ndconv.py b/ariaec/ndconv.py index e8703751978588d27df4a806d8fc471d610c6501..40cfe675215fd651a0bff3648248e0bb6499e2d1 100644 --- a/ariaec/ndconv.py +++ b/ariaec/ndconv.py @@ -1,10 +1,14 @@ +""" + Network deconvolution tool +""" from __future__ import absolute_import, division, print_function import numpy as np -def nd(mat, beta=0.99, alpha=1, control=0): +def net_deconv(npmat, beta=0.99, alpha=1, control=0): """ + This is a python implementation/translation of network deconvolution by MIT-KELLIS LAB @@ -31,15 +35,15 @@ def nd(mat, beta=0.99, alpha=1, control=0): DESCRIPTION: USAGE: - mat_nd = ND(mat) - mat_nd = ND(mat,beta) - mat_nd = ND(mat,beta,alpha,control) + mat_nd = ND(npmat) + mat_nd = ND(npmat,beta) + mat_nd = ND(npmat,beta,alpha,control) INPUT ARGUMENTS: - :type mat: np.matrix - :param mat: Input matrix, if it is a square matrix, the program assumes - it is a relevance matrix where mat(i,j) represents the + :type npmat: np.matrix + :param npmat: Input matrix, if it is a square matrix, the program assumes + it is a relevance matrix where npmat(i,j) represents the similarity content between nodes i and j. Elements of matrix should be non-negative. optional parameters: @@ -85,15 +89,15 @@ def nd(mat, beta=0.99, alpha=1, control=0): diagonal values are filtered ''' - # n = mat.shape[0] - np.fill_diagonal(mat, 0) + # n = npmat.shape[0] + np.fill_diagonal(npmat, 0) ''' Thresholding the input matrix ''' - y = stat.mquantiles(mat[:], prob=[1 - alpha]) - th = mat >= y - mat_th = mat * th + y = stat.mquantiles(npmat[:], prob=[1 - alpha]) + th = npmat >= y + mat_th = npmat * th ''' making the matrix symetric if already not @@ -129,10 +133,10 @@ def nd(mat, beta=0.99, alpha=1, control=0): if control == 0: ind_edges = (mat_th > 0) * 1.0 ind_nonedges = (mat_th == 0) * 1.0 - m1 = np.max(np.max(mat * ind_nonedges)) + m1 = np.max(np.max(npmat * ind_nonedges)) m2 = np.min(np.min(mat_new1)) mat_new2 = (mat_new1 + np.max(m1 - m2, 0)) * ind_edges + ( - mat * ind_nonedges) + npmat * ind_nonedges) else: m2 = np.min(np.min(mat_new1)) mat_new2 = (mat_new1 + np.max(-m2, 0)) diff --git a/ariaec/protein.pyc b/ariaec/protein.pyc index bb3ccaa9234ccfc7e5c497e3221e2035e56ec577..0fdf07a9316395e7555b137bf7d945d91e2ebe25 100644 Binary files a/ariaec/protein.pyc and b/ariaec/protein.pyc differ diff --git a/ariaec/protmap.py b/ariaec/protmap.py index 7b4b25859744a7885144611764d35d76e7665d4c..ec12220c196627eed3f8f6feda95830f3aa4d37d 100644 --- a/ariaec/protmap.py +++ b/ariaec/protmap.py @@ -293,52 +293,63 @@ class ProteinMap(Map): if save_fig: self.saveplot(**kwargs) - def report(self, cmpmap, scoremap, outprefix="", outdir="", plotdir="", + def report(self, cmpmap, scoremap=None, outprefix="", outdir="", plotdir="", plot_ext="pdf"): reportpath = "%s/%s.report" % (outdir, outprefix) logger.info("Generate report file (%s)" % reportpath) with open(reportpath, 'w') as reportf: y_true = list(self.values.astype(int).flat) y_pred = list(cmpmap.values.astype(int).flat) - y_scores = list(scoremap.values.astype(float).flat) - map1name = self.desc map2name = cmpmap.desc - # ROC plot - allfpr, alltpr, rocthresholds = skm.roc_curve(y_true, y_scores, - pos_label=1) - roc_auc = skm.roc_auc_score(y_true, y_scores) - plotpath = os.path.join(plotdir, "%s.roc.%s" % (outprefix, - plot_ext)) - plt.figure() - plt.plot(allfpr, alltpr, label='ROC curve (area = %0.2f)' % roc_auc) - plt.plot([0, 1], [0, 1], 'k--') - plt.xlim([0.0, 1.0]) - plt.ylim([0.0, 1.0]) - plt.xlabel('False Positive Rate') - plt.ylabel('True Positive Rate') - plt.title('Receiver operating characteristic %s vs. %s' % ( - map1name, map2name)) - plt.legend(loc="lower right") - plt.savefig(plotpath) - - allprec, allrec, prthresholds = skm.precision_recall_curve( - y_true, y_scores) - aver_prec = skm.average_precision_score(y_true, y_scores) - plotpath = os.path.join(plotdir, "%s.precall.%s" % (outprefix, + allfpr = None + alltpr = None + rocthresholds = None + roc_auc = None + allprec = None + allrec = None + prthresholds = None + aver_prec = None + + if scoremap is not None: + # If prediction scores given, compute roc and precall curves + y_scores = list(scoremap.values.astype(float).flat) + + # ROC plot + allfpr, alltpr, rocthresholds = skm.roc_curve(y_true, y_scores, + pos_label=1) + roc_auc = skm.roc_auc_score(y_true, y_scores) + plotpath = os.path.join(plotdir, "%s.roc.%s" % (outprefix, plot_ext)) - # Precision recall curve - plt.clf() - plt.plot(allrec, allprec, label='Precision-Recall curve') - plt.xlabel('Recall') - plt.ylabel('Precision') - plt.ylim([0.0, 1.0]) - plt.xlim([0.0, 1.0]) - plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format( - aver_prec, map1name, map2name)) - plt.legend(loc="lower left") - plt.savefig(plotpath) + plt.figure() + plt.plot(allfpr, alltpr, label='ROC curve (area = %0.2f)' % roc_auc) + plt.plot([0, 1], [0, 1], 'k--') + plt.xlim([0.0, 1.0]) + plt.ylim([0.0, 1.0]) + plt.xlabel('False Positive Rate') + plt.ylabel('True Positive Rate') + plt.title('Receiver operating characteristic %s vs. %s' % ( + map1name, map2name)) + plt.legend(loc="lower right") + plt.savefig(plotpath) + + allprec, allrec, prthresholds = skm.precision_recall_curve( + y_true, y_scores) + aver_prec = skm.average_precision_score(y_true, y_scores) + plotpath = os.path.join(plotdir, "%s.precall.%s" % (outprefix, + plot_ext)) + # Precision recall curve + plt.clf() + plt.plot(allrec, allprec, label='Precision-Recall curve') + plt.xlabel('Recall') + plt.ylabel('Precision') + plt.ylim([0.0, 1.0]) + plt.xlim([0.0, 1.0]) + plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format( + aver_prec, map1name, map2name)) + plt.legend(loc="lower left") + plt.savefig(plotpath) msg = """\ ## Report {map1name} vs. {map2name} @@ -387,21 +398,20 @@ class ProteinMap(Map): ## False Positive Rate (1 - Specificity) values: ## {allfpr} ## Score tresholds ({map2name}): -## {rocthres} -""".format(map1name=map1name, map2name=map2name, - date=datetime.date.today().strftime("%A %d. %B %Y"), - outdir=outdir, accuracy=skm.accuracy_score(y_true, y_pred), - precision=skm.precision_score(y_true, y_pred), - recall=skm.recall_score(y_true, y_pred), - mcc=skm.matthews_corrcoef(y_true, y_pred), - f1s=skm.f1_score(y_true, y_pred), - f2s=skm.fbeta_score(y_true, y_pred, 2), - f05s=skm.fbeta_score(y_true, y_pred, 0.5), - hamm=skm.hamming_loss(y_true, y_pred), - hin=skm.hinge_loss(y_true, y_pred), aver_prec=aver_prec, - roc_auc=roc_auc, allprec=allprec, allrec=allrec, - prthres=prthresholds, alltpr=alltpr, allfpr=allfpr, - rocthres=rocthresholds) +## {rocthres}""".format(map1name=map1name, map2name=map2name, + date=datetime.date.today().strftime("%A %d. %B %Y"), + outdir=outdir, accuracy=skm.accuracy_score(y_true, y_pred), + precision=skm.precision_score(y_true, y_pred), + recall=skm.recall_score(y_true, y_pred), + mcc=skm.matthews_corrcoef(y_true, y_pred), + f1s=skm.f1_score(y_true, y_pred), + f2s=skm.fbeta_score(y_true, y_pred, 2), + f05s=skm.fbeta_score(y_true, y_pred, 0.5), + hamm=skm.hamming_loss(y_true, y_pred), + hin=skm.hinge_loss(y_true, y_pred), aver_prec=aver_prec, + roc_auc=roc_auc, allprec=allprec, allrec=allrec, + prthres=prthresholds, alltpr=alltpr, allfpr=allfpr, + rocthres=rocthresholds) logger.debug("\n" + msg) reportf.write(msg) @@ -511,11 +521,11 @@ class ResAtmMap(ProteinMap): elif seqidx: res_list = [ "%03d-%s" % (seqidx[i], aa) for i, aa in enumerate(seq) - for _ in (filter(self.heavy_reg.match, iupac_aa[aa].keys()))] + for _ in filter(self.heavy_reg.match, iupac_aa[aa].keys())] else: res_list = ["%03d-%s" % (i + 1, aa) for i, aa in enumerate(seq) for _ - in (filter(self.heavy_reg.match, iupac_aa[aa].keys()))] + in filter(self.heavy_reg.match, iupac_aa[aa].keys())] # TODO: Inutile de repeter a chaque fois le calcul des listes # d'atomes lourd pour chaque residu -> Deplacer au niveau de l'init atm_list = [atm for aa in seq for atm in filter(self.heavy_reg.match, @@ -1154,13 +1164,11 @@ class MapFilter: for clash_t in sorted(clash_dict.keys()): if clash_dict[clash_t] and raw_contact in clash_dict[clash_t]: clash = meta_clash[clash_t]["flag"] - meta_clash[clash_t]["msg"] += '''\ + meta_clash[clash_t]["msg"] += """\ {clash_type} flag at pair {pair_nb} : res {res1} and res {res2} {clash_desc} -'''.format( - clash_type=clash, pair_nb=icontact + 1, +""".format(clash_type=clash, pair_nb=icontact + 1, clash_desc=desc_dict.get(raw_contact, ''), - res1=contact[0], - res2=contact[1]) + res1=contact[0], res2=contact[1]) break if clashlist and str(clash) != str(clashlist[icontact]): if clash == "0": @@ -1169,9 +1177,7 @@ class MapFilter: else: op = "added" ctype = clash - meta_clash[clash_t]["warn"] += '''\ -/!\ Clash: {clash_desc} {clash} flag for contact {res_pos} ({res1}, {res2}) -'''.format( + meta_clash[clash_t]["warn"] += r"/!\ Clash: {clash_desc} {clash} flag for contact {res_pos} ({res1}, {res2})".format( clash_desc=op, clash=ctype, res_pos=icontact + 1, res1=contact[0], res2=contact[1]) @@ -1180,10 +1186,9 @@ class MapFilter: for flt in ("nd", "pos", "cons", "ssclash", "cys"): out.write(''' {filter_desc} -{hd} -'''.format( - filter_desc=meta_clash[flt]["desc"].capitalize(), +{hd}'''.format(filter_desc=meta_clash[flt]["desc"].capitalize(), hd="=" * len(meta_clash[flt]["desc"]))) + if meta_clash[flt].get("warn"): out.write(meta_clash[flt]["warn"]) out.write(meta_clash[flt]["msg"]) diff --git a/ariaec/protmap.pyc b/ariaec/protmap.pyc index 1491a80fcb6c161b11f21ee12cae5c2610f3ec42..6e5726397f52407b3bafcf63675c9133ccb4dd47 100644 Binary files a/ariaec/protmap.pyc and b/ariaec/protmap.pyc differ diff --git a/ariaec/setup.pyc b/ariaec/setup.pyc index 767b4beb3f6aad326f5826ab74438d394ccf5b33..ddb0fe923f1b90ba63df12363dfd88aff5391197 100644 Binary files a/ariaec/setup.pyc and b/ariaec/setup.pyc differ