protmap.py 78.8 KB
Newer Older
1
# coding=utf-8
fabrice's avatar
fabrice committed
2
3
4
5
"""
                    ARIA Evolutionary Constraints Tools
"""
from __future__ import absolute_import, division, print_function
6

Fabrice Allain's avatar
Fabrice Allain committed
7
8
from ..core import ConversionTable as ConversionTable
import collections
9
import csv
10
11
import datetime
import itertools
Fabrice Allain's avatar
Fabrice Allain committed
12
import logging
13
14
import matplotlib
import numpy as np
Fabrice Allain's avatar
Fabrice Allain committed
15
16
import operator
import os
fabrice's avatar
fabrice committed
17
import pandas as pd
Fabrice Allain's avatar
Fabrice Allain committed
18
import re
fabrice's avatar
fabrice committed
19
import seaborn as sns
Fabrice Allain's avatar
Fabrice Allain committed
20
import six
21
import sklearn.metrics as skm
Fabrice Allain's avatar
Fabrice Allain committed
22
23
24
25
import string
import textwrap
from collections import defaultdict
from copy import deepcopy
26
from matplotlib import pyplot as plt
fabrice's avatar
fabrice committed
27
from matplotlib.lines import Line2D
28

Fabrice Allain's avatar
Fabrice Allain committed
29
from ..core.legacy import AminoAcid as AminoAcid
30
from .common import (tickmin, tickrot, titleprint, addtup)
31
from .ndconv import net_deconv
32

33
matplotlib.use("Agg", warn=False)
34
LOG = logging.getLogger(__name__)
fabrice's avatar
fabrice committed
35
36
37
38
# TODO: check dataframe symmetry or always use unstack
# TODO: objet MapContainer contenant les differentes maps en attributs ! (et
# non en clef de dict)

39

fabrice's avatar
fabrice committed
40
class Map(pd.DataFrame):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
41
    """Distance/contact matrix"""
fabrice's avatar
fabrice committed
42
43
44

    mtype_choices = {'contact': bool, 'distance': float, "score": float}

Fabrice Allain's avatar
Fabrice Allain committed
45
    def update(self, *args, **kwargs):
46
        """
47
        
48

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
49
50
51
52
53
54
55
56
57
58
59
60
61
        Parameters
        ----------
        args :
            param kwargs:
        *args :
            
        **kwargs :
            

        Returns
        -------

        
62
        """
Fabrice Allain's avatar
Fabrice Allain committed
63
64
65
66
        super(Map, self).update(*args, **kwargs)

    # def _constructor_expanddim(self):
    #     super(Map, self)._constructor_expanddim()
fabrice's avatar
fabrice committed
67
68

    def __init__(self, index=None, columns=None, mtype='distance',
69
                 duplicate_levels=False, data=None, dtype=None, sym=True,
70
                 desc="", path="", **kwargs):
fabrice's avatar
fabrice committed
71
        """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
72
73
74
75
76
77
78
79
80
81
82
83
84
        
        Parameters
        ----------
        index
        columns
        mtype
        duplicate_levels
            Allow duplicate levels in dataframe
        data
        dtype
        sym
        desc
        path
fabrice's avatar
fabrice committed
85
86
87
        """
        if not dtype:
            dtype = self.check_type(mtype)
88
        # TODO: should probably init to np.NaN
fabrice's avatar
fabrice committed
89
        if data is None:
Fabrice Allain's avatar
Fabrice Allain committed
90
            data = False if dtype == bool else np.NaN
91
        super(Map, self).__init__(data=data, dtype=dtype, index=index,
fabrice's avatar
fabrice committed
92
93
                                  columns=columns)
        self.duplicate_levels = duplicate_levels
Fabrice Allain's avatar
Fabrice Allain committed
94
        self.mtype = mtype
fabrice's avatar
fabrice committed
95
96
97
98
        self.dtype = dtype
        if mtype == "score":
            self.sort_list = []
        self.sym = sym
99
        self.desc = desc
100
        self.path = path
fabrice's avatar
fabrice committed
101

102
103
104
    def __str__(self):
        return super(Map, self).__str__()

fabrice's avatar
fabrice committed
105
    def sortedset(self, human_idx=False):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
106
        """
107
        
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
108
109
110
111
112
113
114
115
116
117
118

        Parameters
        ----------
        human_idx :
            (Default value = False)

        Returns
        -------

        
        """
fabrice's avatar
fabrice committed
119
        # Remove duplicate in sort_list
120
121
122
123
124
        """

        :param human_idx:
        :return:
        """
fabrice's avatar
fabrice committed
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
        n = 1 if human_idx else 0
        if hasattr(self, "sort_list"):
            if self.sym:
                # Use OrderedDict to keep the order
                return [(x + n, y + n)
                        for x, y in
                        collections.OrderedDict.fromkeys(frozenset(x)
                                                         for x in
                                                         self.sort_list)]
            else:
                # Asym matrix, no need to remove duplicate
                return [(x + n, y + n) for x, y in self.sort_list]
        else:
            return None

    def check_type(self, mtype):
141
        """
142
        
143

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
144
145
146
147
148
149
150
151
152
        Parameters
        ----------
        mtype :
            return:

        Returns
        -------

        
153
        """
fabrice's avatar
fabrice committed
154
155
156
157
        value = self.mtype_choices.get(mtype)
        if value:
            return value
        else:
158
159
            LOG.error("Map type should be in list %s",
                      self.mtype_choices.keys())
fabrice's avatar
fabrice committed
160
161
162
            return None

    def reduce(self):
Fabrice Allain's avatar
Fabrice Allain committed
163
164
        """Low complexcity dataframe"""
        raise NotImplementedError
165

Fabrice Allain's avatar
Fabrice Allain committed
166
    def copy(self, **kwargs):
167
168
        """
        Copy the current map
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
169
170
171
172
173
174
175
176
177
178
179

        Parameters
        ----------
        **kwargs :
            

        Returns
        -------

        
        """
Fabrice Allain's avatar
Fabrice Allain committed
180
        raise NotImplementedError
fabrice's avatar
fabrice committed
181
182

    def remove(self, rm_list):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
183
        """
184
        
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
185
186
187
188
189
190
191
192
193
194
195

        Parameters
        ----------
        rm_list :
            

        Returns
        -------

        
        """
fabrice's avatar
fabrice committed
196
        # Reset values at positions in rm_list
197
198
199
200
        """

        :param rm_list:
        """
Fabrice Allain's avatar
Fabrice Allain committed
201
        value = False if self.dtype == bool else np.NaN
fabrice's avatar
fabrice committed
202
203
204
205
206
207
208
209
210
211
212
213
214
        for contact in rm_list:
            idx1, idx2 = self.index[contact[0]], self.index[contact[1]]
            self.set_value(idx1, idx2, value)
            # self.iat[(contact[0], contact[1])] = value
            # self.iat[(contact[1], contact[0])] = value
            if hasattr(self, 'sort_list'):
                # ! sort_list start at 1
                if (contact[0], contact[1]) in self.sort_list:
                    self.sort_list.remove((contact[0], contact[1]))
                if (contact[1], contact[0]) in self.sort_list and self.sym:
                    self.sort_list.remove((contact[1], contact[0]))

    def to_series(self):
215
        """Return panda series related to lower triangle values"""
fabrice's avatar
fabrice committed
216
217
218
219
220
221
222
223
        df = self.copy()
        df = df.astype(float)
        # remove values from upper triangle
        df.values[np.triu_indices_from(df, k=1)] = np.nan
        # pd.series with only lower triangle values
        return df.unstack().dropna()

    def topmap(self, scoremap, nb_topcontact):
224
        """
225
        
226

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
227
228
229
230
231
232
233
234
235
236
237
        Parameters
        ----------
        scoremap :
            param nb_topcontact:
        nb_topcontact :
            

        Returns
        -------

        
238
        """
fabrice's avatar
fabrice committed
239
        if self.dtype != bool:
240
            LOG.info("Error when retrieving top contact map. The type of "
241
                     "the given map is not a contact type!")
fabrice's avatar
fabrice committed
242
243
244
245
246
247
248
249
250
            return self
        self[:] = False
        if self.shape == scoremap.shape:
            pair_list = scoremap.sortedset()[:nb_topcontact]
            for contact in pair_list:
                self.iat[(contact[1], contact[0])] = True
                self.iat[(contact[0], contact[1])] = True
            return self
        else:
251
            LOG.error("Given scoremap has not the same dimension !")
fabrice's avatar
fabrice committed
252
253
254
            return None

    def subfill(self, pairdict, level=0):
255
256
        """
        Fill map with dict giving
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
257
258
259
260
261
262
263
264
265
266
267
268

        Parameters
        ----------
        pairdict :
            param level:
        level :
            (Default value = 0)

        Returns
        -------

        
fabrice's avatar
fabrice committed
269
270
271
272
273
274
275
276
277
        """
        pairdict = {k.upper(): v for k, v in pairdict.items()}
        if "def" in pairdict:
            self[:] = pairdict["def"]
        for idxval in pairdict:
            idx = self.index.get_level_values(level) == idxval
            self.loc[idx, idx] = pairdict[idxval]

    def set_value(self, index, col, value, **kwargs):
278
        """
279
        
280

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
        Parameters
        ----------
        index :
            param col:
        value :
            param kwargs:
        col :
            
        **kwargs :
            

        Returns
        -------

        
296
        """
fabrice's avatar
fabrice committed
297
298
299
300
301
        super(Map, self).set_value(index, col, value, **kwargs)
        if self.sym:
            super(Map, self).set_value(col, index, value, **kwargs)


302
# TODO: Matrices PosAaAtmMap, AaAtmMap, AtmMap
fabrice's avatar
fabrice committed
303
class ProteinMap(Map):
304
    """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
305
    Abstract class for protein contact map objects
306
    """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
307
308
309
310
311
312
    # TODO: matrix should be not only for heavy atoms
    # Matrix only for heavy atoms.
    heavy_reg = re.compile(r"[CNOS][ABGDEZH][0-9]?")
    all_reg = re.compile(r"^((?!cns|dyana).*)$")
    # TODO: Autre methodes de dist
    distance_method = 'euclidean'
313

314
    def __init__(self, sequence, **kwargs):
315
316
        if kwargs.get("index") is None or kwargs.get("columns") is None:
            kwargs["index"], kwargs["columns"] = self.create_index(
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
317
                sequence, **kwargs)
fabrice's avatar
fabrice committed
318
        super(ProteinMap, self).__init__(**kwargs)
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
319
        self.contact_flags = kwargs.get("flaglist")
fabrice's avatar
fabrice committed
320
        self._maplot = None
fabrice's avatar
fabrice committed
321

Fabrice Allain's avatar
Fabrice Allain committed
322
323
    # def _constructor_expanddim(self):
    #    return self._constructor_expanddim()
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
    @property
    def sequence(self):
        """
        Specific sequence related to the map in a string
        Returns
        -------
        str

        """
        raise NotImplementedError

    def create_heatmap(self):
        """
        Generate heatmap

        Parameters
        ----------

        Returns
        -------


        """
        raise NotImplementedError

    def contact_map(self, contactdef, scsc_min=None):
        """
        Generate contact map

        Parameters
        ----------
        contactdef :
            param scsc_min:
        scsc_min :
            (Default value = None)

        Returns
        -------


        """
        raise NotImplementedError

    def create_index(self, sequence, **kwargs):
        """

        Parameters
        ----------
        sequence
        kwargs

        Returns
        -------

        """
        # Indexation matrice (tous les atomes ou tous les residus)
        raise NotImplementedError
fabrice's avatar
fabrice committed
381

Fabrice Allain's avatar
Fabrice Allain committed
382
    def reduce(self):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
383
        """Lower index level if multi index"""
Fabrice Allain's avatar
Fabrice Allain committed
384
385
386
387
388
389
390
391
392
393
394
        # TODO: check if multiindex
        columns = ['-'.join(idx) for idx in self.columns]
        index = ['-'.join(idx) for idx in self.index]
        return getattr(self, '__init__')(self.sequence,
                                         path=self.path,
                                         data=self.as_matrix(),
                                         index=index, desc=self.desc,
                                         sym=self.sym, mtype=self.mtype,
                                         columns=columns, dtype=self.dtype)

    def copy(self, **kwargs):
395
396
        """
        Copy dataframe and related attributes of the map
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
397
398
399
400
401
402
403
404
405
406

        Parameters
        ----------
        **kwargs :
            

        Returns
        -------

        
Fabrice Allain's avatar
Fabrice Allain committed
407
408
        """
        df = super(Map, self).copy()
409
410
411
        return ProteinMap(
            sequence=self.sequence, path=self.path, data=df, desc=self.desc,
            sym=self.sym, mtype=self.mtype, dtype=self.dtype)
Fabrice Allain's avatar
Fabrice Allain committed
412

fabrice's avatar
fabrice committed
413
    def plotflush(self):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
414
        """Flush contact map plot"""
fabrice's avatar
fabrice committed
415
416
417
418
419
        plt.clf()
        plt.cla()
        plt.close()
        self._maplot = None

fabrice's avatar
fabrice committed
420
    @property
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
421
    def maplot(self, linewidths=0.0):
422
423
        """
        Contact map plot
424
        :return:
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
425
426
427
428
429
430
431
432
433
434

        Parameters
        ----------
        linewidths :
            (Default value = 0.0)

        Returns
        -------

        
435
        """
fabrice's avatar
fabrice committed
436
437
        # Contact map Plot
        if not self._maplot:
438
            # Flush matplot
439
            LOG.debug("Build maplot")
fabrice's avatar
fabrice committed
440
441
442
            minticks = tickmin(self, shift=1)  # Nb graduations

            self._maplot = sns.heatmap(self, square=True, cbar=False,
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
443
                                       linewidths=linewidths, vmax=1, vmin=-1,
fabrice's avatar
fabrice committed
444
445
446
                                       cmap=sns.diverging_palette(20, 220, n=7,
                                                                  as_cmap=True),
                                       xticklabels=minticks[0],
447
448
                                       yticklabels=minticks[1],
                                       linecolor="grey")
fabrice's avatar
fabrice committed
449
450
451
452
        return self._maplot

    def saveplot(self, outdir='', outprefix="protein", size_fig=10,
                 plot_ext="pdf", plot_dpi=200):
453
454
        """
        Save plot
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472

        Parameters
        ----------
        outdir :
            param outprefix: (Default value = '')
        size_fig :
            param plot_ext: (Default value = 10)
        plot_dpi :
            (Default value = 200)
        outprefix :
            (Default value = "protein")
        plot_ext :
            (Default value = "pdf")

        Returns
        -------

        
473
        """
474
        plotpath = os.path.join(outdir, "%s.maplot.%s" % (
475
            outprefix, plot_ext))
476
        LOG.info("Generate contact map plot (%s)", plotpath)
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
477
        f, ax = plt.subplots()
478
        tickrot(self.maplot.axes, self.maplot.figure)
fabrice's avatar
fabrice committed
479
480
481
482
483
        self.maplot.figure.set_size_inches(size_fig, size_fig)
        map_title = "%s contacts map" % outprefix
        self.maplot.set_title(map_title)
        self.maplot.figure.tight_layout()

484
        f.tight_layout()
485
        self.maplot.figure.savefig(plotpath, dpi=plot_dpi)
486
        plt.close('all')
fabrice's avatar
fabrice committed
487

fabrice's avatar
fabrice committed
488
    def contactset(self, human_idx=False):
489
490
        """
        Remove duplicate in contact_list
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
491
492
493
494
495
496
497
498

        Parameters
        ----------
        human_idx :
            return: (Default value = False)

        Returns
        -------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
499
        list
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
500
        
501
        """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
502
503
504
505
506
        contacts = self.contact_list(human_idx)
        if contacts:
            contacts = sorted(set((tuple(sorted((x, y)))
                                   for x, y in contacts)))
        return contacts
fabrice's avatar
fabrice committed
507
508

    def contact_list(self, human_idx=False):
509
510
        """
        Return contact list
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
511
512
513
514
515
516
517
518

        Parameters
        ----------
        human_idx :
            return: (Default value = False)

        Returns
        -------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
519
        list
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
520
        
521
        """
fabrice's avatar
fabrice committed
522
523
524
        contact_list = []
        n = 1 if human_idx else 0
        if self.dtype is bool:
Fabrice Allain's avatar
Fabrice Allain committed
525
526
            # for irow, row in enumerate(self):
            for irow, row in enumerate(self.index):
fabrice's avatar
fabrice committed
527
528
529
                for icol, value in enumerate(self[row]):
                    if value:
                        contact_list.append((irow + n, icol + n))
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
530
        return contact_list
fabrice's avatar
fabrice committed
531

532
    def compareplot(self, protmap, save_fig=True, alpha=None, **kwargs):
533
534
        """
        Compare 2 contact map and plot differences
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550

        Parameters
        ----------
        protmap :
            param save_fig:
        alpha :
            param kwargs: (Default value = None)
        save_fig :
            (Default value = True)
        **kwargs :
            

        Returns
        -------

        
551
        """
552
        self.plotflush()
fabrice's avatar
fabrice committed
553
554
555
556
557
558
        # Contact map plot
        if getattr(protmap, "shape") and self.shape != protmap.shape:
            logging.error("Cant't compare %s map with %s" % (
                protmap.__class__.__name__, self.__class__.__name__))
            return None
        else:
559
            cmplist = protmap.contact_list(human_idx=True)
fabrice's avatar
fabrice committed
560
561
562
563
564
565
566
567
568

            ymax = len(self.sequence) - 1
            if protmap.contact_flags:
                flags = set(protmap.contact_flags.values())
                # Color palette
                pal = sns.color_palette("hls", len(flags))
                for i, flag in enumerate(flags):
                    conlist = [contact for contact in protmap.contact_flags if
                               protmap.contact_flags[contact] == flag]
569
                    xind = [x - .5 for x in
fabrice's avatar
fabrice committed
570
                            zip(*conlist)[0] + zip(*conlist)[1]]
571
                    yind = [ymax - y + 1.5 for y in
fabrice's avatar
fabrice committed
572
573
574
575
                            zip(*conlist)[1] + zip(*conlist)[0]]
                    color = pal[i]
                    mark = Line2D.filled_markers[i]
                    for x, y in zip(xind, yind):
576
577
                        self.maplot.axes.scatter(x, y, s=8, c=color,
                                                 linewidths=0.1, alpha=alpha,
fabrice's avatar
fabrice committed
578
                                                 marker=mark)
fabrice's avatar
fabrice committed
579
            else:
580
                LOG.info("Contact list: %s", cmplist)
581
                xind = [x - .5 for x in
fabrice's avatar
fabrice committed
582
                        zip(*cmplist)[0] + zip(*cmplist)[1]]
583
                yind = [ymax - y + 1.5 for y in
fabrice's avatar
fabrice committed
584
                        zip(*cmplist)[1] + zip(*cmplist)[0]]
585
586
                LOG.debug("Xind: %s", xind)
                LOG.debug("Yind: %s", yind)
fabrice's avatar
fabrice committed
587
588
589
590
                color = "red"
                # width = [0.3 for _ in xind]
                # for x, y, h in zip(xind, yind, width):
                for x, y in zip(xind, yind):
591
592
                    self.maplot.axes.scatter(x, y, s=30, c=color,
                                             linewidths=0,
fabrice's avatar
fabrice committed
593
                                             alpha=alpha)
fabrice's avatar
fabrice committed
594
            if save_fig:
fabrice's avatar
fabrice committed
595
                self.saveplot(**kwargs)
fabrice's avatar
fabrice committed
596

597
598
    @staticmethod
    def _write_report(reportpath, **scores):
599
600
        """
        Write report file from score dict
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
601

602
603
        Parameters
        ----------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
604
605
606
607
608
609
        reportpath :
            
        scores :
            
        **scores :
            
610
611
612
613

        Returns
        -------

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
614
        
615
616
        """
        with open(reportpath, 'w') as reportf:
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
            msg = string.Formatter().vformat("""\
## Report {map1name} vs. {map2name}
##
## Date: {date}
##
## Plots: {outdir}
## Reference map: {map1path}
## Contact map: {map2path}
##
## -----------------------------------------------------------------------------
##
## Sequence:
## {seq}
## Protein length: {protlen}
##
## -----------------------------------------------------------------------------
##
## Matthews correlation coefficient (MCC): {mcc}
## F1 score: {f1s}
## F2 score: {f2s}
## F0.5 score: {f05s}
##
## Precision: {precision}
## Recall (Sensibility): {recall}
## Accuracy: {accuracy}
##
## Hamming loss: {hamm}
## Hinge loss: {hin}
##
## -----------------------------------------------------------------------------
##
##                                  Plot scores
##
## ROC Area Under Curve: {roc_auc}
## Average precision score: {aver_prec}
##
## -----------------------------------------------------------------------------
##
##                          Precision recall curve
##
## Precision values:
## {allprec}
## Recall values:
## {allrec}
## Score tresholds ({map2name}):
## {prthres}
##
## -----------------------------------------------------------------------------
##
##                               ROC curve
##
## True Positive Rate (Sensibility) values:
## {alltpr}
## False Positive Rate (1 - Specificity) values:
## {allfpr}
## Score tresholds ({map2name}):
## {rocthres}""", (), defaultdict(str, **scores))
674
675
676
677
678
            LOG.debug("\n" + msg)
            reportf.write(msg)

    @staticmethod
    def classification_metrics(y_true, y_pred, y_scores=None):
679
680
        """
        Compute classification metrics
681
682
683

        Parameters
        ----------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
684
685
686
687
688
689
        y_true : numpy array of true values
            
        y_pred : numpy array of predicted values
            
        y_scores : numpy array of prediction scores
            (Default value = None)
690
691
692
693

        Returns
        -------

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
694
        
695
        """
696
        metrics = {}
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712

        if 1 in y_pred and 1 in y_true:
            metrics.update({
                'accuracy': skm.accuracy_score(y_true, y_pred),
                'precision': skm.precision_score(y_true, y_pred),
                'recall': skm.recall_score(y_true, y_pred),
                'mcc': skm.matthews_corrcoef(y_true, y_pred),
                'f1s': skm.f1_score(y_true, y_pred),
                'f2s': skm.fbeta_score(y_true, y_pred, 2),
                'f05s': skm.fbeta_score(y_true, y_pred, 0.5),
                'hamm': skm.hamming_loss(y_true, y_pred),
                'hin': skm.hinge_loss(y_true, y_pred)
            })

        if y_scores:
            # ROC plot
Fabrice Allain's avatar
Fabrice Allain committed
713
714
            # Replace nan values in y_scores
            y_scores[np.isnan(y_scores)] = np.floor(np.nanmin(y_scores))
715
716
717
718
719
720
721
722
723
724
725
726
727
728
            metrics.update({
                'roc_auc': skm.roc_auc_score(y_true, y_scores),
                'aver_prec': skm.average_precision_score(y_true, y_scores)
            })
            metrics['allfpr'], metrics['alltpr'], metrics['rocthres'] = \
                skm.roc_curve(y_true, y_scores, pos_label=1)
            metrics['allprec'], metrics['allrec'], metrics['prthres'] = \
                skm.precision_recall_curve(y_true, y_scores)

        return metrics

    @staticmethod
    def _metrics_plot(metrics, mapnames=('', ''), plotdir="", outprefix="",
                      plot_ext="pdf"):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
729
        """
730
        
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745

        Parameters
        ----------
        metrics :
            
        mapnames :
            (Default value = ('')
        '' :
            

        Returns
        -------

        
        """
Fabrice Allain's avatar
Fabrice Allain committed
746
        outprefix = outprefix if outprefix else "maplot"
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
        csv_roc = os.path.join(plotdir, "%s.roc.csv" % outprefix)
        LOG.info("Generate roc file (%s)", csv_roc)
        with open(csv_roc, "w") as f:
            f.write("TPR,FPR,Treshold\n")
            writer = csv.writer(f)
            writer.writerows(zip(metrics['alltpr'], metrics['allfpr'],
                                 metrics['rocthres']))

        plotpath = os.path.join(plotdir, "%s.roc.%s" % (outprefix,
                                                        plot_ext))
        LOG.info("Generate roc plot (%s)", plotpath)
        plt.figure()
        plt.plot(metrics['allfpr'], metrics['alltpr'],
                 label='ROC curve (area = %0.2f)' % metrics['roc_auc'])
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('Receiver operating characteristic %s vs. %s' % (
            mapnames[0], mapnames[1]))
        plt.legend(loc="lower right")
        plt.savefig(plotpath)

        csv_precall = os.path.join(plotdir, "%s.roc.csv" % outprefix)
        LOG.info("Generate precall file (%s)", csv_precall)
        with open(csv_precall, "w") as f:
            f.write("Precision,Recall,Treshold\n")
            writer = csv.writer(f)
            writer.writerows(zip(metrics['allprec'], metrics['allrec'],
                                 metrics['prthres']))

        plotpath = os.path.join(plotdir, "%s.precall.%s" % (outprefix,
                                                            plot_ext))
        LOG.info("Generate precall plot (%s)", plotpath)
        # Precision recall curve
        plt.clf()
        plt.plot(metrics['allrec'], metrics['allprec'],
                 label='Precision-Recall curve')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.ylim([0.0, 1.0])
        plt.xlim([0.0, 1.0])
        plt.title('Precision-Recall {1} vs. {2}: AUC={0:0.2f}'.format(
            metrics['aver_prec'], mapnames[0], mapnames[1]))
        plt.legend(loc="lower left")
        plt.savefig(plotpath)

    def report(self, cmpmap, scoremap=None, outprefix="", outdir="", plotdir="",
796
797
798
799
               # plot_ext="pdf", plotag=True, n_factors=(0.5, 1.0, 1.5)):
               plot_ext="pdf", plotag=True):
        """
        Generate contact map report file
800
801
802

        Parameters
        ----------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
803
804
805
806
807
808
809
810
811
812
813
814
815
816
        cmpmap :
            
        scoremap :
            (Default value = None)
        outprefix :
            (Default value = "")
        outdir :
            (Default value = "")
        plotdir :
            (Default value = "")
        plot_ext :
            (Default value = "pdf")
        plotag :
            (Default value = True)
817
818
819
820

        Returns
        -------

821
        """
822
823
824
        filename = ".".join((outprefix, "mapreport")) if outprefix else \
            "mapreport"
        reportpath = "%s/%s" % (outdir, filename)
825
        LOG.info("Generate map report file (%s)", reportpath)
826

827
        # TODO: evaluate map statistics for several treshold (use submap var)
828
829
        # nb_c = int(len(self.sequence) * float(n_factors[0]))
        # submap = cmpmap.copy()
830
831
        # print(np.array_equal(cmpmap.values.astype(int).flat,
        #                      submap.topmap(scoremap, nb_c).values.astype(int).flat))
832
833
834

        y_true = list(self.values.astype(int).flat)
        y_pred = list(cmpmap.values.astype(int).flat)
Fabrice Allain's avatar
Fabrice Allain committed
835
        y_scores = scoremap.values.astype(float).flat if scoremap is not None else None
836

Fabrice Allain's avatar
Fabrice Allain committed
837
        # Compute classification metrics for the entire map
838
839
840
841
842
843
844
845
846
        metrics = self.classification_metrics(y_true, y_pred, y_scores)

        self._write_report(
            reportpath, map1name=self.desc, map2name=cmpmap.desc,
            map1path=self.path, map2path=cmpmap.path,
            seq="\n## ".join(textwrap.wrap(self.sequence, width=77)),
            protlen=len(self.sequence),
            date=datetime.date.today().strftime("%A %d. %B %Y"),
            outdir=outdir, **metrics)
847

848
        if plotag and scoremap is not None:
849
850
851
            self._metrics_plot(metrics, mapnames=(self.desc, cmpmap.desc),
                               outprefix=outprefix, plot_ext=plot_ext,
                               plotdir=plotdir)
852

853
    def compare_contactmap(self, cmpmap, contactlist, outprefix,
854
                           outdir="", distmap=None, human_idx=True):
855
856
        """
        Compare 2 contact map and plot differences
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876

        Parameters
        ----------
        cmpmap :
            param contactlist:
        outprefix :
            param outdir:
        distmap :
            param human_idx: (Default value = None)
        contactlist :
            
        outdir :
            (Default value = "")
        human_idx :
            (Default value = True)

        Returns
        -------

        
877
        """
fabrice's avatar
fabrice committed
878
        # CSV file giving TP/FP contacts
Fabrice Allain's avatar
Fabrice Allain committed
879
880
        outpath = "%s/%s.contactcmp.csv" % (outdir,
                                            outprefix if outprefix else "maplot")
881
        LOG.info("Generate stat file (%s)", outpath)
882
883
884
        with open(outpath, 'w') as outfile:
            offset = 1 if human_idx else 0
            extra_header = "" if distmap is None else ",dmin"
885
            outfile.write("#resid1,resid2,res1,res2,TP/FP%s\n" % extra_header)
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
            for x in contactlist:
                if extra_header:
                    dmin = "," + str(distmap.iat[(int(x[0]) - offset, int(x[1]) -
                                                  offset)])
                else:
                    dmin = ""
                contact = self.iat[(int(x[0]) - offset, int(x[1]) - offset)]
                cmpcontact = cmpmap.iat[(int(x[0]) - offset, int(x[1]) - offset)]
                if contact and cmpcontact:
                    eq = "TP"
                elif contact and not cmpcontact:
                    eq = "FP"
                elif not contact and cmpcontact:
                    eq = "FN"
                else:
                    eq = "TN"
902
                msg = "%s,%s,%s,%s,%s%s\n" % (x[0], x[1],
903
                                            self.sequence[int(x[0]) - offset],
904
905
                                            self.sequence[int(x[1]) - offset],
                                            eq,
906
                                            dmin)
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
907
                outfile.write(msg)
fabrice's avatar
fabrice committed
908

Fabrice Allain's avatar
Fabrice Allain committed
909
    def write_contacts(self, filename, outdir="", prefix="", human_idx=True,
910
                       scoremap=None):
911
        """
912
        
913

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
914
915
        Parameters
        ----------
Fabrice Allain's avatar
Fabrice Allain committed
916
        prefix
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
917
918
919
920
921
922
923
924
925
926
927
928
929
        filename :
            param outdir:
        human_idx :
            param scoremap: (Default value = True)
        outdir :
            (Default value = "")
        scoremap :
            (Default value = None)

        Returns
        -------

        
930
        """
Fabrice Allain's avatar
Fabrice Allain committed
931
932
        prefix = prefix + "_" if prefix else prefix
        filepath = "%s/%s%s.contact.txt" % (outdir, prefix, filename.replace(".", "_"))
933
        LOG.info("Generate contact file (%s)", filepath)
934
935
936
937
938
939
        with open(filepath, 'w') as outfile:
            offset = 1 if human_idx else 0
            # contacts = [sorted(contact) for contact in self.contactset()]
            # for contact in sorted(contacts):
            for contact in self.contactset():
                contact = sorted([contact[0] + offset, contact[1] + offset])
940
941
942
                if scoremap is not None:
                    score = scoremap.iat[(int(contact[0]) - offset,
                                          int(contact[1]) - offset)]
943
                    outfile.write("%d %d %.4f\n" %
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
944
                                  (contact[0], contact[1], score))
945
                else:
946
                    outfile.write("%d %d\n" % (contact[0], contact[1]))
fabrice's avatar
fabrice committed
947

fabrice's avatar
fabrice committed
948

fabrice's avatar
fabrice committed
949
class ResAtmMap(ProteinMap):
950
951
    """
    Protein distance/contact matrix for all atom pairs. If no sequence given,
fabrice's avatar
fabrice committed
952
953
954
955
956
957
958
959
960
961
962
    protein distance/contact matrix for all amino acids
    Ex:
    residue           PHE1                                                    \
    atom               CD1       CD2        CB        CA        CG        CZ
    residue atom
    PHE1    CD1   0.000000  2.394145  2.455440  3.269219  1.391024  2.421148
            CD2   2.394145  0.000000  2.509243  3.407996  1.379875  2.401098
            CB    2.455440  2.509243  0.000000  1.507025  1.478053  4.267602
            CA    3.269219  3.407996  1.507025  0.000000  2.505414  5.085997
            CG    1.391024  1.379875  1.478053  2.505414  0.000000  2.790403

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
963
964
965
966
967
968
969
    Parameters
    ----------

    Returns
    -------

    
fabrice's avatar
fabrice committed
970
971
    """

Fabrice Allain's avatar
Fabrice Allain committed
972
973
    # def _constructor_expanddim(self):
    #     super(ResAtmMap, self)._constructor_expanddim()
fabrice's avatar
fabrice committed
974

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
975
    def __init__(self, sequence, **kwargs):
fabrice's avatar
fabrice committed
976
977
978
979
980
        # Sequence: 1L string or MultiIndex object
        # Dataframe is in 3L code
        if not sequence:
            sequence = ConversionTable.ConversionTable().table['AMINO_ACID'][
                'iupac'].keys()
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
981
982
983
        # Super call will initialize index and columns with self.create_index()
        super(ResAtmMap, self).__init__(sequence=sequence, **kwargs)
        self._sequence = sequence
fabrice's avatar
fabrice committed
984
985
986

    @property
    def sequence(self):
987
988
        """
        Amino Acid sequence string in humanidx
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
989
990
991
992
993
994
995
996

        Parameters
        ----------

        Returns
        -------

        
997
        """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
        if self._sequence:
            # If non empty string
            return self._sequence
        elif self.index:
            self._sequence = "".join(AminoAcid.AminoAcid(_.split("-")[1])[0]
                                     for _ in self.index.levels[0])
        else:
            # No information given
            self._sequence = "".join(sorted(ConversionTable.ConversionTable().table[
                'AMINO_ACID']['iupac'].keys()))
        return self._sequence
fabrice's avatar
fabrice committed
1009

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1010
    def create_index(self, sequence, seq_pos=True, seqidx=None, idxnames=None,
1011
                     colnames=None, **kwargs):
1012
        """
1013
        
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1014
1015
        Parameters
        ----------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1016
1017
1018
1019
1020
        sequence
        seq_pos
        seqidx: pandas.MultiIndex, optional
        idxnames
        colnames
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1021
1022
1023

        Returns
        -------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1024
        tuple of pandas.MultiIndex objects
1025
        """
1026
        LOG.debug("Indexing ResAtm - ResAtm dataframe")
fabrice's avatar
fabrice committed
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
        # Atom table for residues (keys are in 3L code)
        seqidx = seqidx if seqidx and len(seqidx) == len(sequence) else None
        iupac_aa = ConversionTable.ConversionTable().table['AMINO_ACID'][
            'iupac']
        # Amino acid conversion table 1L -> 3L
        # Making humanidx list for pandas dataframe
        seq = [AminoAcid.AminoAcid(aa)[1] for aa in sequence]
        # Repeat each res for each heavy atm (humanidx value + 1 in order to
        # start at 1 as in pdb file)
        if set(sequence) == sequence:
            # General ResAtmMap for 20 aminoacid
            res_list = ["%s" % aa for aa in seq for _ in
                        (filter(self.heavy_reg.match, iupac_aa[aa].keys()))]
        elif seqidx:
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1041
            # If we already have an index related to the sequence
fabrice's avatar
fabrice committed
1042
1043
            res_list = [
                "%03d-%s" % (seqidx[i], aa) for i, aa in enumerate(seq)
1044
                for _ in filter(self.heavy_reg.match, iupac_aa[aa].keys())]
fabrice's avatar
fabrice committed
1045
        else:
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1046
1047
1048
1049
            # If no index related to the sequence
            res_list = [
                "%03d-%s" % (i + 1, aa) for i, aa in enumerate(seq)
                for _ in filter(self.heavy_reg.match, iupac_aa[aa].keys())]
fabrice's avatar
fabrice committed
1050
1051
1052
1053
1054
        # TODO: Inutile de repeter a chaque fois le calcul des listes
        # d'atomes lourd pour chaque residu -> Deplacer au niveau de l'init
        atm_list = [atm for aa in seq for atm in filter(self.heavy_reg.match,
                                                        iupac_aa[aa].keys())]
        if len(atm_list) != len(res_list):
1055
1056
            LOG.error("Index lists aren't the same size\n%s\n%s",
                      res_list, atm_list)
fabrice's avatar
fabrice committed
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066

        idxnames = idxnames if idxnames and len(idxnames) == 2 else [
            "residuex", "atomx"]
        colnames = colnames if colnames and len(colnames) == 2 else [
            "residuey", "atomy"]
        index = pd.MultiIndex.from_tuples(list(zip(*[res_list, atm_list])),
                                          names=idxnames)
        columns = pd.MultiIndex.from_tuples(list(zip(*[res_list,
                                                       atm_list])),
                                            names=colnames)
1067
        LOG.debug("Index:\n%s", index)
fabrice's avatar
fabrice committed
1068
1069
1070
        return index, columns

    def create_heatmap(self):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1071
        """:return:"""
fabrice's avatar
fabrice committed
1072
1073
1074
1075
1076
1077
1078
        unidf = self.reduce()
        if unidf.isnull().values.sum() == 0:
            # If there's no nan values
            return sns.heatmap(unidf)
        return None

    def reduce(self, groupby="min"):
1079
        """
1080
        
1081

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1082
1083
1084
1085
1086
1087
1088
1089
1090
        Parameters
        ----------
        groupby :
            return: (Default value = "min")

        Returns
        -------

        
1091
        """
fabrice's avatar
fabrice committed
1092
1093
        if self.index.nlevels == 1 or not groupby:
            return self
1094
        newmap = ResMap(self.sequence, dtype=self.dtype, desc=self.desc,
1095
                        sym=self.sym, path=self.path)
fabrice's avatar
fabrice committed
1096
        if self.dtype == bool:
Fabrice Allain's avatar
Fabrice Allain committed
1097
            newmap[:] = self.copy().stack(dropna=False).groupby(level=0).any()
fabrice's avatar
fabrice committed
1098
        elif groupby == "mean":
Fabrice Allain's avatar
Fabrice Allain committed
1099
            newmap[:] = self.copy().stack(dropna=False).groupby(level=0).mean()
fabrice's avatar
fabrice committed
1100
        elif groupby == "min":
Fabrice Allain's avatar
Fabrice Allain committed
1101
            newmap[:] = self.copy().stack(dropna=False).groupby(level=0).min()
fabrice's avatar
fabrice committed
1102
1103
        return newmap

1104
    def contact_map(self, contactdef, scsc_min=None, def_cut=5.0):
1105
1106
        """
        Contact map generator from all atoms distance map. There's a contact
fabrice's avatar
fabrice committed
1107
        with 2 residues iff dist between 2 atoms are below the given treshold
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1108
1109
1110
1111

        Parameters
        ----------
        def_cut :
Fabrice Allain's avatar
Fabrice Allain committed
1112
            (Default value = 5.0)
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1113
        contactdef :
Fabrice Allain's avatar
Fabrice Allain committed
1114
            for all atom pair 
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1115
1116
1117
1118
1119
1120
1121
        scsc_min :
            (Default value = None)

        Returns
        -------

        
fabrice's avatar
fabrice committed
1122
1123
1124
1125
        """
        if self.dtype == bool:
            # If self is already a contact map
            return None
fabrice's avatar
fabrice committed
1126
        # TODO: issue with sc_sc treshold !!!!!
1127
1128
        LOG.info("Generate contact map using contact definition %s",
                 contactdef)
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1129
        # Initialize contact map to a boolean matrix filled with False as values
1130
        contact_map = ResAtmMap(sequence=self.sequence, mtype="contact",
1131
                                desc=self.desc, sym=self.sym, path=self.path)
1132
1133
        def_cutoff = float(contactdef.get("default_cutoff")) if float(
            contactdef.get("default_cutoff")) else def_cut
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1134
        treshold = None
fabrice's avatar
fabrice committed
1135
1136

        if type(contactdef) == float:
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1137
            # ???
fabrice's avatar
fabrice committed
1138
1139
            contact_map[:] = self.applymap(lambda x: x < contactdef)

Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1140
1141
        elif sum(x is not None for x in contactdef.values()) == 1 \
                and def_cutoff:
1142
            LOG.info("Using default cutoff")
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1143
1144
            contact_map[:] = self.applymap(
                lambda x: x < contactdef.get("default_cutoff"))
fabrice's avatar
fabrice committed
1145
1146
1147
1148
1149
1150
1151

        elif sum(x is not None for x in contactdef.values()) > 1:
            # treshconv = lambda x: x < contactdef.get("default_cutoff")
            # contact_map[:] = self.applymap(treshconv)
            atm_list = set(self.index.get_level_values(1))
            atms_list = set([(a, b) for a in atm_list for b in atm_list])
            for pair in contactdef.keys():
Fabrice Allain's avatar
Fabrice Allain committed
1152
                treshold = contactdef[pair]
fabrice's avatar
fabrice committed
1153
                if pair == "default_cutoff":
Fabrice Allain's avatar
Fabrice Allain committed
1154
1155
                    # Since there is more than one treshold, we do not take
                    # default cutoff into account
fabrice's avatar
fabrice committed
1156
                    continue
Fabrice Allain's avatar
Fabrice Allain committed
1157
1158
1159
                if pair == "all":
                    tmp = self.applymap(lambda x: x < treshold)
                    contact_map.update(tmp)
fabrice's avatar
fabrice committed
1160
                    continue
fabrice's avatar
fabrice committed
1161
                else:
Fabrice Allain's avatar
Fabrice Allain committed
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
                    pair = tuple(pair.upper().split("_"))
                    LOG.info(
                        "Filtering values in matrix related to %s (%s)",
                        str(pair), str(treshold))
                    if pair in (("SC", "SC"), ("sc", "sc")):
                        # Use scsc_min to apply treshold updateonly for selected atom
                        # sidechain
                        idx_list = []
                        col_list = []
                        for res1 in scsc_min:
                            for res2 in scsc_min[res1]:
                                pair = (AminoAcid.AminoAcid(res1)[1],
                                        AminoAcid.AminoAcid(res2)[1])
                                atm1, atm2 = scsc_min[res1][res2]
                                idx_list.append(
                                    self.index.map(lambda x: x[0].endswith(
                                        pair[0]) and x[1] == atm1))
                                col_list.append(
                                    self.index.map(lambda x: x[0].endswith(
                                        pair[1]) and x[1] == atm2))
                        mask = ([any(tup) for tup in zip(*idx_list)],
                                [any(tup) for tup in zip(*col_list)])
                    elif pair not in atms_list:
                        LOG.error("Pair %s doesn't exist ...", str(pair))
                        # Already applied a treshold for this pair
                        continue
                    else:
                        LOG.debug("Apply treshold for %s", str(pair))
                        atms_list.discard(pair)
                        # Selecting rows for each atom
                        mask = (self.index.get_level_values(1) == pair[0],
                                self.index.get_level_values(1) == pair[1])
                    tmp = self.loc[mask].apply(lambda x: x < float(treshold))
                    contact_map.update(tmp)
fabrice's avatar
fabrice committed
1196
        else:
1197
            LOG.error("Missing values in contact definition section. Add "
1198
                      "at least a default_cutoff value.")
1199
        LOG.debug("Contact map\n%s", contact_map.head())
fabrice's avatar
fabrice committed
1200
1201
        return contact_map

1202
    def copy(self, **kwargs):
1203
1204
        """
        Copy dataframe and related attributes of the map
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214

        Parameters
        ----------
        **kwargs :
            

        Returns
        -------

        
1215
1216
1217
1218
1219
1220
        """
        df = super(Map, self).copy()
        return ResAtmMap(
            sequence=self.sequence, path=self.path, data=df, desc=self.desc,
            sym=self.sym, mtype=self.mtype, dtype=self.dtype)

fabrice's avatar
fabrice committed
1221
1222

class ResMap(ResAtmMap):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1223
    """Res - res distance/contact matrix"""
fabrice's avatar
fabrice committed
1224
1225

    def __init__(self, sequence, **kwargs):
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1226
1227
1228
1229
        if not sequence:
            sequence = ConversionTable.ConversionTable().table['AMINO_ACID'][
                'iupac'].keys()
        super(ResMap, self).__init__(sequence, **kwargs)
fabrice's avatar
fabrice committed
1230

Fabrice Allain's avatar
Fabrice Allain committed
1231
1232
    # def _constructor_expanddim(self):
    #     super(ResMap, self)._constructor_expanddim()
fabrice's avatar
fabrice committed
1233
1234
1235

    @property
    def sequence(self):
1236
1237
        """
        Amino Acid sequence string in humanidx
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1238
1239
1240
1241
1242
1243
1244
1245

        Parameters
        ----------

        Returns
        -------

        
1246
        """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1247
1248
1249
        # return "".join(AminoAcid.AminoAcid(aa.split("-")[1])[0] for aa in
        #                self.index)
        return super(ResMap, self).sequence
fabrice's avatar
fabrice committed
1250
1251
1252

    def create_index(self, sequence, seqidx=None, idxnames=None,
                     colnames=None, **kwargs):
1253
        """
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1254
        
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1255
1256
        Parameters
        ----------
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1257
1258
1259
1260
1261
        sequence
        seqidx: pandas.Index
        idxnames
        colnames
        kwargs
Fabrice  ALLAIN's avatar
Fabrice ALLAIN committed
1262
1263
1264
1265

        Returns
        -------

1266
        """
1267
        LOG.debug("Indexing Res - Res dataframe")
fabrice's avatar
fabrice committed
1268
1269