setup.py 13.1 KB
Newer Older
1
# coding=utf-8
fabrice's avatar
fabrice committed
2
3
4
5
6
7
"""
                            Input/Output aria_ec scripts
"""
from __future__ import absolute_import, division, print_function

import os
8
9
import json
import logging
fabrice's avatar
fabrice committed
10
11

from .protein import Protein
12
from .reader import MapFileListReader
fabrice's avatar
fabrice committed
13
14
15
16
17
18
from .base import get_filename
from .protmap import MapFilter
from .econverter import AriaEcXMLConverter

# TODO: S'inspirer de pandas/__init__.py pour les dependances

19
LOG = logging.getLogger(__name__)
fabrice's avatar
fabrice committed
20
21


22
23
24
25
26
class AriaEcSetup(object):
    """
    Aria Ec Setup protocol
    """

fabrice's avatar
fabrice committed
27
    def __init__(self, settings):
fabrice's avatar
fabrice committed
28
29
30
31
        """
        :param settings:
        :return:
        """
fabrice's avatar
fabrice committed
32
33
34
35
        # TODO: check_type settings (AriaEcSettings)
        self.settings = settings
        self.protein = Protein(settings)
        self.outprefix = ''
36
        self.reader = MapFileListReader(cont_def=settings.contactdef.config)
fabrice's avatar
fabrice committed
37
38
39
40
41
42
43
44
        self.allresmap = {}
        self.targetmap = None
        self.refmaps = None
        self.hbmaps = None
        self.filter = MapFilter(settings.setup.config)
        self.converter = AriaEcXMLConverter(settings)

    def run(self):
fabrice's avatar
fabrice committed
45
        """
46
        main method
fabrice's avatar
fabrice committed
47
48
        :return:
        """
fabrice's avatar
fabrice committed
49
        # Check input
50
51
52
53
        LOG.debug("Settings:\n" + json.dumps(self.settings.setup.config,
                                             indent=4))
        LOG.debug("Args:\n" + json.dumps(self.settings.setup.args,
                                         indent=4))
fabrice's avatar
fabrice committed
54
        self.settings.make_infra()
fabrice's avatar
fabrice committed
55
56
57
58
59
        # -------------------------------------------------------------------- #
        # ----------------------------- Input -------------------------------- #
        # -------------------------------------------------------------------- #
        self.outprefix = get_filename(self.settings.setup.args.get("seq",
                                                                   None))
60
        self.converter.outprefix = self.outprefix
fabrice's avatar
fabrice committed
61
62
63
64
65
        # ------------------------- Load sequence ---------------------------- #
        self.protein.set_aa_sequence(self.settings.setup.args.get("seq", None))
        # -------------- Load secondary structure prediction ----------------- #
        self.protein.set_sec_struct(self.settings.setup.args.get("sspred",
                                                                 None),
66
                                    ssdist_filename=self.settings.ssdist,
fabrice's avatar
fabrice committed
67
68
69
70
71
72
                                    ssidx=self.settings.setup.args.get(
                                        "ssidx", False))
        # -------------------------------------------------------------------- #
        # ---------------------------- Processing ---------------------------- #
        # -------------------------------------------------------------------- #
        # TODO: write submatrix in a file
73
        # TODO: change read method in reader to __call__
fabrice's avatar
fabrice committed
74
75
        # -------------------------- contact maps ---------------------------- #
        self.reader.read(self.settings.setup.args.get("infiles"),
76
77
                         filetypelist=self.settings.setup.args.get(
                             "contact_types"),
fabrice's avatar
fabrice committed
78
                         protein=self.protein,
79
80
                         groupby_method=self.settings.setup.config[
                             'groupby_method'],
fabrice's avatar
fabrice committed
81
                         scsc=self.settings.scsc_min)
82
        for mapfile in self.reader.filelist:
fabrice's avatar
fabrice committed
83
84
85
            # fo need a contactmap in order to wite XML dist restraints
            # TODO: filter pour toutes les map de mapdict !! (fonction remove
            #  s'applique sur l'humanidx contenant les residus)
86
87
88
            self.filter(mapfile.mapdict, mapfile.filetype, mapfile.contactlist,
                        self.protein, clashlist=mapfile.clashlist,
                        outprefix=self.outprefix,
89
                        outdir=self.settings.infra.get("others", ''))
90
            self.allresmap[mapfile.filetype] = mapfile.mapdict
fabrice's avatar
fabrice committed
91

92
93
            if mapfile.filetype != "pdb" and "pdb" in self.allresmap:
                mapfile.contactmap.compareplot(self.allresmap["pdb"])
fabrice's avatar
fabrice committed
94
95
96
97

        # ---------------------------- target map ---------------------------- #
        if self.settings.setup.args.get("distfile") and \
                self.settings.setup.config.get("distance_type") == "distfile":
98
            LOG.info("Loading target distance file")
fabrice's avatar
fabrice committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
            # Read distance file
            self.reader.read(
                self.settings.setup.args.get("distfile"),
                filetypelist="distfile",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.targetmap = self.converter.targetdistmap(
                self.settings.setup.config.get("distance_type"),
                self.protein.aa_sequence.sequence,
                distfile=self.reader.filelist[0],
                groupby=self.settings.setup.config['groupby_method'])
        else:
            # Default targetmap using config parameters
            self.targetmap = self.converter.targetdistmap(
                self.settings.setup.config.get("distance_type"),
                self.protein.aa_sequence.sequence,
                groupby=self.settings.setup.config['groupby_method'])

        # ---------------------------- ref map ----------------------------- #
        if self.settings.setup.args.get("ref"):
            self.reader.read(
                self.settings.setup.args.get("ref"),
                filetypelist="pdb",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.refmaps = self.reader.filelist[0].mapdict

        # ---------------------------- hbond map ----------------------------- #
        if self.settings.setup.args.get("hb") and \
                self.settings.setup.config.get("longrange_hb", False):
            self.reader.read(
                self.settings.setup.args.get("hb"),
                filetypelist="metapsicovhb",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.hbmaps = self.reader.filelist[0].mapdict
        elif self.settings.setup.config.get("longrange_hb", False):
            # Create HBMAP with naive metapsicov method
            # Consider as hbond ec predicted between beta strand
            raise NotImplementedError

        # -------------------------------------------------------------------- #
        # ------------------------------ Output ------------------------------ #

        # ----------------------------- SEQ file ----------------------------- #
147
148
149
150
151
        self.protein.write_seq(
            os.path.join(self.settings.infra.get("others", ''),
                         self.outprefix + ".seq"))
        # Load aria molecule object from seq file and convert it into xml format
        LOG.info("Load molecule file and convert it into xml format")
152
        self.converter.load_molecule(self.protein.seqfile_path)
fabrice's avatar
fabrice committed
153
154
155
156
        # --------------------------- TBL restraints ------------------------- #
        # Setting contact number limit for hbmap
        n_hb = int(len(self.protein.aa_sequence.sequence) *
                   self.settings.setup.config.get("nf_longrange_hb"))
157
        LOG.info("Writing tbl files ...")
fabrice's avatar
fabrice committed
158
159
160
161
162
        tbl_files = self.converter.write_tbl_restraints(
            self.protein, hbmap=self.hbmaps, n_hb=n_hb)

        # --------------------------- XML restraints ------------------------- #
        # Setting contact number limit for map restraints (native, ec, ...)
163

164
165
166
167
168
        # dist_files, pair_lists = self.converter.write_maplist_restraints(
        #     self.allresmap, self.targetmap)

        dist_files = self.converter.write_maplist_restraints(
            self.allresmap, self.targetmap)[0]
fabrice's avatar
fabrice committed
169
170

        # --------------------------- XML SEQ file --------------------------- #
171
        xmlseq_file = self.converter.write_xmlseq()
fabrice's avatar
fabrice committed
172
173

        # ---------------------- ARIA XML project file ----------------------- #
174
        self.converter.write_ariaproject(self.settings.template,
175
176
                                         xmlseq_file, dist_files, tbl_files,
                                         desclist=self.allresmap.keys())
fabrice's avatar
fabrice committed
177
178
179
180
        # ------------------------------ others ------------------------------ #
        self.write_optional_files()

    def write_optional_files(self):
fabrice's avatar
fabrice committed
181
        """
182
        Write filtered contacts & distance maps (.csv)
fabrice's avatar
fabrice committed
183
184
        :return:
        """
fabrice's avatar
fabrice committed
185
186
187
188
189
        # Indextableplus file (submatrix)
        # Contacts_refined.out
        for maptype in self.allresmap:
            self.allresmap[maptype].get("contactmap").write_contacts(
                "_".join((self.outprefix, maptype, "filtered")),
190
191
                self.settings.infra.get("others"), scoremap=self.allresmap[
                    maptype].get("scoremap"))
fabrice's avatar
fabrice committed
192
193
194
195
196
            if self.allresmap[maptype]["alldistmap"] is not None:
                self.allresmap[maptype]["alldistmap"].to_csv(
                    "%s/%s.distmap.csv" % (self.settings.infra.get("others"),
                                           maptype))
            if self.refmaps:
197
198
199
200
201
                self._write_contacts(
                    self.allresmap[maptype].get("filteredlist", None),
                    self.protein.aa_sequence.sequence,
                    self.settings.infra.get("others", ''),
                    "_".join((self.outprefix, maptype, "filtered")),
fabrice's avatar
fabrice committed
202
203
204
205
206
                    ref=self.refmaps["contactmap"],
                    distmap=self.refmaps["distmap"])
        if self.refmaps:
            self.refmaps["alldistmap"].to_csv(
                "%s/%s_%s.distmap.csv" % (self.settings.infra.get("others"),
207
208
                                          self.outprefix,
                                          self.refmaps.filetype))
fabrice's avatar
fabrice committed
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223

    def _write_contacts(self, contacts_list, seq, out, prefix, nc=None,
                        append=False, ref=None, distmap=None):
        """
        Write contacts from contact_list (sorted !)
        :param contacts_list:
        :param seq:
        :param out:
        :param prefix:
        :param nc:
        :param append:
        :param ref:
        :return:
        """
        mapy = []
224
        tp_count = 0
fabrice's avatar
fabrice committed
225
226
227
228
229
230
231
        filemode = 'a' if append else 'w'
        dist_desc = '\tTP/FP\tdist%s(ref)' % self.settings.setup.config[
            "groupby_method"] if \
            distmap is not None \
            else ''
        with open("%s/%s.contacts.txt" % (out, prefix), filemode) as outfile:
            if not append:
232
233
                outfile.write(
                    '''# resid1\tresid2\tres1\tres2%s\n''' % dist_desc)
fabrice's avatar
fabrice committed
234

235
236
237
            if hasattr(contacts_list, 'keys'):
                contacts = contacts_list.keys() if not nc else \
                    contacts_list.keys()[:nc]
fabrice's avatar
fabrice committed
238
239
240
241
242
243
244
                d_type = True
            else:
                # Check if contacts is 2-tuple
                if False in [len(item) == 2 for item in contacts_list]:
                    raise TypeError('Contact list must be 2-tuple !')
                contacts = contacts_list if not nc else contacts_list[:nc]
                d_type = False
245

246
            LOG.debug("Contact list %s", contacts)
fabrice's avatar
fabrice committed
247
248
249
250
251
252
253
254
255
256
            for contact in contacts:

                if d_type:
                    # If dictionary
                    resid1 = int(contacts_list[contact].setdefault('res1_nb'))
                    resid2 = int(contacts_list[contact].setdefault('res2_nb'))
                else:
                    resid1 = int(contact[0])
                    resid2 = int(contact[1])

257
                LOG.debug("Contact %s", str(contact))
fabrice's avatar
fabrice committed
258
259
260
261
262
263
264
265
266
267
                if distmap is not None:
                    dist = distmap.ix[(resid1, resid2)]
                else:
                    dist = {'dist': '', 'atoms': ''}

                if (resid1, resid2) not in mapy:
                    mapy.append((resid1, resid2))
                    if ref is not None:
                        if ref.ix[(resid1, resid2)]:
                            asses = 'TP'
268
                            tp_count += 1
fabrice's avatar
fabrice committed
269
270
271
272
273
274
275
276
277
278
279
280
281
282
                        else:
                            asses = 'FP'
                        outfile.write(
                            "%s\t%s\t%s\t%s\t%s\t%s\n" % (resid1 + 1,
                                                          resid2 + 1,
                                                          seq[resid1],
                                                          seq[resid2], asses,
                                                          dist))
                    else:
                        outfile.write("%s\t%s\t%s\t%s\n" % (resid1 + 1,
                                                            resid2 + 1,
                                                            seq[resid1],
                                                            seq[resid2]))

283
            ptp = (tp_count / float(len(contacts))) * 100.0 if ref is not None else \
fabrice's avatar
fabrice committed
284
285
286
287
                None
            outfile.write('''
# TP number : {tp} ({ptp:.2f} %)
# Number of contacts : {nc}
288
    '''.format(tp=tp_count, ptp=ptp, nc=len(contacts)))
fabrice's avatar
fabrice committed
289
290
291
292
293
294
295


if __name__ == "__main__":
    # Test AriaEcCommand object
    from .ecsettings import AriaEcSettings

    logging.basicConfig(level=logging.DEBUG)
296
    LOG = logging.getLogger("Setup")
297
    AriaEcSettings('setup').load_config('aria_ec.ini')