setup.py 13.1 KB
Newer Older
1
# coding=utf-8
fabrice's avatar
fabrice committed
2
3
4
5
6
7
"""
                            Input/Output aria_ec scripts
"""
from __future__ import absolute_import, division, print_function

import os
8
9
import json
import logging
fabrice's avatar
fabrice committed
10
11
12
13
14
15
16
17
18

from .protein import Protein
from .reader import ProtFileListReader
from .base import get_filename
from .protmap import MapFilter
from .econverter import AriaEcXMLConverter

# TODO: S'inspirer de pandas/__init__.py pour les dependances

19
LOG = logging.getLogger(__name__)
fabrice's avatar
fabrice committed
20
21


22
23
24
25
26
class AriaEcSetup(object):
    """
    Aria Ec Setup protocol
    """

fabrice's avatar
fabrice committed
27
    def __init__(self, settings):
fabrice's avatar
fabrice committed
28
29
30
31
        """
        :param settings:
        :return:
        """
fabrice's avatar
fabrice committed
32
33
34
35
36
37
38
39
40
41
42
43
44
        # TODO: check_type settings (AriaEcSettings)
        self.settings = settings
        self.protein = Protein(settings)
        self.outprefix = ''
        self.reader = ProtFileListReader(cont_def=settings.contactdef.config)
        self.allresmap = {}
        self.targetmap = None
        self.refmaps = None
        self.hbmaps = None
        self.filter = MapFilter(settings.setup.config)
        self.converter = AriaEcXMLConverter(settings)

    def run(self):
fabrice's avatar
fabrice committed
45
        """
46
        main method
fabrice's avatar
fabrice committed
47
48
        :return:
        """
fabrice's avatar
fabrice committed
49
        # Check input
50
51
52
53
        LOG.debug("Settings:\n" + json.dumps(self.settings.setup.config,
                                             indent=4))
        LOG.debug("Args:\n" + json.dumps(self.settings.setup.args,
                                         indent=4))
fabrice's avatar
fabrice committed
54
        self.settings.make_infra()
fabrice's avatar
fabrice committed
55
56
57
58
59
        # -------------------------------------------------------------------- #
        # ----------------------------- Input -------------------------------- #
        # -------------------------------------------------------------------- #
        self.outprefix = get_filename(self.settings.setup.args.get("seq",
                                                                   None))
60
        self.converter.outprefix = self.outprefix
fabrice's avatar
fabrice committed
61
62
63
64
65
        # ------------------------- Load sequence ---------------------------- #
        self.protein.set_aa_sequence(self.settings.setup.args.get("seq", None))
        # -------------- Load secondary structure prediction ----------------- #
        self.protein.set_sec_struct(self.settings.setup.args.get("sspred",
                                                                 None),
66
                                    ssdist_filename=self.settings.ssdist,
fabrice's avatar
fabrice committed
67
68
69
70
71
72
                                    ssidx=self.settings.setup.args.get(
                                        "ssidx", False))
        # -------------------------------------------------------------------- #
        # ---------------------------- Processing ---------------------------- #
        # -------------------------------------------------------------------- #
        # TODO: write submatrix in a file
73
        # TODO: change read method in reader to __call__
fabrice's avatar
fabrice committed
74
75
        # -------------------------- contact maps ---------------------------- #
        self.reader.read(self.settings.setup.args.get("infiles"),
76
77
                         filetypelist=self.settings.setup.args.get(
                             "contact_types"),
fabrice's avatar
fabrice committed
78
                         protein=self.protein,
79
80
                         groupby_method=self.settings.setup.config[
                             'groupby_method'],
fabrice's avatar
fabrice committed
81
                         scsc=self.settings.scsc_min)
82
        for mapfile in self.reader.filelist:
fabrice's avatar
fabrice committed
83
84
85
            # fo need a contactmap in order to wite XML dist restraints
            # TODO: filter pour toutes les map de mapdict !! (fonction remove
            #  s'applique sur l'humanidx contenant les residus)
86
87
88
            self.filter(mapfile.mapdict, mapfile.filetype, mapfile.contactlist,
                        self.protein, clashlist=mapfile.clashlist,
                        outprefix=self.outprefix,
89
                        outdir=self.settings.infra.get("others", ''))
90
            self.allresmap[mapfile.filetype] = mapfile.mapdict
fabrice's avatar
fabrice committed
91

92
93
            if mapfile.filetype != "pdb" and "pdb" in self.allresmap:
                mapfile.contactmap.compareplot(self.allresmap["pdb"])
fabrice's avatar
fabrice committed
94
95
96
97

        # ---------------------------- target map ---------------------------- #
        if self.settings.setup.args.get("distfile") and \
                self.settings.setup.config.get("distance_type") == "distfile":
98
            LOG.info("Loading target distance file")
fabrice's avatar
fabrice committed
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
            # Read distance file
            self.reader.read(
                self.settings.setup.args.get("distfile"),
                filetypelist="distfile",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.targetmap = self.converter.targetdistmap(
                self.settings.setup.config.get("distance_type"),
                self.protein.aa_sequence.sequence,
                distfile=self.reader.filelist[0],
                groupby=self.settings.setup.config['groupby_method'])
        else:
            # Default targetmap using config parameters
            self.targetmap = self.converter.targetdistmap(
                self.settings.setup.config.get("distance_type"),
                self.protein.aa_sequence.sequence,
                groupby=self.settings.setup.config['groupby_method'])

        # ---------------------------- ref map ----------------------------- #
        if self.settings.setup.args.get("ref"):
            self.reader.read(
                self.settings.setup.args.get("ref"),
                filetypelist="pdb",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.refmaps = self.reader.filelist[0].mapdict

        # ---------------------------- hbond map ----------------------------- #
        if self.settings.setup.args.get("hb") and \
                self.settings.setup.config.get("longrange_hb", False):
            self.reader.read(
                self.settings.setup.args.get("hb"),
                filetypelist="metapsicovhb",
                protein=self.protein,
                groupby_method=self.settings.setup.config['groupby_method'],
                scsc=self.settings.scsc_min)
            self.hbmaps = self.reader.filelist[0].mapdict
        elif self.settings.setup.config.get("longrange_hb", False):
            # Create HBMAP with naive metapsicov method
            # Consider as hbond ec predicted between beta strand
            raise NotImplementedError

        # -------------------------------------------------------------------- #
        # ------------------------------ Output ------------------------------ #

        # ----------------------------- SEQ file ----------------------------- #
147
148
149
150
151
        self.protein.write_seq(
            os.path.join(self.settings.infra.get("others", ''),
                         self.outprefix + ".seq"))
        # Load aria molecule object from seq file and convert it into xml format
        LOG.info("Load molecule file and convert it into xml format")
152
        self.converter.load_molecule(self.protein.seqfile_path)
fabrice's avatar
fabrice committed
153
154
155
156
        # --------------------------- TBL restraints ------------------------- #
        # Setting contact number limit for hbmap
        n_hb = int(len(self.protein.aa_sequence.sequence) *
                   self.settings.setup.config.get("nf_longrange_hb"))
157
        LOG.info("Writing tbl files ...")
fabrice's avatar
fabrice committed
158
159
160
161
162
        tbl_files = self.converter.write_tbl_restraints(
            self.protein, hbmap=self.hbmaps, n_hb=n_hb)

        # --------------------------- XML restraints ------------------------- #
        # Setting contact number limit for map restraints (native, ec, ...)
163

164
165
166
167
168
        # dist_files, pair_lists = self.converter.write_maplist_restraints(
        #     self.allresmap, self.targetmap)

        dist_files = self.converter.write_maplist_restraints(
            self.allresmap, self.targetmap)[0]
fabrice's avatar
fabrice committed
169
170

        # --------------------------- XML SEQ file --------------------------- #
171
        xmlseq_file = self.converter.write_xmlseq()
fabrice's avatar
fabrice committed
172
173

        # ---------------------- ARIA XML project file ----------------------- #
174
        self.converter.write_ariaproject(self.settings.template,
175
                                         xmlseq_file, dist_files, tbl_files)
fabrice's avatar
fabrice committed
176
177
178
179
        # ------------------------------ others ------------------------------ #
        self.write_optional_files()

    def write_optional_files(self):
fabrice's avatar
fabrice committed
180
        """
181
        Write filtered contacts & distance maps (.csv)
fabrice's avatar
fabrice committed
182
183
        :return:
        """
fabrice's avatar
fabrice committed
184
185
186
187
188
        # Indextableplus file (submatrix)
        # Contacts_refined.out
        for maptype in self.allresmap:
            self.allresmap[maptype].get("contactmap").write_contacts(
                "_".join((self.outprefix, maptype, "filtered")),
189
190
                self.settings.infra.get("others"), scoremap=self.allresmap[
                    maptype].get("scoremap"))
fabrice's avatar
fabrice committed
191
192
193
194
195
            if self.allresmap[maptype]["alldistmap"] is not None:
                self.allresmap[maptype]["alldistmap"].to_csv(
                    "%s/%s.distmap.csv" % (self.settings.infra.get("others"),
                                           maptype))
            if self.refmaps:
196
197
198
199
200
                self._write_contacts(
                    self.allresmap[maptype].get("filteredlist", None),
                    self.protein.aa_sequence.sequence,
                    self.settings.infra.get("others", ''),
                    "_".join((self.outprefix, maptype, "filtered")),
fabrice's avatar
fabrice committed
201
202
203
204
205
                    ref=self.refmaps["contactmap"],
                    distmap=self.refmaps["distmap"])
        if self.refmaps:
            self.refmaps["alldistmap"].to_csv(
                "%s/%s_%s.distmap.csv" % (self.settings.infra.get("others"),
206
207
                                          self.outprefix,
                                          self.refmaps.filetype))
fabrice's avatar
fabrice committed
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222

    def _write_contacts(self, contacts_list, seq, out, prefix, nc=None,
                        append=False, ref=None, distmap=None):
        """
        Write contacts from contact_list (sorted !)
        :param contacts_list:
        :param seq:
        :param out:
        :param prefix:
        :param nc:
        :param append:
        :param ref:
        :return:
        """
        mapy = []
223
        tp_count = 0
fabrice's avatar
fabrice committed
224
225
226
227
228
229
230
        filemode = 'a' if append else 'w'
        dist_desc = '\tTP/FP\tdist%s(ref)' % self.settings.setup.config[
            "groupby_method"] if \
            distmap is not None \
            else ''
        with open("%s/%s.contacts.txt" % (out, prefix), filemode) as outfile:
            if not append:
231
232
                outfile.write(
                    '''# resid1\tresid2\tres1\tres2%s\n''' % dist_desc)
fabrice's avatar
fabrice committed
233

234
235
236
            if hasattr(contacts_list, 'keys'):
                contacts = contacts_list.keys() if not nc else \
                    contacts_list.keys()[:nc]
fabrice's avatar
fabrice committed
237
238
239
240
241
242
243
                d_type = True
            else:
                # Check if contacts is 2-tuple
                if False in [len(item) == 2 for item in contacts_list]:
                    raise TypeError('Contact list must be 2-tuple !')
                contacts = contacts_list if not nc else contacts_list[:nc]
                d_type = False
244

245
            LOG.debug("Contact list %s", contacts)
fabrice's avatar
fabrice committed
246
247
248
249
250
251
252
253
254
255
            for contact in contacts:

                if d_type:
                    # If dictionary
                    resid1 = int(contacts_list[contact].setdefault('res1_nb'))
                    resid2 = int(contacts_list[contact].setdefault('res2_nb'))
                else:
                    resid1 = int(contact[0])
                    resid2 = int(contact[1])

256
                LOG.debug("Contact %s", str(contact))
fabrice's avatar
fabrice committed
257
258
259
260
261
262
263
264
265
266
                if distmap is not None:
                    dist = distmap.ix[(resid1, resid2)]
                else:
                    dist = {'dist': '', 'atoms': ''}

                if (resid1, resid2) not in mapy:
                    mapy.append((resid1, resid2))
                    if ref is not None:
                        if ref.ix[(resid1, resid2)]:
                            asses = 'TP'
267
                            tp_count += 1
fabrice's avatar
fabrice committed
268
269
270
271
272
273
274
275
276
277
278
279
280
281
                        else:
                            asses = 'FP'
                        outfile.write(
                            "%s\t%s\t%s\t%s\t%s\t%s\n" % (resid1 + 1,
                                                          resid2 + 1,
                                                          seq[resid1],
                                                          seq[resid2], asses,
                                                          dist))
                    else:
                        outfile.write("%s\t%s\t%s\t%s\n" % (resid1 + 1,
                                                            resid2 + 1,
                                                            seq[resid1],
                                                            seq[resid2]))

282
            ptp = (tp_count / float(len(contacts))) * 100.0 if ref is not None else \
fabrice's avatar
fabrice committed
283
284
285
286
                None
            outfile.write('''
# TP number : {tp} ({ptp:.2f} %)
# Number of contacts : {nc}
287
    '''.format(tp=tp_count, ptp=ptp, nc=len(contacts)))
fabrice's avatar
fabrice committed
288
289
290
291
292
293
294


if __name__ == "__main__":
    # Test AriaEcCommand object
    from .ecsettings import AriaEcSettings

    logging.basicConfig(level=logging.DEBUG)
295
    LOG = logging.getLogger("Setup")
296
    AriaEcSettings('setup').load_config('aria_ec.ini')