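# enzyme.py (12.5 KB), committed by Nicolas MAILLET.
# NOTE: the bare numbers that follow are line-number gutter residue left over
# from the web view this file was copied from; they are not part of the code.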
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
# -*- coding: utf-8 -*-

########################################################################
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# This file is part of Rapid Peptide Generator (RPG) software.         #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Contains class and functions related to enzymes definition and use"""
import os
import re
from pathlib import Path
import core
import rule

# File name of the enzymes definition file read by user_creation_enzyme()
DEFENZFILE = "enzymes_definition.py"
# Per-user enzymes file, located in the user's home directory
DEFUSERENZFILE = str(Path.home()) + "/rpg_user.py"

# Bootstrap the user enzymes file on import: when it does not exist yet,
# write the header (imports, empty enzyme list, id counter) that the
# user-defined enzyme declarations are appended after.
if not os.path.isfile(DEFUSERENZFILE):
    with open(DEFUSERENZFILE, "w") as out_file:
        # One literal per line of the generated file
        out_file.write(
            "import enzyme\n"
            "import rule\n"
            "import enzymes_definition\n"
            "\n"
            "AVAILABLE_ENZYMES_USER = []\n"
            "CPT_ENZ = enzymes_definition.CPT_ENZ\n"
            "\n"
            "### ENZYMES DECLARATION ###\n"
        )

class Enzyme:
    """Definition of a cleaving enzyme containing specific rules.

    :param id_: id of the enzyme
    :param name: name of the enzyme
    :param rules: cleaving rules
    :param ratio_miss_cleavage: miss-cleavage ratio
    :type id_: int
    :type name: str
    :type rules: list(:py:class:`~rpg.rule.Rule`)
    :type ratio_miss_cleavage: float
    """
    def __init__(self, id_, name, rules, ratio_miss_cleavage=0):
        self.id_ = id_
        self.name = name
        self.ratio_miss_cleavage = ratio_miss_cleavage
        self.rules = rules

    # self representation for print
    def __repr__(self):
        """Return a multi-line, human-readable description of the enzyme."""
        return "Id: %s\nName: %s\nRatio Miss Cleaveage: %.2f%%\nRules: %s\n" %\
            (self.id_, self.name, self.ratio_miss_cleavage, self.rules)

    # Equality between two Enzymes
    def __eq__(self, other):
        """Two enzymes are equal when all their attributes are equal.

        Anything that is not an Enzyme compares unequal instead of raising.
        """
        if isinstance(other, Enzyme):
            return self.__dict__ == other.__dict__
        return False

    # Needed with __eq__ to make it hashable
    def __hash__(self):
        """Hash consistent with :py:meth:`__eq__`.

        Only the scalar attributes are hashed: ``rules`` is a list and so
        cannot be hashed itself. Equal enzymes still get equal hashes,
        because equality requires these three attributes to match.
        As for any hashable object, do not modify these attributes while
        the enzyme is stored in a set or used as a dict key.
        """
        return hash((self.id_, self.name, self.ratio_miss_cleavage))

    def write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE):
        """Write enzyme to user's enzyme file as a Python function.

        Nothing is written when the enzyme has no rule. If the file can
        not be opened, the error is reported through
        ``core.handle_errors``.

        :param enz_file: location of user file (default: ~/rpg_user.py)
        :type enz_file: str
        """
        # An enzyme without rules is not written at all
        if self.rules == []:
            return

        # Comment and first line of the Enzyme
        chunks = ["\n\n\n# User defined enzyme " + self.name + "\nENZ = []\n\n"]
        # All the main rules and their sub-rules
        chunks.extend(a_rule.format_rule() for a_rule in self.rules)
        # End of the Enzyme.
        # NOTE(review): the name is inserted as-is between double quotes and
        # the miss-cleavage ratio is written as a literal 0, whatever the
        # value of self.ratio_miss_cleavage -- confirm both are intended.
        chunks.append("ENZYME = enzyme.Enzyme(CPT_ENZ, \"" + self.name + "\", "
                      "ENZ, 0)\n# Add it to available enzymes\nAVAILABLE_ENZYMES"
                      "_USER.append(ENZYME)\nCPT_ENZ += 1\n")

        # Append everything to the user file
        try:
            with open(enz_file, "a") as output_file:
                output_file.write("".join(chunks))
        except IOError:
            core.handle_errors("'%s' can't be open in '%s' mode" %
                               (enz_file, "a"), 0, "File ")

def check_enzyme_name(name_new_enz, all_name_enz):
    """Validate the name of a new enzyme.

    :param name_new_enz: name of the new enzyme
    :param all_name_enz: names of already created enzymes
    :type name_new_enz: str
    :type all_name_enz: list(str) or set(str)

    :return: True if name is correct
    :rtype: bool

    Enzyme name should not be empty, be already used, or contain a
    whitespace character. For tab, newline, carriage return, form feed
    and vertical tab, both the real character and its two-character
    escape sequence (a backslash followed by ``t``, ``n``, ``r``, ``f``
    or ``v``) are refused.

    Every problem found is reported through ``core.handle_errors``; for
    each forbidden character only its first occurrence is reported, with
    a caret line pointing at it.
    """
    ret = True

    # If the enzyme name is already taken
    if name_new_enz in all_name_enz:
        core.handle_errors("This name exist, please choose another name.", 2)
        ret = False

    # (text to look for, label used in the message), in reporting order.
    # Each real character is followed by its escaped spelling.
    forbidden = (
        (" ", "Space"),
        ("\t", "Tab"),
        ("\\t", "Tab"),
        ("\n", "Newline"),
        ("\\n", "Newline"),
        ("\r", "Carriage return (\\r)"),
        ("\\r", "Carriage return (\\r)"),
        ("\f", "Form feed (\\f)"),
        ("\\f", "Form feed (\\f)"),
        ("\v", "Vertical Tab (\\v)"),
        ("\\v", "Vertical Tab (\\v)"),
    )
    for needle, label in forbidden:
        pos = name_new_enz.find(needle)
        if pos == -1:
            continue
        # Caret placed under the offending character, then the message
        # (positions shown to the user are 1-based)
        core.handle_errors(" " * pos + "^\n" + label +
                           " character found at position " + str(pos + 1) +
                           ", please choose another name.", 2)
        ret = False

    # Not empty
    if name_new_enz == "":
        core.handle_errors("Please choose a not empty name.", 2)
        ret = False

    return ret

def _read_enzyme_names(enz_path):
    """Collect the names of the enzymes declared in an enzymes file.

    A declaration is a line starting with
    ``ENZYME = enzyme.Enzyme(CPT_ENZ,``; the name is the text between the
    first pair of double quotes on that line. If the file can not be
    opened, the error is reported through ``core.handle_errors``.

    :param enz_path: path of the file to scan
    :type enz_path: str

    :return: names found in the file
    :rtype: set(str)
    """
    declaration = "ENZYME = enzyme.Enzyme(CPT_ENZ,"
    names = set()
    try:
        with open(enz_path) as enz_file:
            for line in enz_file:
                # startswith() rather than a fixed-width slice: a slice of
                # the wrong width can never equal the marker
                if line.startswith(declaration):
                    pieces = line.split("\"")
                    # Ignore a declaration line without a quoted name
                    if len(pieces) > 1:
                        names.add(pieces[1])
    except IOError:
        core.handle_errors("fail to open " + enz_path, 0, "File ")
    return names


# Not tested
def user_creation_enzyme():
    """Text-mode form to input a new enzyme.

    Repeatedly asks for an enzyme name (until it passes
    :py:func:`check_enzyme_name`), then for cleaving rules and exceptions,
    and appends the resulting enzyme to the user's enzymes file. The
    whole form is repeated as long as the user answers ``y`` to the final
    question.

    .. warning:: Not tested
    .. warning:: It could be a problem to immediately use the new enzyme (see in-code warning)
    """
    add_enzyme = "y"
    # Adding enzyme
    while add_enzyme == "y":
        # Get all used names, from both definition files.
        # Done once per enzyme to prevent opening those files for each
        # name the user tries.
        # NOTE(review): DEFENZFILE is a bare file name, so it is opened
        # relative to the current working directory -- confirm.
        all_name_enz = _read_enzyme_names(DEFENZFILE)
        all_name_enz |= _read_enzyme_names(DEFUSERENZFILE)

        # Name of the enzyme, asked until it is valid
        name_new_enz = input("Name of the new enzyme?\n")
        while not check_enzyme_name(name_new_enz, all_name_enz):
            name_new_enz = input("Name of the new enzyme?\n")

        # All the rules entered by user: validated rule -> True for a
        # cleaving rule, False for an exception
        all_rules = {}
        # Input of user for creating rules
        def_rule = "_"
        while def_rule != "":
            # Type of rule?
            cutmp = ""
            # Ensure we got a correct value i.e. c, e or q
            while cutmp not in ("c", "e", "q"):
                cutmp = input("Create a cleaving rule (c) or an exception (e)?"
                              " (q) to quit:\n")
            # Exit
            if cutmp == "q":
                break
            # c = True (cleaving rule), e = False (exception)
            cut = cutmp == "c"
            # The rule is valid?
            validate_rule = ""
            # Until the rule is properly defined:
            while validate_rule == "":
                if cut:
                    # Cleaving rule
                    def_rule = input("Write your cleaving rule,"
                                     " (q) to quit:\n")
                else:
                    # Exception rule
                    def_rule = input("Write your exception rule,"
                                     " (q) to quit:\n")
                # Quit? The current rule is dropped, back to the menu
                if def_rule == "q":
                    break
                # Check if input is coherent
                validate_rule = rule.check_rule(def_rule)
            # Add this rule
            if validate_rule != "":
                all_rules[validate_rule] = cut

        # Get all the rules in correct format
        correct_rules = rule.create_rules(all_rules)

        # Create the enzyme with fake id (auto-inc)
        # .. warning:: It could be a problem to immediately use the new enzyme
        new_enz = Enzyme(-1, name_new_enz, correct_rules)

        # Write in the user-defined enzymes file
        new_enz.write_enzyme_in_user_file()

        # End of this new enzyme
        add_enzyme = input("Add an other enzyme? (y/n)\n")