# -*- coding: utf-8 -*-

########################################################################
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# This file is part of Rapid Peptide Generator (RPG) software.         #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Contains class and functions related to enzymes definition and use"""
import os
import re
from pathlib import Path
from rpg import core
from rpg import rule

# Default location of the user-defined enzymes file: rpg_user.py, directly
# inside the current user's home directory
DEFUSERENZFILE = str(Path.home()) + "/rpg_user.py"

# Create the user-defined enzymes file if it does not exist yet. It starts
# with the imports and the two module-level names (AVAILABLE_ENZYMES_USER,
# CPT_ENZ) that the enzyme declarations appended later rely on.
if not os.path.isfile(DEFUSERENZFILE):
    with open(DEFUSERENZFILE, "w") as out_file:
        out_file.write("from rpg import enzyme\n"
                       "from rpg import rule\n"
                       "from rpg import enzymes_definition\n"
                       "\n"
                       "AVAILABLE_ENZYMES_USER = []\n"
                       "CPT_ENZ = enzymes_definition.CPT_ENZ\n"
                       "\n"
                       "### ENZYMES DECLARATION ###\n")

class Enzyme:
    """Definition of a cleaving enzyme containing specific rules.

    :param id_: id of the enzyme
    :param name: name of the enzyme
    :param rules: cleaving rules
    :param ratio_miscleavage: miscleavage ratio
    :type id_: int
    :type name: str
    :type rules: list(:py:class:`~rpg.rule.Rule`)
    :type ratio_miscleavage: float
    """
    def __init__(self, id_, name, rules, ratio_miscleavage=0):
        self.id_ = id_
        self.name = name
        self.ratio_miscleavage = ratio_miscleavage
        self.rules = rules

    # self representation for print
    def __repr__(self):
        return "Id: %s\nName: %s\nRatio Miscleavage: %.2f%%\nRules: %s\n" %\
            (self.id_, self.name, self.ratio_miscleavage, self.rules)

    # Equality between two Enzymes
    def __eq__(self, other):
        # Test `other` against our class (not the reverse), so comparing with
        # an unrelated object simply yields False instead of failing on a
        # missing __dict__
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return False

    # Needed with __eq__ to make it hashable
    def __hash__(self):
        # Hash the scalar attributes only: enzymes that compare equal share
        # them, so they get the same hash. The rules list is left out
        # because a list is not hashable.
        return hash((self.id_, self.name, self.ratio_miscleavage))

    def write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE):
        """Append this enzyme to the user's enzyme file as Python code.

        Nothing is written when the enzyme has no rules. If the file can
        not be opened for appending, the problem is reported through
        ``core.handle_errors``.

        :param enz_file: location of user file (default: ~/rpg_user.py)
        :type enz_file: str
        """
        # An enzyme without rules is not written
        if not self.rules:
            return

        # The name ends up inside a double-quoted Python string literal:
        # escape backslashes and double quotes so the generated file stays
        # valid and the name is read back unchanged
        quoted_name = self.name.replace("\\", "\\\\").replace("\"", "\\\"")

        # Comment and first line of the Enzyme
        parts = ["\n\n\n# User-defined enzyme " + self.name +
                 "\nENZ = []\n\n"]
        # All the main rules and their sub-rules
        parts.extend(a_rule.format_rule() for a_rule in self.rules)
        # End of the Enzyme
        # NOTE(review): the miscleavage ratio written here is always 0,
        # whatever self.ratio_miscleavage holds -- confirm this is intended
        parts.append("ENZYME = enzyme.Enzyme(CPT_ENZ, \"" + quoted_name +
                     "\", ENZ, 0)\n# Add it to available enzymes\n"
                     "AVAILABLE_ENZYMES_USER.append(ENZYME)\nCPT_ENZ += 1\n")
        ret = "".join(parts)

        # Write all in the user file
        try:
            with open(enz_file, "a") as output_file:
                output_file.write(ret)
        except IOError:
            core.handle_errors("'%s' can't be open in '%s' mode" %
                               (enz_file, "a"), 0, "File ")

# Sequences that must not appear in an enzyme name, in the order they are
# reported, with the label used in the error message. Every whitespace
# character other than the plain space is listed twice: as the real character
# and as its two-character backslash spelling.
_FORBIDDEN_IN_NAME = (
    (" ", "Space character"),
    ("\t", "Tab character"),
    ("\\t", "Tab character"),
    ("\n", "Newline character"),
    ("\\n", "Newline character"),
    ("\r", "Carriage return (\\r) character"),
    ("\\r", "Carriage return (\\r) character"),
    ("\f", "Form feed (\\f) character"),
    ("\\f", "Form feed (\\f) character"),
    ("\v", "Vertical Tab (\\v) character"),
    ("\\v", "Vertical Tab (\\v) character"),
)


def check_enzyme_name(name_new_enz, all_name_enz):
    """Validate the name of a new enzyme.

    :param name_new_enz: name of the new enzyme
    :param all_name_enz: names of already created enzymes
    :type name_new_enz: str
    :type all_name_enz: list(str)

    :return: True if name is correct
    :rtype: bool

    An enzyme name must not contain a whitespace character (' ', \\\\t,
    \\\\n, \\\\r, \\\\f, \\\\v), given either as the character itself or
    spelled out as a backslash followed by the letter, must not be empty
    and must not be already used. Every problem found is reported through
    ``core.handle_errors`` (level 2); for a forbidden sequence, only its
    first occurrence is reported.
    """
    ret = True

    # If the enzyme name is already taken
    if name_new_enz in all_name_enz:
        core.handle_errors("This name exist, please choose another name.", 2)
        ret = False

    # Does it contain a forbidden sequence?
    for sequence, label in _FORBIDDEN_IN_NAME:
        pos = name_new_enz.find(sequence)
        if pos != -1:
            # Pad with spaces so the caret is in the column of the
            # offending character; reported positions are 1-based
            core.handle_errors(" " * pos + "^\n" + label +
                               " found at position " + str(pos + 1) +
                               ", please choose another name.", 2)
            ret = False

    # Not empty
    if name_new_enz == "":
        core.handle_errors("Please choose a not empty name.", 2)
        ret = False

    return ret

# Not tested
def user_creation_enzyme(all_enzymes):
    """Text-mode form to input new enzymes.

    Asks on standard input for the name of an enzyme, then for its
    cleaving rules and exceptions, writes the resulting enzyme in the
    user-defined enzymes file and starts over for as long as the user
    answers "y" to the final question.

    :param all_enzymes: already defined enzymes, only their names are used
    :type all_enzymes: list(:py:class:`Enzyme`)

    .. warning:: Not tested
    .. warning:: It could be a problem to immediately use the new enzyme (see in-code warning)
    """
    # Names already in use, completed with each enzyme created here
    all_name_enz = {enz.name for enz in all_enzymes}

    # Adding enzyme
    add_enzyme = "y"
    while add_enzyme == "y":

        # Name of the enzyme, asked again until it is valid
        name_new_enz = input("Name of the new enzyme?\n")
        while not check_enzyme_name(name_new_enz, all_name_enz):
            name_new_enz = input("Name of the new enzyme?\n")

        # All the rules entered by user: validated rule -> True for a
        # cleaving rule, False for an exception
        all_rules = {}
        # Input of user for creating rules
        def_rule = "_"
        while def_rule != "":
            # Type of rule? Ensure we got a correct value i.e. c, e or q
            cutmp = ""
            while cutmp not in ("c", "e", "q"):
                cutmp = input("Create a cleaving rule (c) or an exception (e)?"
                              " (q) to quit:\n")
            # Exit
            if cutmp == "q":
                break
            # c = True (cleaving rule), e = False (exception)
            cut = cutmp == "c"

            # Ask again until rule.check_rule returns something else than
            # an empty string, or until the user gives up with q
            validate_rule = ""
            while validate_rule == "":
                if cut:
                    def_rule = input("Write your cleaving rule,"
                                     " (q) to quit:\n")
                else:
                    def_rule = input("Write your exception rule,"
                                     " (q) to quit:\n")
                # Quit?
                if def_rule == "q":
                    break
                # Check if input is coherent
                validate_rule = rule.check_rule(def_rule)
            # Add this rule (nothing to add if the user quit)
            if validate_rule != "":
                all_rules[validate_rule] = cut

        # Get all the rules in correct format
        correct_rules = rule.create_rules(all_rules)

        # Create the enzyme with fake id (auto-inc)
        # .. warning:: It could be a problem to immediately use the new enzyme
        new_enz = Enzyme(-1, name_new_enz, correct_rules)

        # Write in the user-defined enzymes file
        new_enz.write_enzyme_in_user_file()

        # Add it to known names
        all_name_enz.add(new_enz.name)

        # End of this new enzyme
        add_enzyme = input("Add another enzyme? (y/n)\n")