#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-

########################################################################
# Rapid Peptide Generator (RPG) is a software dedicated to predict     #
# cleavage sites of proteases. User can create his own enzyme,         #
# following a simple grammar.                                          #
#                                                                      #
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Main file of RPG software, handle input/output and launch
necessary functions
"""

# Version of RPG, as a tuple of strings and as a dotted string
__version_info__ = ('1', '0', '5')
__version__ = '.'.join(__version_info__)
__revision_date__ = "2018-07-13"
__author__ = "Nicolas Maillet"

import argparse
import os
import sys
import uuid
from pathlib import Path
#from context import rpg
from rpg import core
from rpg import digest
from rpg import enzyme
from rpg.enzymes_definition import AVAILABLE_ENZYMES
# The user's home directory is put first on the import path so that the
# ``rpg_user`` module imported just below is looked up there first.
sys.path.insert(0, str(Path.home())) # Home path
from rpg_user import AVAILABLE_ENZYMES_USER

# Enzymes shipped with RPG followed by the ones coming from ``rpg_user``
ALL_ENZYMES = AVAILABLE_ENZYMES + AVAILABLE_ENZYMES_USER
"""All available enzymes in RPG."""

def restricted_float(mc_val):
    """Restricts input miscleavage value to a float between 0 and 100.

    Used as ``type=`` callback of the ``--miscleavage`` option. Problems
    are reported through :py:func:`core.handle_errors` with level ``0``
    (presumably fatal -- confirm in ``rpg.core``).

    :param mc_val: value to test
    :type mc_val: float

    :return: the inputed value if correct
    :rtype: float

    :raises custom ValueError: if value is not between 0 and 100
    :raises custom TypeError: if value is not a float
    """
    # Only the conversion itself can raise the ValueError we care about
    try:
        mc_val = float(mc_val)
    except ValueError:
        # Throw an error
        core.handle_errors("miscleavage value should be a float between 0 "
                           "and 100.", 0, "Type ")
        # Only reached if handle_errors() returns
        return None
    # Written as a chained comparison so that NaN, for which every
    # comparison is False, is rejected as well
    if not 0 <= mc_val <= 100:
        core.handle_errors("miscleavage value should be between 0 and "
                           "100.", 0, "Value ")
    return mc_val

def restricted_enzyme_id(enz_id):
    """Check that an inputed enzyme id is an int matching a known enzyme.

    The id is looked up among the ``id_`` of every enzyme of ALL_ENZYMES.

    :param enz_id: value to test
    :type enz_id: int

    :return: the inputed enzyme id, converted to int
    :rtype: int

    :raises custom ValueError: if id does not correspond to any enzyme
    :raises custom TypeError: if value is not an int
    """
    try:
        enz_id = int(enz_id)
        # Ids of all the enzymes currently available
        known_ids = [enz.id_ for enz in ALL_ENZYMES]
        if enz_id not in known_ids:
            message = ("id " + str(enz_id) + " does not correspond to any "
                       "enzyme. Use -l to get enzyme ids.")
            core.handle_errors(message, 0, "Input ")
        return enz_id
    except ValueError:
        # Not convertible to an integer
        core.handle_errors("Enzyme id should be an integer.", 0, "Type ")

def list_enzyme():
    """Print each enzyme of ALL_ENZYMES as ``id: name``, one per line."""
    # Lazy generator: each line is formatted right before being printed
    entries = ("%i: %s" % (enz.id_, enz.name) for enz in ALL_ENZYMES)
    for entry in entries:
        print(entry)

def create_enzymes_to_use(enzymes, miscleavage):
    """Create the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.

    Each enzyme can be associated to a miscleavage value: the n-th value
    of ``miscleavage`` is assigned to the n-th id of ``enzymes``. Enzymes
    without an associated value are selected with their
    ``ratio_miscleavage`` left untouched. Ids matching no enzyme of
    ALL_ENZYMES are silently skipped.

    :param enzymes: enzymes ids chosen by user
    :param miscleavage: associated miscleavage values
    :type enzymes: list(int)
    :type miscleavage: list(float)

    :return: the selected enzymes, in the order of ``enzymes``
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)

    .. note:: ``ratio_miscleavage`` is set directly on the objects of
        ALL_ENZYMES, no copy is made.
    """
    # Complete Enzymes to use (return)
    enzymes_to_use = []
    if not enzymes:
        return enzymes_to_use
    # A missing list of miscleavage values means "no value at all"
    miscleavage = miscleavage or []
    # Too much miscleavage values
    if len(miscleavage) > len(enzymes):
        core.handle_errors("Too much miscleavage values. Last values"
                           " will be ignored.")
        # Get only the first ones
        miscleavage = miscleavage[:len(enzymes)]
    nb_miscleavage = len(miscleavage)
    for pos, enz_id in enumerate(enzymes):
        # In all available enzymes
        for enz in ALL_ENZYMES:
            if enz.id_ != enz_id:
                continue
            # Only the first ids have an associated miscleavage value
            if pos < nb_miscleavage:
                enz.ratio_miscleavage = miscleavage[pos]
            enzymes_to_use.append(enz)
    # Return enzymes to use
    return enzymes_to_use
# Not tested
def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
    """Get the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.

    The ids (and associated miscleavage values) given on the command
    line are tried first. If they select no enzyme, the available
    enzymes are listed and the user is interrogated until at least one
    enzyme is selected or he quits.

    :param mode: Digestion mode. Miscleavage values are only asked to the user in 'sequential' mode
    :param id_enz_selected: enzyme's ids chosen by user
    :param miscleavage: associated miscleavage values
    :type mode: str
    :type id_enz_selected: list(int)
    :type miscleavage: list(float)

    :return: the enzymes built by :py:func:`create_enzymes_to_use`
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)

    .. warning:: Not tested
    """
    # Get the correct Enzymes inputed
    enzymes_to_use = create_enzymes_to_use(id_enz_selected, miscleavage)
    if enzymes_to_use:
        return enzymes_to_use

    # No good Enzymes inputed, let user choose
    # Print all available enzymes
    list_enzyme()
    # Ask user to give correct enzymes ids
    while not enzymes_to_use:
        id_enz_inp = input("Choose which enzyme(s) to use, separated by"
                           " comma (example: 1,5,6). (q) to quit:\n")
        # Quit (any answer containing a 'q')
        if "q" in id_enz_inp:
            sys.exit(0)
        # Start from an empty list at each attempt: ids of a rejected
        # answer must not shift the pairing with miscleavage values
        id_enz_inputed = []
        for token in id_enz_inp.split(","):
            try:
                id_enz_inputed.append(int(token))
            # Not an int?
            except ValueError:
                core.handle_errors("'%s' should be an integer value. This"
                                   " values will be ignored." % token)
        mc_enz_inputed = []
        if mode == "sequential":
            mc_enz_inp = input("Percentage of miscleavage per inputed"
                               " enzyme (default 0), separated by comma"
                               " (example: 1,5,6):\n")
            # NOTE(review): these values are not range-checked, unlike
            # the ones coming from the -m option
            if mc_enz_inp:
                for token in mc_enz_inp.split(","):
                    try:
                        mc_enz_inputed.append(float(token))
                    # Not a float?
                    except ValueError:
                        core.handle_errors("'%s' should be an floating"
                                           " value. This values will be"
                                           " ignored." % token)
        # Get the correct Enzyme if enzymes inputed
        enzymes_to_use = create_enzymes_to_use(id_enz_inputed,
                                               mc_enz_inputed)
    # Return Enzymes to use
    return enzymes_to_use
def _build_parser():
    """Build the command-line parser of RapidPeptidesGenerator."""
    parser = argparse.ArgumentParser(
        description="This software takes protein sequences as input (-i "
                    "option). All sequences will be cleaved according to "
                    "selected enzymes (-e option) and given miscleavage "
                    "percentage (-m option). Cleaving can be sequential or "
                    "concurrent (-d option). Resulting peptides are "
                    "outputted in a file (-o option) in fasta, csv or tsv "
                    "format (-f option). Classical enzymes are included (-l "
                    "option to print available enzymes) but it is possible "
                    "to define other enzymes (-a option). See "
                    "https://gitlab.pasteur.fr/nmaillet/rpg/ and "
                    "https://rapid-peptide-generator.readthedocs.io for "
                    "more informations.")
    # Exactly one of -a, -i or -l has to be given
    group_launch = parser.add_mutually_exclusive_group(required=True)
    group_launch.add_argument("-a", "--addenzyme", action="store_true",
                              help="Create a new enzyme. See "
                              "https://rapid-peptide-generator.readthedocs.io"
                              " for more informations")
    parser.add_argument("-d", "--digest", metavar="",
                        choices=['s', 'sequential', 'c', 'concurrent'],
                        default="s", help="Digestion mode. Either 's', "
                        "'sequential', 'c' or 'concurrent' (default: s)")
    parser.add_argument("-e", "--enzymes", metavar="", default=[],
                        action='append', type=restricted_enzyme_id,
                        help="Id of enzyme(s) to use (i.e. -e 0 -e 5 -e 10 to"
                        " use enzymes 0, 5 and 10). Use -l first to get "
                        "enzyme ids")
    parser.add_argument("-f", "--fmt", metavar="",
                        choices=['fasta', 'csv', 'tsv'], default="fasta",
                        help="Output file format. Either 'fasta', 'csv', or "
                        "'tsv' (default: fasta)")
    group_launch.add_argument("-i", "--inputdata", metavar="",
                              help="Input file, in fasta / fastq format or a "
                              "single protein sequence without commentary")
    group_launch.add_argument("-l", "--list", action="store_true",
                              help="Display the list of available enzymes")
    parser.add_argument("-m", "--miscleavage", metavar="", default=[],
                        action='append', type=restricted_float,
                        help="Percentage of miscleavage, between 0 and 100,"
                        " by enzyme(s). It should be in the same order than "
                        "-enzymes options (i.e. -m 15 -m 5 -m 10). Only for "
                        "sequential digestion (default: 0)")
    parser.add_argument("-n", "--noninteractive", action='store_true',
                        help="Non-interactive mode. No standard output, only "
                        "error(s) (--quiet enable, overwrite -v). If output "
                        "filename already exists, output file will be "
                        "overwritten.")
    # -o and -r can not be used together
    group_output = parser.add_mutually_exclusive_group()
    group_output.add_argument("-o", "--outputfile", type=str, metavar="",
                              default="peptides", help="Result file to "
                              "output result peptides (default './peptides"
                              ".xxx' depending of --fmt)")
    group_output.add_argument("-r", "--randomname", action="store_true",
                              help="Random (not used) output name file")
    # -q and -v can not be used together
    group_verbose = parser.add_mutually_exclusive_group()
    group_verbose.add_argument("-q", "--quiet", action="store_true",
                               help="No standard output, only error(s)")
    group_verbose.add_argument("-v", "--verbose", action="count", default=0,
                               help="Increase output verbosity. -vv will "
                               "increase more than -v")
    parser.add_argument("--version", action="version",
                        version="%(prog)s " + __version__ + " from " +
                        __revision_date__)
    return parser


def _add_default_extension(filename, fmt):
    """Return `filename`, suffixed by ``.fmt`` if it has no extension."""
    # splitext() only looks at the last path component, so the dots of
    # './peptides' or '../out/peptides' are not taken for an extension
    if os.path.splitext(filename)[1]:
        return filename
    return filename + "." + fmt


def _choose_output_file(args):
    """Return the output file name for --outputfile / --randomname."""
    if args.randomname:
        # Generate random file names until an unused one is found
        output_file = uuid.uuid4().hex + "." + args.fmt
        while os.path.isfile(output_file):
            output_file = uuid.uuid4().hex + "." + args.fmt
        return output_file
    # Chosen file name, with the default extension for this file format
    output_file = _add_default_extension(str(args.outputfile), args.fmt)
    # Non-interactive mode: an existing file will be overwritten
    if args.noninteractive:
        return output_file
    # This file already exist?
    while os.path.isfile(output_file):
        core.handle_errors("File '%s' already exists!" % output_file)
        # Overwrite it
        if input("Overwrite it? (y/n)\n") == "y":
            break
        # Don't overwrite it, ask for another name
        output_file = _add_default_extension(input("Output filename?\n"),
                                             args.fmt)
    return output_file


# Not tested
def main():
    """Launcher of RapidPeptidesGenerator

    Parse the command line, then either create a new enzyme (-a), list
    the available enzymes (-l) or digest the input data (-i) and output
    the resulting peptides.

    .. warning:: Not tested
    """
    args = _build_parser().parse_args()

    # --addenzyme option
    if args.addenzyme:
        enzyme.user_creation_enzyme(ALL_ENZYMES)
        sys.exit(0)

    # --digest option
    mode = "sequential"
    if args.digest in ("c", "concurrent"):
        mode = "concurrent"
        args.miscleavage = []  # No miscleavage on concurrent, infinite time

    # --list option
    if args.list:
        list_enzyme()
        sys.exit(0)

    # --noninteractive option
    if args.noninteractive:
        args.quiet = True
        args.verbose = 0

    # --outputfile / --randomname options
    output_file = _choose_output_file(args)

    # More mis cleavage than enzyme
    if len(args.miscleavage) > len(args.enzymes):
        core.handle_errors("Too much miscleavage values. Last values will "
                           "be ignored.")
        args.miscleavage = args.miscleavage[:len(args.enzymes)]

    # Get all enzymes inputed
    enzymes_to_use = get_enzymes_to_use(mode, args.enzymes, args.miscleavage)

    # Output options
    if args.verbose:
        print("Input: " + args.inputdata)
        print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use]))
        print("Mode: " + mode)
        print("miscleavage ratio: " +
              str([enz.ratio_miscleavage for enz in enzymes_to_use]))
        print("Output file: " + os.path.abspath(output_file))

    # Make the actual digestion of input data
    results_digestion = digest.digest_from_input(args.inputdata,
                                                 enzymes_to_use, mode)

    # Output results
    core.output_results(output_file, results_digestion, args.fmt, args.quiet,
                        args.verbose)


### Let'z go ###
if __name__ == '__main__':
    # main() returns None, so the script ends with exit status 0
    sys.exit(main())