#!/usr/bin/env python3.6
# -*- coding: utf-8 -*-

########################################################################
# Rapid Peptide Generator (RPG) is a software dedicated to predict     #
# cleavage sites of proteases. User can create his own enzyme,         #
# following a simple grammar.                                          #
#                                                                      #
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Main file of RPG software, handle input/output and launch
necessary functions
"""

32
# Module metadata. The dotted version string is derived from the tuple so
# that both always agree; it is displayed by the --version option.
__version_info__ = ('1', '1', '0')
__version__ = '.'.join(__version_info__)
__revision_date__ = "2019-04-03"
__author__ = "Nicolas Maillet"

import argparse
import os
import sys
import uuid
from pathlib import Path
42
#from context import rpg
Nicolas  MAILLET's avatar
Nicolas MAILLET committed
43
44
45
46
from rpg import core
from rpg import digest
from rpg import enzyme
from rpg.enzymes_definition import AVAILABLE_ENZYMES
Nicolas  MAILLET's avatar
Nicolas MAILLET committed
47
48
49
50
51
52
53
# Put the user's home directory on the module search path *before* importing
# ``rpg_user``: the statement order matters, the import below presumably
# resolves to a ``rpg_user`` module stored in that directory (TODO confirm).
# NOTE(review): inserting at index 0 gives modules found in the home
# directory priority over every other sys.path entry -- confirm intended.
sys.path.insert(0, str(Path.home())) # Home path
from rpg_user import AVAILABLE_ENZYMES_USER

# Enzymes shipped with RPG first, followed by the user-defined ones.
ALL_ENZYMES = AVAILABLE_ENZYMES + AVAILABLE_ENZYMES_USER
"""All available enzymes in RPG."""

def restricted_float(mc_val):
    """Restrict input miscleavage value to a float between 0 and 100.

    Used as ``type`` callable by the ``--miscleavage`` command-line option.

    :param mc_val: value to test
    :type mc_val: str or float

    :return: the inputted value, converted to float, if correct
    :rtype: float

    Problems are not raised as exceptions: they are reported through
    ``core.handle_errors`` with level ``0`` (presumably fatal -- TODO
    confirm), using the "Value " error type when the number is not
    between 0 and 100 and the "Type " error type when it is not a float.
    """
    # Keep the try body minimal: only the conversion can raise ValueError
    try:
        mc_val = float(mc_val)
    except ValueError:
        # Throw an error
        core.handle_errors("miscleavage value should be a float between 0 "\
                           "and 100.", 0, "Type ")
        return None
    # The chained comparison is False for NaN, so "nan" is rejected too
    # (it slipped through a plain `< 0 or > 100` test)
    if not 0 <= mc_val <= 100:
        core.handle_errors("miscleavage value should be between 0 and "\
                           "100.", 0, "Value ")
    return mc_val

def restricted_enzyme_id(enz_id):
    """Restrict input enzyme id to an int corresponding to an enzyme.

    Used as ``type`` callable by the ``--enzymes`` command-line option.

    :param enz_id: value to test
    :type enz_id: str or int

    :return: the inputted enzyme id, converted to int
    :rtype: int

    Problems are not raised as exceptions: they are reported through
    ``core.handle_errors`` with level ``0`` (presumably fatal -- TODO
    confirm), using the "Input " error type when the id does not
    correspond to any enzyme of ``ALL_ENZYMES`` and the "Type " error
    type when the value is not an int.
    """
    # Keep the try body minimal: only the conversion can raise ValueError
    try:
        enz_id = int(enz_id)
    except ValueError:
        # Throw an error
        core.handle_errors("Enzyme id should be an integer.", 0, "Type ")
        return None
    # Ids of every known enzyme (built-in and user-defined)
    ids_available = [enz.id_ for enz in ALL_ENZYMES]
    if enz_id not in ids_available:
        core.handle_errors("id " + str(enz_id) + " does not correspond to"\
                           " any enzyme. Use -l to get enzyme ids.", 0,
                           "Input ")
    return enz_id

def list_enzyme():
    """Print each available enzyme as ``<id>: <name>``, one per line."""
    for protease in ALL_ENZYMES:
        line = "%i: %s" % (protease.id_, protease.name)
        print(line)

107
def create_enzymes_to_use(enzymes, miscleavage):
    """Create the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.
    Each enzyme can be associated to a miscleavage value.

    Miscleavage values are paired with enzyme ids by position. Ids
    without a matching value keep their current ``ratio_miscleavage``;
    ids that match no enzyme of ``ALL_ENZYMES`` are silently skipped.

    :param enzymes: enzymes ids chosen by user
    :param miscleavage: associated miscleavage values
    :type enzymes: list(int)
    :type miscleavage: list(float)

    :return: chosen enzymes, in the order of ``enzymes``, with their
             miscleavage value set
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)

    .. note:: The objects stored in ``ALL_ENZYMES`` are modified in place
              and returned as is (no copy), so an id given twice yields
              the same object twice, carrying the last value assigned.
    """
    # Nothing selected: nothing to look up
    if not enzymes:
        return []

    # Too much miscleavage values
    if len(miscleavage) > len(enzymes):
        core.handle_errors("Too much miscleavage values. Last values"
                           " will be ignored.")
        # Get only the first ones
        miscleavage = miscleavage[:len(enzymes)]

    # Complete Enzymes to use (return)
    enzymes_to_use = []
    for pos, wanted_id in enumerate(enzymes):
        # In all available enzymes, get the current one
        for enz in ALL_ENZYMES:
            if enz.id_ == wanted_id:
                # Change miscleavage ratio only if a value was given
                # for this position
                if pos < len(miscleavage):
                    enz.ratio_miscleavage = miscleavage[pos]
                # Add it
                enzymes_to_use.append(enz)
    # Return enzymes to use
    return enzymes_to_use
# Not tested
152
def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
    """Get the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.
    Each enzyme (and associated miscleavage value) are inputted by
    user. If no valid enzyme was given, available enzymes are listed and
    user is interrogated until at least one valid enzyme is chosen.

    :param mode: Digestion mode. Miscleavage values are only asked to
                 the user when it is 'sequential'
    :param id_enz_selected: enzyme's ids chosen by user
    :param miscleavage: associated miscleavage values
    :type mode: str
    :type id_enz_selected: list(int)
    :type miscleavage: list(float)

    :return: chosen enzymes with their miscleavage value set
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)

    .. warning:: Not tested
    """
    # Get the correct Enzymes inputted
    enzymes_to_use = create_enzymes_to_use(id_enz_selected, miscleavage)
    if enzymes_to_use:
        return enzymes_to_use

    # No good Enzymes inputted, let user choose
    # Print all available enzymes
    list_enzyme()
    # Ask user to give correct enzymes ids
    while not enzymes_to_use:
        id_enz_inp = input("Choose which enzyme(s) to use, separated by"
                           " space (example: 1 5 6). (q) to quit:\n")
        # Quit (any answer containing the letter q)
        if "q" in id_enz_inp:
            sys.exit(0)
        # Ids are rebuilt from scratch at every attempt: keeping ids of a
        # previous, failed attempt would shift the pairing between ids
        # and miscleavage values
        id_enz_inputed = _parse_user_values(
            id_enz_inp, int,
            "'%s' should be an integer value. This"
            " values will be ignored.")
        mc_enz_inputed = []
        if mode == "sequential":
            mc_enz_inp = input("Percentage of miscleavage per inputed"
                               " enzyme (default 0), separated by sapce"
                               " (example: 1.2 5 6):\n")
            mc_enz_inputed = _parse_user_values(
                mc_enz_inp, float,
                "'%s' should be an floating"
                " value. This values will be"
                " ignored.")
        # Get the correct Enzyme if enzymes inputted
        enzymes_to_use = create_enzymes_to_use(id_enz_inputed,
                                               mc_enz_inputed)
    # Return Enzymes to use
    return enzymes_to_use


def _parse_user_values(raw_values, convert, warning_template):
    """Convert whitespace-separated user input into a list of values.

    :param raw_values: line typed by the user
    :param convert: conversion applied to each token (``int``, ``float``)
    :param warning_template: message with one ``%s`` placeholder, reported
                             through ``core.handle_errors`` for each token
                             that cannot be converted
    :type raw_values: str
    :type convert: callable
    :type warning_template: str

    :return: converted values, unconvertible tokens being left out
    :rtype: list
    """
    converted = []
    # split() without argument copes with repeated spaces and tabs and
    # returns an empty list for an empty line
    for token in raw_values.split():
        try:
            converted.append(convert(token))
        except ValueError:
            # Report it and ignore this token
            core.handle_errors(warning_template % token)
    return converted
# Not tested
def main():
    """Launcher of RapidPeptidesGenerator

    Parse the command line, resolve the digestion mode, pKa table, input
    data, output file name and enzymes, then run the digestion and output
    the results. Exits directly after ``--addenzyme`` and ``--list``.

    .. warning:: Not tested
    """
    args = _build_parser().parse_args()

    # --addenzyme option
    if args.addenzyme:
        enzyme.user_creation_enzyme(ALL_ENZYMES)
        sys.exit(0)

    # --digest option
    mode = "sequential"
    if args.digest in ("c", "concurrent"):
        mode = "concurrent"
        args.miscleavage = []  # No miscleavage on concurrent, infinite time

    # --pka option
    aa_pka = core.AA_PKA_IPC
    if args.pka == "stryer":
        aa_pka = core.AA_PKA_S

    # --list option
    if args.list:
        list_enzyme()
        sys.exit(0)

    # --noninteractive option
    if args.noninteractive:
        args.quiet = 1
        args.verbose = 0

    # input data
    input_data = None
    input_type = None
    if args.inputdata:
        if os.path.isfile(args.inputdata):
            input_data = args.inputdata
            input_type = "file"
        else:
            # Level 0 is presumably fatal (TODO confirm): the code below
            # relies on input_data being set
            core.handle_errors("file not found (%s)." % args.inputdata, 0,
                               "Input ")
    elif args.sequence:
        input_data = args.sequence
        input_type = "sequence"

    # --outputfile / --randomname options
    output_file = ""  # No output file (default)
    if args.randomname:
        # Generate a random file name
        output_file = str(uuid.uuid4().hex) + "." + args.fmt
        # Ensure the name is unique
        while os.path.isfile(output_file):
            # Generate a random file name
            output_file = str(uuid.uuid4().hex) + "." + args.fmt
    # Chosen file name if exist
    elif args.outputfile:
        output_file = _with_default_extension(str(args.outputfile), args.fmt)
        # If interactive mode, never silently overwrite an existing file
        if not args.noninteractive:
            # This file already exist?
            while os.path.isfile(output_file):
                core.handle_errors("File '%s' already exit!" % output_file)
                # Overwrite it
                if input("Overwrite it? (y/n)\n") == "y":
                    break
                # Don't overwrite it, ask for another name
                output_file = _with_default_extension(
                    input("Output filename?\n"), args.fmt)

    # More mis cleavage than enzyme
    if len(args.miscleavage) > len(args.enzymes):
        core.handle_errors("Too much miscleavage values. Last values will "
                           "be ignored.")
        args.miscleavage = args.miscleavage[:len(args.enzymes)]

    # Get all enzymes inputted
    # NOTE(review): get_enzymes_to_use() prompts with input() when no valid
    # enzyme was given, even with --noninteractive -- confirm intended.
    enzymes_to_use = get_enzymes_to_use(mode, args.enzymes, args.miscleavage)

    # Output options
    if args.verbose:
        print("Input: " + input_data)
        print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use]))
        print("Mode: " + mode)
        print("miscleavage ratio: " +
              str([enz.ratio_miscleavage for enz in enzymes_to_use]))
        if output_file:
            print("Output file: " + os.path.abspath(output_file))

    # Make the actual digestion of input data
    results_digestion = digest.digest_from_input(input_data, input_type,
                                                 enzymes_to_use, mode, aa_pka)

    # Output results
    core.output_results(output_file, results_digestion, args.fmt, args.quiet,
                        args.verbose)


def _with_default_extension(filename, fmt):
    """Return ``filename``, suffixed with ``.fmt`` when it has no extension.

    Only the last path component is inspected, so a dot located in a
    directory part ('./out', 'run.v2/out') does not count as an extension.
    """
    if os.path.splitext(filename)[1]:
        return filename
    # No extension: add default extension for this file format
    return filename + "." + fmt


def _build_parser():
    """Build the command-line parser of RPG (declares options only)."""
    parser = argparse.ArgumentParser(
        description="This software takes protein sequences as input (-i "
                    "option). All sequences will be cleaved according to "
                    "selected enzymes (-e option) and given miscleavage "
                    "percentage (-m option). Cleaving can be sequential or "
                    "concurrent (-d option). Resulting peptides are "
                    "outputted in a file (-o option) in fasta, csv or tsv "
                    "format (-f option). Classical enzymes are included (-l "
                    "option to print available enzymes) but it is possible "
                    "to define other enzymes (-a option). See "
                    "https://gitlab.pasteur.fr/nmaillet/rpg/ and "
                    "https://rapid-peptide-generator.readthedocs.io for more "
                    "informations.")
    # Exactly one of -a / -i / -l / -s has to be given
    group_launch = parser.add_mutually_exclusive_group(required=True)
    group_launch.add_argument("-a", "--addenzyme", action="store_true",
                              help="Create a new enzyme. See "
                              "https://rapid-peptide-generator.readthedocs.io"
                              " for more informations")
    parser.add_argument("-d", "--digest", metavar="",
                        choices=['s', 'sequential', 'c', 'concurrent'],
                        default="s", help="Digestion mode. Either 's', "
                        "'sequential', 'c' or 'concurrent' (default: s)")
    parser.add_argument("-e", "--enzymes", metavar="", default=[],
                        nargs='+', type=restricted_enzyme_id,
                        help="Id of enzyme(s) to use (i.e. -e 0 5 10 to use "
                        "enzymes 0, 5 and 10). Use -l first to get enzyme ids")
    parser.add_argument("-f", "--fmt", metavar="",
                        choices=['fasta', 'csv', 'tsv'], default="fasta",
                        help="Output file format. Either 'fasta', 'csv', or "
                        "'tsv' (default: fasta)")
    group_launch.add_argument("-i", "--inputdata", metavar="",
                              help="Input file, in fasta / fastq format")
    group_launch.add_argument("-l", "--list", action="store_true",
                              help="Display the list of available enzymes")
    group_launch.add_argument("-s", "--sequence", metavar="",
                              help="Input a single protein sequence without "
                              "commentary")
    parser.add_argument("-m", "--miscleavage", metavar="", default=[],
                        nargs='+', type=restricted_float,
                        help="Percentage of miscleavage, between 0 and 100,"
                        " by enzyme(s). It should be in the same order than "
                        "-enzymes options (i.e. -m 15 5 10). Only for "
                        "sequential digestion (default: 0)")
    parser.add_argument("-n", "--noninteractive", action='store_true',
                        help="Non-interactive mode. No standard output, only "
                        "error(s) (--quiet enable, overwrite -v). If output "
                        "filename already exists, output file will be "
                        "overwritten.")
    parser.add_argument("-p", "--pka", metavar="", choices=['ipc', 'stryer'],
                        default="ipc", help="Define pKa values. Either 'ipc' "
                        "or 'stryer' (default: ipc)")
    # -o and -r exclude each other
    group_output = parser.add_mutually_exclusive_group()
    group_output.add_argument("-o", "--outputfile", type=str, metavar="",
                              default="", help="Optional result file "
                              "to output result peptides.")
    group_output.add_argument("-r", "--randomname", action="store_true",
                              help="Random (not used) output file name")
    # -q and -v exclude each other
    group_verbose = parser.add_mutually_exclusive_group()
    group_verbose.add_argument("-q", "--quiet", action="store_true",
                               help="No standard output, only error(s)")
    group_verbose.add_argument("-v", "--verbose", action="count", default=0,
                               help="Increase output verbosity. -vv will "
                               "increase more than -v")
    parser.add_argument("--version", action="version",
                        version="%(prog)s " + __version__ + " from " +
                        __revision_date__)
    return parser


### Script entry point ###
if __name__ == "__main__":
    # Run the command-line interface, then leave with a success status
    main()
    sys.exit(0)