# RapidPeptidesGenerator.py (last committed by Nicolas Maillet)
# -*- coding: utf-8 -*-
#!/usr/bin/env python3.6

########################################################################
# Rapid Peptide Generator (RPG) is a software dedicated to predict     #
# cleavage sites of proteases. User can create his own enzyme,         #
# following a simple grammar.                                          #
#                                                                      #
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Main file of RPG software, handle input/output and launch
necessary functions
"""

# DOC: last peptide, cut at pos -1 because no cut
# Add the enzymes of PeptideCutter
# Write the enzymes' docstrings for the manual -> do not forget that for the
# exception (R,)(D)(D), if (R,)(D) is not defined, (R,)(D) will be set to
# TRUE! This is the case in Factor Xa
# add with same name !!
# Test_enzyme line 50, bug with the tmpfile

#Started on 22 Apr. 2016
#Digest sequences given restriction enzymes
#Input format : a fasta/fastq file or a sequence
#Output format : a fasta/csv/tsv file containing the digested sequences
#@author: Nicolas Maillet
#@version: 0.0.9α
#@note: Pwet

# Version of the software, as a tuple of strings and as a dotted string
__version_info__ = ('0', '0', '8')
__version__ = '.'.join(__version_info__)
# Revision date; printed together with __version__ by the --version option
__revision_date__ = "2018-02-28"
__author__ = "Nicolas Maillet"

import argparse
import os
import sys
import uuid
from pathlib import Path
import core
import digest
import enzyme
from enzymes_definition import AVAILABLE_ENZYMES
sys.path.insert(0, str(Path.home())) # Home path
from rpg_user import AVAILABLE_ENZYMES_USER

# Enzymes from enzymes_definition followed by the ones imported from rpg_user
ALL_ENZYMES = AVAILABLE_ENZYMES + AVAILABLE_ENZYMES_USER
"""All available enzymes in RPG."""

def restricted_float(mc_val):
    """Restricts input miss-cleavage value to a float between 0 and 100.

    The value is converted with ``float()`` and must lie in the closed
    interval [0, 100]. ``nan`` converts to a float without error but is
    not a usable percentage, so it is rejected like any other
    out-of-range value.

    :param mc_val: value to test
    :type mc_val: str or float

    :return: the inputed value, converted to float, if correct
    :rtype: float

    An out-of-range value is reported through ``core.handle_errors``
    with ``0`` and the ``"Value "`` prefix; a value that cannot be
    converted to a float is reported the same way with the ``"Type "``
    prefix.
    """
    try:
        mc_val = float(mc_val)
        # Positive range test: every comparison involving NaN is False,
        # so "mc_val < 0 or mc_val > 100" would let NaN through
        if not 0 <= mc_val <= 100:
            core.handle_errors("miss-cleavage value should be between 0 and "\
                               "100.", 0, "Value ")
        return mc_val
    except ValueError:
        # Not convertible to a float: report it
        core.handle_errors("miss-cleavage value should be a float between 0 "\
                           "and 100.", 0, "Type ")

def restricted_enzyme_id(enz_id):
    """Restrict input enzyme id to an int corresponding to an enzyme.

    The value is converted with ``int()`` and then looked up among the
    ``id_`` attributes of the enzymes of ``ALL_ENZYMES``.

    :param enz_id: value to test
    :type enz_id: str or int

    :return: the inputed enzyme id, converted to int
    :rtype: int

    An id matching no enzyme is reported through ``core.handle_errors``
    with ``0`` and the ``"Input "`` prefix; a value that cannot be
    converted to an int is reported the same way with the ``"Type "``
    prefix.
    """
    try:
        enz_id = int(enz_id)
        # Ids of every enzyme currently known
        known_ids = [known.id_ for known in ALL_ENZYMES]
        if enz_id not in known_ids:
            message = ("id " + str(enz_id) + " does not correspond to"
                       " any enzyme. Use -l to get enzyme ids.")
            core.handle_errors(message, 0, "Input ")
        return enz_id
    except ValueError:
        # Not convertible to an int: report it
        core.handle_errors("Enzyme id should be an integer.", 0, "Type ")

def list_enzyme():
    """Print one ``id: name`` line per enzyme of ``ALL_ENZYMES``."""
    for available in ALL_ENZYMES:
        line = "%i: %s" % (available.id_, available.name)
        print(line)

def create_enzymes_to_use(enzymes, miss_cleavage):
    """Build the list of :py:class:`~rpg.enzyme.Enzyme` matching the
    chosen ids.

    Miss-cleavage values are paired with ids by position: the n-th value
    is written to ``ratio_miss_cleavage`` of the enzyme(s) whose ``id_``
    equals the n-th id. Ids beyond the last value select their enzyme
    without touching its ratio. Extra values are reported through
    ``core.handle_errors`` and dropped.

    :param enzymes: enzymes ids chosen by user
    :param miss_cleavage: associated miss-cleavage values
    :type enzymes: list(int)
    :type miss_cleavage: list(float)

    :return: the enzymes of ``ALL_ENZYMES`` matching the ids, in the
             order of the ids (empty if no id was given)
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)
    """
    selected = []
    # Nothing requested, nothing to build
    if not enzymes:
        return selected

    # More values than ids: keep only the leading ones
    if len(miss_cleavage) > len(enzymes):
        core.handle_errors("Too much miss-cleavage values. Last values"
                           " will be ignored.")
        miss_cleavage = miss_cleavage[:len(enzymes)]
    nb_with_ratio = len(miss_cleavage)

    # Ids that come with a miss-cleavage value
    for wanted_id, ratio in zip(enzymes, miss_cleavage):
        for candidate in ALL_ENZYMES:
            if candidate.id_ == wanted_id:
                candidate.ratio_miss_cleavage = ratio
                selected.append(candidate)

    # Remaining ids, selected as they are
    for wanted_id in enzymes[nb_with_ratio:]:
        selected.extend(candidate for candidate in ALL_ENZYMES
                        if candidate.id_ == wanted_id)
    return selected
# Not tested
def get_enzymes_to_use(mode, id_enz_selected, miss_cleavage):
    """Get the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.

    The ids and miss-cleavage values received as arguments are tried
    first. If they select no enzyme, the available enzymes are printed
    and the user is asked on standard input, again and again, until at
    least one enzyme is selected or the answer contains ``q``.

    :param mode: Digestion mode. Miss-cleavage values are only asked for when it is 'sequential'
    :param id_enz_selected: enzyme's ids chosen by user
    :param miss_cleavage: associated miss-cleavage values
    :type mode: str
    :type id_enz_selected: list(int)
    :type miss_cleavage: list(float)

    :return: the selected enzymes, as built by ``create_enzymes_to_use``
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)

    .. warning:: Not tested
    """
    # Try first with what was received
    enzymes_to_use = create_enzymes_to_use(id_enz_selected, miss_cleavage)
    if enzymes_to_use:
        return enzymes_to_use

    # No good Enzymes inputed, let user choose among the available ones
    list_enzyme()
    while not enzymes_to_use:
        id_enz_inp = input("Choose which enzyme(s) to use, separated by"
                           " comma (example: 1,5,6). (q) to quit:\n")
        # Quit
        if "q" in id_enz_inp:
            sys.exit(0)
        # The id list is rebuilt at every attempt. Keeping the ids of a
        # failed attempt would leave them at the front of the list, where
        # they would be paired with the miss-cleavage values typed for
        # this attempt
        id_enz_inputed = _parse_user_values(
            id_enz_inp, int,
            "'%s' should be an integer value. This"
            " values will be ignored.")
        mc_enz_inputed = []
        if mode == "sequential":
            mc_enz_inp = input("Percentage of miss-cleavage per inputed"
                               " enzyme (default 0), separated by comma"
                               " (example: 1,5,6):\n")
            # An empty answer means no miss-cleavage value at all
            if mc_enz_inp:
                mc_enz_inputed = _parse_user_values(
                    mc_enz_inp, float,
                    "'%s' should be an floating"
                    " value. This values will be"
                    " ignored.")
        # Get the correct Enzyme if enzymes inputed
        enzymes_to_use = create_enzymes_to_use(id_enz_inputed,
                                               mc_enz_inputed)
    # Return Enzymes to use
    return enzymes_to_use


def _parse_user_values(raw_values, convert, error_template):
    """Convert a comma-separated answer into a list of values.

    Each item is passed to ``convert``; an item raising ``ValueError``
    is reported through ``core.handle_errors`` (``error_template``
    formatted with the item) and left out of the result.
    """
    values = []
    for item in raw_values.split(","):
        try:
            values.append(convert(item))
        except ValueError:
            core.handle_errors(error_template % item)
    return values
# Not tested
def main():
    """Launcher of RapidPeptidesGenerator

    Parses the command line, then, depending on the options:

    - ``--addenzyme``: runs ``enzyme.user_creation_enzyme()`` and exits;
    - ``--list``: prints the available enzymes and exits;
    - ``--inputdata``: chooses the output file name, gets the enzymes to
      use, digests the input with ``digest.digest_from_input`` and hands
      the result to ``core.output_results``.

    .. warning:: Not tested
    """
    parser = argparse.ArgumentParser(description="This software takes protein "
                                                 "sequences as input (-i optio"
                                                 "n). All sequences will be cl"
                                                 "eaved according to selected "
                                                 "enzymes (-e option) and give"
                                                 "n miss-cleavage percentage ("
                                                 "-m option). Cleaving can be "
                                                 "sequential or concurrent (-d"
                                                 " option). Resulting peptides"
                                                 " are outputted in a file (-o "
                                                 "option) if fasta, csv or tsv"
                                                 " format (-f option). Classic"
                                                 "al enzymes are included (-l "
                                                 "option to print available en"
                                                 "zymes) but it is possible to"
                                                 " define other enzymes (-a op"
                                                 "tion).")
    # Exactly one of -a / -i / -l must be given
    group_launch = parser.add_mutually_exclusive_group(required=True)
    group_launch.add_argument("-a", "--addenzyme", action="store_true",
                              help="Add a new cleaving enzyme. See the manual"
                              " for more information")
    parser.add_argument("-d", "--digest", metavar="",
                        choices=['s', 'sequential', 'c', 'concurrent'],
                        default="s", help="Digestion mode. Either 's', 'seque"
                        "ntial', 'c' or 'concurrent' (default: s)")
    parser.add_argument("-e", "--enzymes", metavar="", default=[],
                        action='append', type=restricted_enzyme_id,
                        help="Id of enzyme(s) to use (i.e. -e 0 -e 5 -e 10 to"
                        " use enzymes 0, 5 and 10). Use -l first to get "
                        "enzyme ids")
    parser.add_argument("-f", "--fmt", metavar="",
                        choices=['fasta', 'csv', 'tsv'], default="fasta",
                        help="Output file format. Either 'fasta', 'csv', or "
                        "'tsv' (default: fasta)")
    group_launch.add_argument("-i", "--inputdata", metavar="",
                              help="Input file, in fasta / fastq format or a "
                              "single protein sequence without commentary")
    group_launch.add_argument("-l", "--list", action="store_true",
                              help="Display the list of available enzymes")
    parser.add_argument("-m", "--misscleavage", metavar="", default=[],
                        action='append', type=restricted_float,
                        help="Percentage of miss-cleavage, between 0 and 100,"
                        " by enzyme(s). It should be in the same order than "
                        "-enzymes options (i.e. -m 15 -m 5 -m 10). Only for "
                        "sequential digestion (default: 0)")
    parser.add_argument("-n", "--noninteractive", action='store_true',
                        help="Non-interactive mode. No standard output, only "
                        "error(s) (--quiet enable, overwrite -v). If output "
                        "filename already exists, output file will be "
                        "overwritten.")
    # -o and -r cannot be combined
    group_output = parser.add_mutually_exclusive_group()
    group_output.add_argument("-o", "--outputfile", type=str, metavar="",
                              default="peptides", help="Result file to "
                              "output result peptides (default './peptides"
                              ".xxx' depending of --fmt)")
    group_output.add_argument("-r", "--randomname", action="store_true",
                              help="Random (not used) output name file")
    # -q and -v cannot be combined
    group_verbose = parser.add_mutually_exclusive_group()
    group_verbose.add_argument("-q", "--quiet", action="store_true",
                               help="No standard output, only error(s)")
    group_verbose.add_argument("-v", "--verbose", action="count", default=0,
                               help="Increase output verbosity")
    parser.add_argument("--version", action="version",
                        version="%(prog)s " + __version__ + " from " +
                        __revision_date__)
    args = parser.parse_args()

    # --addenzyme option
    if args.addenzyme:
        enzyme.user_creation_enzyme()
        sys.exit(0)

    # --digest option
    mode = "sequential"
    if args.digest == "c" or args.digest == "concurrent":
        mode = "concurrent"
        args.misscleavage = []  # No miss-cleavage on concurrent, infinite time

    # --list option
    if args.list:
        list_enzyme()
        sys.exit(0)

    # --nointeractive option: quiet is forced on and verbosity reset
    # NOTE(review): get_enzymes_to_use still prompts on standard input when
    # no enzyme id was given, even in this mode
    if args.noninteractive:
        args.quiet = 1
        args.verbose = 0

    # --outputfile / --randomname options
    if args.randomname:
        # Generate a random file name
        output_file = str(uuid.uuid4().hex) + "." + args.fmt
        # Ensure the name is unique
        while os.path.isfile(output_file):
            # Generate a random file name
            output_file = str(uuid.uuid4().hex) + "." + args.fmt
    # Chosen file name
    else:
        # Given name, completed with the format extension if it has none
        output_file = _with_default_extension(str(args.outputfile), args.fmt)
        # If interactive mode
        if not args.noninteractive:
            # Ask until the name is free or the user accepts to overwrite
            while os.path.isfile(output_file):
                core.handle_errors("File '%s' already exit!" % output_file)
                # Overwrite it
                if input("Overwrite it? (y/n)\n") == "y":
                    break
                # Don't overwrite it: ask for another name
                output_file = _with_default_extension(
                    input("Output filename?\n"), args.fmt)

    # More miss cleavage than enzyme
    if len(args.misscleavage) > len(args.enzymes):
        core.handle_errors("Too much miss-cleavage values. Last values will "
                           "be ignored.")
        args.misscleavage = args.misscleavage[:len(args.enzymes)]

    # Get all enzymes inputed
    enzymes_to_use = get_enzymes_to_use(mode, args.enzymes, args.misscleavage)

    # Output options
    if args.verbose:
        print("Input: " + args.inputdata)
        print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use]))
        print("Mode: " + mode)
        print("Miss-cleavage ratio: " +
              str([enz.ratio_miss_cleavage for enz in enzymes_to_use]))
        print("Output file: " + os.path.abspath(output_file))

    # Make the actual digestion of input data
    results_digestion = digest.digest_from_input(args.inputdata,
                                                 enzymes_to_use, mode)

    # Output results
    core.output_results(output_file, results_digestion, args.fmt, args.quiet,
                        args.verbose)


def _with_default_extension(filename, fmt):
    """Return ``filename``, followed by ``"." + fmt`` if it has no extension.

    Only the last path component is inspected, so a dot in a directory
    part ("./out", "../peptides", "results.d/peptides") does not count as
    an extension.
    """
    if os.path.splitext(filename)[1]:
        return filename
    return filename + "." + fmt


### Let'z go ###
if __name__ == "__main__":
    # Run the launcher, then leave with a success status
    main()
    sys.exit(0)