From df253c3824448b76bee0c0faae65e8a7324f16fc Mon Sep 17 00:00:00 2001 From: Nicolas MAILLET <nicolas.maillet@pasteur.fr> Date: Wed, 3 Apr 2019 18:02:14 +0200 Subject: [PATCH] v 1.1.0 - change input option -i (file only) and add -s (sequence) --- CHANGELOG.rst | 3 ++ docs/conf.py | 2 +- rpg/RapidPeptidesGenerator.py | 27 +++++++++++--- rpg/digest.py | 16 +++++--- setup.py | 4 +- tests/test_core.py | 69 ++++++++++++++++++----------------- tests/test_digest.py | 14 ++++--- 7 files changed, 82 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 63983c5..41118fd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,9 @@ ========= CHANGELOG ========= +- 1.1.0 + Modify input. Now, option -i only takes files. Use option -s to input sequence + - 1.0.9 Correct a bug of random dict in the creation of new enzyme diff --git a/docs/conf.py b/docs/conf.py index aff06f3..64f0732 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -26,7 +26,7 @@ author = 'Nicolas Maillet' # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '1.0.9' +release = '1.1.0' # -- General configuration --------------------------------------------------- diff --git a/rpg/RapidPeptidesGenerator.py b/rpg/RapidPeptidesGenerator.py index f721d9a..de07794 100644 --- a/rpg/RapidPeptidesGenerator.py +++ b/rpg/RapidPeptidesGenerator.py @@ -29,9 +29,9 @@ necessary functions """ -__version_info__ = ('1', '0', '9') +__version_info__ = ('1', '1', '0') __version__ = '.'.join(__version_info__) -__revision_date__ = "2019-03-07" +__revision_date__ = "2019-04-03" __author__ = "Nicolas Maillet" import argparse @@ -260,10 +260,11 @@ def main(): help="Output file format. Either 'fasta', 'csv', or " "'tsv' (default: fasta)") group_launch.add_argument("-i", "--inputdata", metavar="", - help="Input file, in fasta / fastq format or a " - "single protein sequence without commentary") + help="Input file, in fasta / fastq format") group_launch.add_argument("-l", "--list", action="store_true", help="Display the list of available enzymes") + group_launch.add_argument("-s", "--sequence", metavar="", + help="Input a single protein sequence without commentary") parser.add_argument("-m", "--miscleavage", metavar="", default=[], nargs='+', type=restricted_float, help="Percentage of miscleavage, between 0 and 100," @@ -321,6 +322,20 @@ def main(): args.quiet = 1 args.verbose = 0 + # input data + input_data = None + input_type = None + if args.inputdata: + if os.path.isfile(args.inputdata): + input_data = args.inputdata + input_type = "file" + else: + core.handle_errors("file not found (%s)." % args.inputdata, 0, "I"\ + "nput ") + elif args.sequence: + input_data = args.sequence + input_type = "sequence" + # --outputfile / --randomname options output_file = "" # No output file (default) if args.randomname: @@ -369,7 +384,7 @@ def main(): # Output options if args.verbose: - print("Input: " + args.inputdata) + print("Input: " + input_data) print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use])) print("Mode: " + mode) print("miscleavage ratio: " + @@ -378,7 +393,7 @@ def main(): print("Output file: " + os.path.abspath(output_file)) # Make the actual digestion of input data - results_digestion = digest.digest_from_input(args.inputdata, + results_digestion = digest.digest_from_input(input_data, input_type, enzymes_to_use, mode, aa_pka) # Output results diff --git a/rpg/digest.py b/rpg/digest.py index 649ed3e..586e43c 100644 --- a/rpg/digest.py +++ b/rpg/digest.py @@ -323,7 +323,7 @@ def digest_one_sequence(seq, enz, mode, aa_pka): elif mode == "concurrent": ret = concurrent_digest(seq, enz, aa_pka) else: - core.handle_errors("not able to understand digetion mode. Switching " + core.handle_errors("not able to understand digestion mode. Switching " "to 'sequential'.") ret = sequential_digest(seq, enz, aa_pka) return ret @@ -409,15 +409,17 @@ def concurrent_digest(seq, enz, aa_pka): # it will be one result by enzyme return [result] -def digest_from_input(input_data, enz, mode, aa_pka): +def digest_from_input(input_data, input_type, enz, mode, aa_pka): """Digest all sequences of input data with selected enzymes and mode. - :param input_data: either a sequence or a file of sequence (fasta/fastq) + :param input_data: either a sequence or the path of a file of sequence (fasta/fastq) + :param input_type: either 'sequence' or 'file' :param enz: enzymes to digest with :param mode: digestion mode (concurrent / sequential) :param aa_pka: pKa values (IPC / Stryer) :type input_data: str + :type input_type: str :type enz: list(:py:class:`~rpg.enzyme.Enzyme`) :type mode: str :type aa_pka: str @@ -428,7 +430,7 @@ def digest_from_input(input_data, enz, mode, aa_pka): # Results of digestion results_digestion = [] # Input is a file? - if os.path.isfile(input_data): + if input_type == "file": with open(input_data) as in_file: header_first_car = in_file.read(1) in_file.seek(0) @@ -476,11 +478,15 @@ def digest_from_input(input_data, enz, mode, aa_pka): core.handle_errors("input file format not recognized (%s)." % header_first_car, 0, "Input ") # input is a single sequence - else: + elif input_type == "sequence": tmp_seq = sequence.Sequence("Input", sequence.check_sequence(input_data)) # Digest the sequence results_digestion.append(digest_one_sequence(tmp_seq, enz, mode, aa_pka)) + # bad input + else: + core.handle_errors("input type not recognized (%s)." % + input_type, 0, "Input ") # Return all peptides return results_digestion diff --git a/setup.py b/setup.py index 2a3b26f..58b0c34 100644 --- a/setup.py +++ b/setup.py @@ -3,8 +3,8 @@ import os from setuptools import setup, find_packages _MAJOR = 1 -_MINOR = 0 -_MICRO = 9 +_MINOR = 1 +_MICRO = 0 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) diff --git a/tests/test_core.py b/tests/test_core.py index 75a3720..fe264b7 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -76,7 +76,8 @@ def test_output_results(capsys, tmpdir): # CSV output seq = "WQSDESDFZQSDESDF" aa_pka = core.AA_PKA_IPC - all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) + all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode, + aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = False @@ -86,12 +87,12 @@ def test_output_results(capsys, tmpdir): assert out == output_file.read() assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\ "pos,Peptide_size,Peptide_mass,pI,Sequence\n"\ - "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\ - "\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\ - "D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\ - "FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\ - "3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\ - "8,5.97,F\n" + "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\ + "D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\ + "ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\ + "14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\ + "58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\ + "19188,5.97,F\n" # TSV output output_file = tmpdir.join("test_result.tsv") @@ -104,12 +105,12 @@ def test_output_results(capsys, tmpdir): assert output_file.read() == "Original_header\tNo_peptide\tEnzyme\tCleavi"\ "ng_pos\tPeptide_size\tPeptide_mass\tpI\tSeq"\ "uence\nInput\t0\tfake_enzyme1\t4\t4\t534.52"\ - "598\t3.14\tWQSD\nInput\t1\tfake_enzyme1\t7\t"\ - "3\t349.29758\t3.04\tESD\nInput\t2\tfake_enzy"\ - "me1\t12\t5\t495.48938\t3.14\tFZQSD\nInput\t3"\ - "\tfake_enzyme1\t15\t3\t349.29758\t3.04\tESD"\ - "\nInput\t4\tfake_enzyme1\t16\t1\t165.19188"\ - "\t5.97\tF\n" + "598\t3.14\tWQSD\nInput\t1\tfake_enzyme1\t7"\ + "\t3\t349.29758\t3.04\tESD\nInput\t2\tfake_e"\ + "nzyme1\t12\t5\t495.48938\t3.14\tFZQSD\nInpu"\ + "t\t3\tfake_enzyme1\t15\t3\t349.29758\t3.04"\ + "\tESD\nInput\t4\tfake_enzyme1\t16\t1\t165.1"\ + "9188\t5.97\tF\n" # Fasta output output_file = tmpdir.join("test_result.fasta") @@ -119,16 +120,17 @@ def test_output_results(capsys, tmpdir): core.output_results(str(output_file), all_seq_digested, fmt, quiet, verbose) out, err = capsys.readouterr() assert out == output_file.read() - assert output_file.read() == ">Input_0_fake_enzyme1_4_4_534.52598_3.14\nWQ"\ - "SD\n>Input_1_fake_enzyme1_7_3_349.29758_3.04"\ - "\nESD\n>Input_2_fake_enzyme1_12_5_495.48938"\ - "_3.14\nFZQSD\n>Input_3_fake_enzyme1_15_3_349"\ - ".29758_3.04\nESD\n>Input_4_fake_enzyme1_16_1"\ - "_165.19188_5.97\nF\n" + assert output_file.read() == ">Input_0_fake_enzyme1_4_4_534.52598_3.14\nW"\ + "QSD\n>Input_1_fake_enzyme1_7_3_349.29758_3."\ + "04\nESD\n>Input_2_fake_enzyme1_12_5_495.489"\ + "38_3.14\nFZQSD\n>Input_3_fake_enzyme1_15_3_"\ + "349.29758_3.04\nESD\n>Input_4_fake_enzyme1_"\ + "16_1_165.19188_5.97\nF\n" # CSV output in quiet seq = "WQSDESDFZQSDESDF" - all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) + all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode, + aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = True @@ -140,16 +142,17 @@ def test_output_results(capsys, tmpdir): assert out == "" assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\ "pos,Peptide_size,Peptide_mass,pI,Sequence\n"\ - "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\ - "\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\ - "D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\ - "FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\ - "3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\ - "8,5.97,F\n" + "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\ + "D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\ + "ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\ + "14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\ + "58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\ + "19188,5.97,F\n" # CSV output in verbose > 2 seq = "WQSDESDFZQSDESDF" - all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) + all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode, + aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = False @@ -159,12 +162,12 @@ def test_output_results(capsys, tmpdir): out, err = capsys.readouterr() assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\ "pos,Peptide_size,Peptide_mass,pI,Sequence\n"\ - "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\ - "\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\ - "D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\ - "FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\ - "3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\ - "8,5.97,F\n" + "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\ + "D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\ + "ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\ + "14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\ + "58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\ + "19188,5.97,F\n" # Verbose > 2 assert out == "\nNumber of cleavage: 4\nCleavage position: 4, 7, 12, 15\n"\ "Number of miscleavage: 0\nmiscleavage position: \nmis"\ diff --git a/tests/test_digest.py b/tests/test_digest.py index 9735df5..89c2836 100644 --- a/tests/test_digest.py +++ b/tests/test_digest.py @@ -342,7 +342,7 @@ def test_digest_from_input(capsys, tmpdir): # Test wrong file with pytest.raises(SystemExit) as pytest_wrapped_e: - digest.digest_from_input(str(Path.home()) + "/rpg_user.py", + digest.digest_from_input(str(Path.home()) + "/rpg_user.py", "file", enzymes, mode, aa_pka) _, err = capsys.readouterr() assert err == "Input Error: input file format not recognized (f).\n" @@ -351,7 +351,7 @@ def test_digest_from_input(capsys, tmpdir): # Test input data seq = "WQSDESDFZQSDESDF" - res = digest.digest_from_input(seq, enzymes, mode, aa_pka) + res = digest.digest_from_input(seq, "sequence", enzymes, mode, aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ @@ -380,7 +380,8 @@ def test_digest_from_input(capsys, tmpdir): # Test fasta file fasta_file = tmpdir.join("test.fasta") fasta_file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET") - res = digest.digest_from_input(str(fasta_file), enzymes, mode, aa_pka) + res = digest.digest_from_input(str(fasta_file), "file", enzymes, mode, + aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ @@ -416,9 +417,10 @@ def test_digest_from_input(capsys, tmpdir): # Test fastq file (same result) fastq_file = tmpdir.join("test.fastq") - fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@Fa"\ - "ke2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n") - res = digest.digest_from_input(str(fastq_file), enzymes, mode, aa_pka) + fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\ + "ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n") + res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode, + aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ -- GitLab