From 1122fe4df97ea5c0830a457724a1212716c844dd Mon Sep 17 00:00:00 2001 From: Nico Maillet <nicolas.maillet@pasteur.fr> Date: Wed, 22 Feb 2023 11:55:05 +0100 Subject: [PATCH] Add theoritical peptides functions and firsts tests --- rpg/RapidPeptidesGenerator.py | 48 ++++++++++++++- rpg/digest.py | 57 ++++++++++++++++++ rpg/enzyme.py | 48 ++++++++++----- tests/test_digest.py | 4 ++ tests/test_enzyme.py | 80 +++++++++++++++++++++++-- tests/test_functional.py | 107 ++++++++++++++++++++++++++++++---- 6 files changed, 313 insertions(+), 31 deletions(-) diff --git a/rpg/RapidPeptidesGenerator.py b/rpg/RapidPeptidesGenerator.py index 667326c..4204a5a 100644 --- a/rpg/RapidPeptidesGenerator.py +++ b/rpg/RapidPeptidesGenerator.py @@ -73,6 +73,27 @@ def restricted_float(mc_val): core.handle_errors("miscleavage value should be a float between 0 "\ "and 100.", 0, "Type ") +def restricted_int(mc_val): + """Restricts input miscleavage value to a int. + + :param mc_val: value to test + :type mc_val: int + + :return: the inputed value if correct + :rtype: int + + :raises custom TypeError: if value is not an int + """ + try: + mc_val = int(mc_val) + if mc_val < 0 : + core.handle_errors("miscleavage value should be greater then 0.", 0, "Value ") + return mc_val + except ValueError: + # Throw an error + core.handle_errors("miscleavage value should be an integer.", 0, + "Type ") + def restricted_enzyme_id(enz_id): """Restrict input enzyme id to a str corresponding to an enzyme. @@ -204,6 +225,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage): mc_enz_inputed) # Return Enzymes to use return enzymes_to_use + # Not tested def main(): """Launcher of RapidPeptidesGenerator""" @@ -258,12 +280,20 @@ def main(): group_launch.add_argument("-s", "--sequence", metavar="", help="Input a single protein sequence without "\ "commentary") - parser.add_argument("-m", "--miscleavage", metavar="", default=[], + group_mc = parser.add_mutually_exclusive_group(required=False) + group_mc.add_argument("-m", "--miscleavage", metavar="", default=[], nargs='+', type=restricted_float, help="Percentage of miscleavage, between 0 and 100," " by enzyme(s). It should be in the same order than " "-enzymes options (e.g. -m 15 5 10). Only for sequenti" "al digestion (default: 0)") + group_mc.add_argument("-t", "--theoretical", metavar="", default=[], + nargs='+', type=restricted_int, + help="Maximum number of miscleavages, by enzyme(s). " + "RPG will produce all theoretical peptides allowing " + "at most -t miscleavages. It should be in the same " + "order than -enzymes options (e.g. -t 1 3 2). Only " + "for sequential digestion (default: 0)") parser.add_argument("-n", "--noninteractive", action='store_true', help="Non-interactive mode. No standard output, only " "error(s) (--quiet enable, overwrite -v). If output " @@ -441,6 +471,22 @@ def main(): aa_mass, water_mass, args.processes) + # We need to compute all theoretical peptides for at least one enzyme + if args.theoretical: + # More mis cleavage than enzyme + if len(args.theoretical) > len(args.enzymes): + core.handle_errors("Too much miscleavage values. Last values will " + "be ignored.") + # Not enough mis cleavage values, complete with 0 + elif len(args.theoretical) < len(args.enzymes): + args.theoretical += [0]*(len(args.enzymes) - len(args.theoretical)) + # Dict of enzymes/max mis cleavage + enzymes_to_use = {} + # Link enzyme to mc value + for i, j in enumerate(args.enzymes): + enzymes_to_use[enzyme.get_enz_name(ALL_ENZYMES, j)] = args.theoretical[i] + # Compute the results + digest.theoretical_peptides(results_digestion, enzymes_to_use) # Output results core.output_results(output_file, results_digestion, args.fmt, args.quiet, args.verbose) diff --git a/rpg/digest.py b/rpg/digest.py index 0ea2e64..f6a0dff 100644 --- a/rpg/digest.py +++ b/rpg/digest.py @@ -550,3 +550,60 @@ def digest_from_input(input_data, input_type, enz, mode, aa_pka, aa_mass, water_ input_type, 0, "Input ") # Return all peptides return results_digestion + +def theoretical_peptides(results_digestion, mc_val): + """ Add in results_digestion the resulting peptides of mis cleaved + positions + + :param results_digestion: result of digestions + :param mc_val: maxima number of mis cleavage per enzyme + :type results_digestion: list(list(:py:class:`ResultOneDigestion`)) + :type mc_val: dict(enzyme_name: value) + """ + # For each digested sequence + for a_seq in results_digestion: + # For each digestion results + for a_dig_res in a_seq: + # This result of digestion need to be undigested + if mc_val[a_dig_res.enzyme_name] != 0: + # Initial list of peptides (ordered by construction) + cur_peptides = list(a_dig_res.peptides) + # Id of the last peptide of this digestion + cur_id_pep = cur_peptides[-1].nb_peptide + # Current number of mis cleavage accepted + cur_mc_val = 1 + # We still need to merge peptide + while cur_mc_val <= mc_val[a_dig_res.enzyme_name]: + # For each peptide + for i in range(len(cur_peptides) - cur_mc_val): + # Get the peptides to merge + some_pep = cur_peptides[i:i+cur_mc_val+1] + # Take the header of the first peptide + header = some_pep[0].header + # Take the enzyme name of the first peptide + enzyme_name = some_pep[0].enzyme_name + # Take the pka of the first peptide + aa_pka = some_pep[0].aa_pka + # Take the aa masses of the first peptide + aa_mass = some_pep[0].aa_mass + # Take the water mass of the first peptide + water_mass = some_pep[0].water_mass + # Next nb_pep + cur_id_pep += 1 + nb_peptide = cur_id_pep + # Take the cleavage position of the last peptide + position = some_pep[-1].position + # Merge the sequences + seq = "".join(x.sequence for x in some_pep) + # New peptide + new_pep = sequence.Peptide(header, + seq, + enzyme_name, + aa_pka, + aa_mass, + water_mass, + nb_peptide, + position) + a_dig_res.peptides.append(new_pep) + # Increase number of mis cleavage + cur_mc_val += 1 diff --git a/rpg/enzyme.py b/rpg/enzyme.py index cb34adc..4c496b9 100644 --- a/rpg/enzyme.py +++ b/rpg/enzyme.py @@ -93,6 +93,31 @@ class Enzyme: core.handle_errors("'%s' can't be open in '%s' mode" % (enz_file, "a"), 0, "File ") +def get_enz_name(all_enzymes, name): + """ Get the proper name of an enzyme + + :param all_enzymes: all already existing enzymes + :param name: name or id of the enzyme + :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`) + :type name: str + + :return: The real name of an enzyme + :rtype: str + """ + enz_name = None + # Get the name + for i in all_enzymes: + # Get the name of this enzyme + if (str(name).isdigit() and i.id_ == int(name)) or \ + i.name.casefold() == str(name).casefold(): + enz_name = i.name.strip() + break + # Enzyme not found + if not enz_name: + core.handle_errors(f"Not able to find enzyme {name}.", 0) + # Return the correct name of this enzyme + return enz_name + def check_enzyme_name(name_new_enz, all_name_enz): """Validate the name of a new enzyme. @@ -105,7 +130,7 @@ def check_enzyme_name(name_new_enz, all_name_enz): :rtype: bool Enzyme name should not contains whitespace character (' ', \\\\t, - \\\\n, \\\\r, \\\\f, \\\\v), be empty or be already used. + \\\\n, \\\\r, \\\\f, \\\\v), be empty, a digit or be already used. """ ret = True @@ -114,6 +139,12 @@ def check_enzyme_name(name_new_enz, all_name_enz): core.handle_errors("This name exist, please choose another name.", 2) ret = False + # Does it contain only digit character? + if name_new_enz.isdigit(): + core.handle_errors("Enzyme name can't be only digits, please choose"\ + " another name.", 2) + ret = False + # Does it contain ' ' character? res = re.search(" ", name_new_enz) if res: @@ -393,7 +424,7 @@ def delete_enzyme(all_enzymes, name): :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`) :type name: str - .. warning:: Partially tested, remove bu ID can't be tested + .. warning:: Partially tested, remove by ID can't be tested """ user_content = "" # Get the whole content of user file @@ -401,17 +432,8 @@ def delete_enzyme(all_enzymes, name): for line in user_file: user_content += line - # We have an id, not a name of enzyme - if name.isdigit(): - # Get the name - for i in all_enzymes: - # Get the name of this enzyme - if i.id_ == int(name): - enz_name = i.name.strip() - break - else: - # Use the given name - enz_name = name.strip() + # Get the name of the enzyme + enz_name = get_enz_name(all_enzymes, name) # Beginning of what to remove beg_to_detect = f"# User-defined enzyme {re.escape(enz_name)}\n" diff --git a/tests/test_digest.py b/tests/test_digest.py index 5c42a08..edec331 100644 --- a/tests/test_digest.py +++ b/tests/test_digest.py @@ -554,3 +554,7 @@ def test_digest_part(tmpdir): # We have a ValueError assert pytest_wrapped_e.type == ValueError assert str(pytest_wrapped_e.value) == "input file format not recognized (,)." + +def test_theoretical_peptides(capsys): + """Test function 'theoretical_peptides(results_digestion, mc_val)'""" + pass \ No newline at end of file diff --git a/tests/test_enzyme.py b/tests/test_enzyme.py index a617aa7..2c8365f 100644 --- a/tests/test_enzyme.py +++ b/tests/test_enzyme.py @@ -162,8 +162,23 @@ def test_delete_enzyme(capsys): for line in usr_file: nb_line += 1 assert nb_line == 27 + # Virtually add enzyme Pwet-42 + tmp = list(AVAILABLE_ENZYMES) + ENZ = [] + + C_1 = rule.Rule(0, "C", True, 1) # Always cleaves after C, except... + C_1E1 = rule.Rule(1, "E", False, -1) # Never cleaves after C, followed by E, except... + C_1.rules.append(C_1E1) + ENZ.append(C_1) + + D_1 = rule.Rule(0, "D", True, 1) # Always cleaves after D, except... + ENZ.append(D_1) + + ENZYME = enzyme.Enzyme(44, "Pwet-42", ENZ, 0) + # Add it to available enzymes + tmp.append(ENZYME) # Delete it - enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet-42") + enzyme.delete_enzyme(tmp, "Pwet-42") # Normal process all_enz = AVAILABLE_ENZYMES @@ -183,8 +198,20 @@ def test_delete_enzyme(capsys): for line in usr_file: nb_line += 1 assert nb_line == 22 + + # Virtually add enzyme Pwet + tmp = list(AVAILABLE_ENZYMES) + ENZ = [] + + R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except... + ENZ.append(R_1) + + ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0) + # Add it to available enzymes + tmp.append(ENZYME) + # Delete it - enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet") + enzyme.delete_enzyme(tmp, "Pwet") # We should be back at 11 lines nb_line = 1 with open(user_file) as usr_file: @@ -192,7 +219,7 @@ def test_delete_enzyme(capsys): nb_line += 1 assert nb_line == 9 - # Wrong name + # Wrong name to delete name = "Pwet" rules = ["(R,)"] # Number of line of user file before inserting an enzyme @@ -209,14 +236,26 @@ def test_delete_enzyme(capsys): for line in usr_file: nb_line += 1 assert nb_line == 22 + + # Virtually add enzyme Pwet + tmp = list(AVAILABLE_ENZYMES) + ENZ = [] + + R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except... + ENZ.append(R_1) + + ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0) + # Add it to available enzymes + tmp.append(ENZYME) + # Delete it with pytest.raises(SystemExit) as pytest_wrapped_e: enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwett") _, err = capsys.readouterr() - assert err == "Error: Not able to remove enzyme Pwett, please remove manually\n" + assert err == "Error: Not able to find enzyme Pwett.\n" assert pytest_wrapped_e.type == SystemExit assert pytest_wrapped_e.value.code == 1 - enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet") + enzyme.delete_enzyme(tmp, "Pwet") # We should be back at 11 lines nb_line = 1 with open(user_file) as usr_file: @@ -241,11 +280,40 @@ def test_delete_enzyme(capsys): for line in usr_file: nb_line += 1 assert nb_line == 22 + # Virtually add enzyme Pwet + tmp = list(AVAILABLE_ENZYMES) + ENZ = [] + + R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except... + ENZ.append(R_1) + + ENZYME = enzyme.Enzyme(44, "Pwe.[t", ENZ, 0) + # Add it to available enzymes + tmp.append(ENZYME) # Delete it - enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwe.[t") + enzyme.delete_enzyme(tmp, "Pwe.[t") # We should be back at 11 lines nb_line = 1 with open(user_file) as usr_file: for line in usr_file: nb_line += 1 assert nb_line == 9 + +def test_get_enz_name(capsys): + """Test function 'get_enz_name()'.""" + # Enzyme does not exist + with pytest.raises(SystemExit) as pytest_wrapped_e: + enzyme.get_enz_name(AVAILABLE_ENZYMES, "Pwett") + _, err = capsys.readouterr() + assert err == "Error: Not able to find enzyme Pwett.\n" + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code == 1 + + # Normal behavior + assert enzyme.get_enz_name(AVAILABLE_ENZYMES, 42) == "Trypsin" + + # Normal behavior + assert enzyme.get_enz_name(AVAILABLE_ENZYMES, "42") == "Trypsin" + + # Normal behavior + assert enzyme.get_enz_name(AVAILABLE_ENZYMES, "trypsin") == "Trypsin" \ No newline at end of file diff --git a/tests/test_functional.py b/tests/test_functional.py index 8cda12d..aaa9e53 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -7,6 +7,8 @@ from pathlib import Path from io import StringIO from .context import rpg from rpg import RapidPeptidesGenerator +from rpg import enzyme +from rpg import rule @pytest.fixture def truth(): @@ -550,6 +552,58 @@ def res_dig_1_42(): ">Input_8_Trypsin_47_3_407.46678_3.74\n"\ "EFL\n" +@pytest.fixture +def res_dig_1_42_2(): + """ Result for digestion with 1 and 42 and IPC2 """ + return ">Input_0_Trypsin_2_2_289.29138_5.98\n"\ + "DR\n"\ + ">Input_1_Trypsin_10_8_935.00128_4.29\n"\ + "EALDSSWK\n"\ + ">Input_2_Trypsin_11_1_146.18938_8.06\n"\ + "K\n"\ + ">Input_3_Trypsin_13_2_287.36218_9.71\n"\ + "LR\n"\ + ">Input_4_Trypsin_19_6_503.51548_9.71\n"\ + "SGAGGR\n"\ + ">Input_5_Trypsin_20_1_146.18938_8.06\n"\ + "K\n"\ + ">Input_6_Trypsin_25_5_529.59668_9.71\n"\ + "NAGIR\n"\ + ">Input_7_Trypsin_44_19_2283.62998_4.3\n"\ + "LVLWMLDFDAHQPDSVLQR\n"\ + ">Input_8_Trypsin_47_3_407.46678_3.74\n"\ + "EFL\n"\ + ">Input_9_Trypsin_10_10_1206.27738_4.49\n"\ + "DREALDSSWK\n"\ + ">Input_10_Trypsin_11_9_1063.17538_6.12\n"\ + "EALDSSWKK\n"\ + ">Input_11_Trypsin_13_3_415.53628_9.93\n"\ + "KLR\n"\ + ">Input_12_Trypsin_19_8_772.86238_11.49\n"\ + "LRSGAGGR\n"\ + ">Input_13_Trypsin_20_7_631.68958_9.93\n"\ + "SGAGGRK\n"\ + ">Input_14_Trypsin_25_6_657.77078_9.93\n"\ + "KNAGIR\n"\ + ">Input_15_Trypsin_44_24_2795.21138_5.46\n"\ + "NAGIRLVLWMLDFDAHQPDSVLQR\n"\ + ">Input_16_Trypsin_47_22_2673.08148_4.12\n"\ + "LVLWMLDFDAHQPDSVLQREFL\n"\ + ">Input_17_Trypsin_11_11_1334.45148_6.16\n"\ + "DREALDSSWKK\n"\ + ">Input_18_Trypsin_13_11_1332.52228_7.79\n"\ + "EALDSSWKKLR\n"\ + ">Input_19_Trypsin_19_9_901.03648_11.49\n"\ + "KLRSGAGGR\n"\ + ">Input_20_Trypsin_20_9_901.03648_11.49\n"\ + "LRSGAGGRK\n"\ + ">Input_21_Trypsin_25_12_1143.27098_11.49\n"\ + "SGAGGRKNAGIR\n"\ + ">Input_22_Trypsin_44_25_2923.38548_7.08\n"\ + "KNAGIRLVLWMLDFDAHQPDSVLQR\n"\ + ">Input_23_Trypsin_47_27_3184.66288_4.61\n"\ + "NAGIRLVLWMLDFDAHQPDSVLQREFL\n" + @pytest.fixture def res_dig_1_42_ipc(): """ Result for digestion with 1 and 42 """ @@ -1106,20 +1160,51 @@ def test_na_option(capsys): nb_line += 1 assert nb_line == 24 - # Test -b behavior - with pytest.raises(SystemExit) as pytest_wrapped_e: - with unittest.mock.patch("sys.argv", ["func_test", - "-b", "Pwet"]): - RapidPeptidesGenerator.main() - assert pytest_wrapped_e.value.code == 0 - # Output - captured = capsys.readouterr() - # Check result - user_file = str(Path.home()) + "/rpg_user.py" + # Virtually add enzyme Pwet + tmp = [] + ENZ = [] - # Number of line of user file after inserting enzyme in previous test + R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except... + ENZ.append(R_1) + + ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0) + # Add it to available enzymes + tmp.append(ENZYME) + # Delete the enzyme + enzyme.delete_enzyme(tmp, "Pwet") + # We should be back at 11 lines nb_line = 1 with open(user_file) as usr_file: for line in usr_file: nb_line += 1 assert nb_line == 9 + +def test_theoretical(capsys, monkeypatch, list_enz, res_dig_1_42_2): + """ Test correct behavior """ + with unittest.mock.patch("sys.argv", ["func_test", + "-s", "DREALDSSWKKLRSgagGRKNAGI"\ + "RLVLWMLDFDAHQPDSVLQREFL", + "-e", "42", + "-t", "1", "2", "3"]): + RapidPeptidesGenerator.main() + captured = capsys.readouterr() + assert "Too much miscleavage values. Last values will be ignored.\n" in captured.err + + + with unittest.mock.patch("sys.argv", ["func_test", + "-s", "DREALDSSWKKLRSgagGRKNAGI"\ + "RLVLWMLDFDAHQPDSVLQREFL", + "-e", "42", + "-t", "2"]): + RapidPeptidesGenerator.main() + captured = capsys.readouterr() + assert res_dig_1_42_2 in captured.out + + with unittest.mock.patch("sys.argv", ["func_test", + "-s", "DREALDSSWKKLRSgagGRKNAGI"\ + "RLVLWMLDFDAHQPDSVLQREFL", + "-e", "42", "2", + "-t", "2"]): + RapidPeptidesGenerator.main() + captured = capsys.readouterr() + assert res_dig_1_42_2 in captured.out -- GitLab