From 1122fe4df97ea5c0830a457724a1212716c844dd Mon Sep 17 00:00:00 2001
From: Nico Maillet <nicolas.maillet@pasteur.fr>
Date: Wed, 22 Feb 2023 11:55:05 +0100
Subject: [PATCH] Add theoritical peptides functions and firsts tests

---
 rpg/RapidPeptidesGenerator.py |  48 ++++++++++++++-
 rpg/digest.py                 |  57 ++++++++++++++++++
 rpg/enzyme.py                 |  48 ++++++++++-----
 tests/test_digest.py          |   4 ++
 tests/test_enzyme.py          |  80 +++++++++++++++++++++++--
 tests/test_functional.py      | 107 ++++++++++++++++++++++++++++++----
 6 files changed, 313 insertions(+), 31 deletions(-)

diff --git a/rpg/RapidPeptidesGenerator.py b/rpg/RapidPeptidesGenerator.py
index 667326c..4204a5a 100644
--- a/rpg/RapidPeptidesGenerator.py
+++ b/rpg/RapidPeptidesGenerator.py
@@ -73,6 +73,27 @@ def restricted_float(mc_val):
         core.handle_errors("miscleavage value should be a float between 0 "\
                            "and 100.", 0, "Type ")
 
+def restricted_int(mc_val):
+    """Restricts input miscleavage value to a int.
+
+    :param mc_val: value to test
+    :type mc_val: int
+
+    :return: the inputed value if correct
+    :rtype: int
+
+    :raises custom TypeError: if value is not an int
+    """
+    try:
+        mc_val = int(mc_val)
+        if mc_val < 0 :
+            core.handle_errors("miscleavage value should be greater then 0.", 0, "Value ")
+        return mc_val
+    except ValueError:
+        # Throw an error
+        core.handle_errors("miscleavage value should be an integer.", 0,
+                           "Type ")
+
 def restricted_enzyme_id(enz_id):
     """Restrict input enzyme id to a str corresponding to an enzyme.
 
@@ -204,6 +225,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
                                                    mc_enz_inputed)
     # Return Enzymes to use
     return enzymes_to_use
+
 # Not tested
 def main():
     """Launcher of RapidPeptidesGenerator"""
@@ -258,12 +280,20 @@ def main():
     group_launch.add_argument("-s", "--sequence", metavar="",
                               help="Input a single protein sequence without "\
                                    "commentary")
-    parser.add_argument("-m", "--miscleavage", metavar="", default=[],
+    group_mc = parser.add_mutually_exclusive_group(required=False)
+    group_mc.add_argument("-m", "--miscleavage", metavar="", default=[],
                         nargs='+', type=restricted_float,
                         help="Percentage of miscleavage, between 0 and 100,"
                         " by enzyme(s). It should be in the same order than "
                         "-enzymes options (e.g. -m 15 5 10). Only for sequenti"
                         "al digestion (default: 0)")
+    group_mc.add_argument("-t", "--theoretical", metavar="", default=[],
+                        nargs='+', type=restricted_int,
+                        help="Maximum number of miscleavages, by enzyme(s). "
+                        "RPG will produce all theoretical peptides allowing "
+                        "at most -t miscleavages. It should be in the same "
+                        "order than -enzymes options (e.g. -t 1 3 2). Only "
+                        "for sequential digestion (default: 0)")
     parser.add_argument("-n", "--noninteractive", action='store_true',
                         help="Non-interactive mode. No standard output, only "
                         "error(s) (--quiet enable, overwrite -v). If output "
@@ -441,6 +471,22 @@ def main():
                                                  aa_mass, water_mass,
                                                  args.processes)
 
+    # We need to compute all theoretical peptides for at least one enzyme
+    if args.theoretical:
+        # More mis cleavage than enzyme
+        if len(args.theoretical) > len(args.enzymes):
+            core.handle_errors("Too much miscleavage values. Last values will "
+                               "be ignored.")
+        # Not enough mis cleavage values, complete with 0
+        elif len(args.theoretical) < len(args.enzymes):
+            args.theoretical += [0]*(len(args.enzymes) - len(args.theoretical))
+        # Dict of enzymes/max mis cleavage
+        enzymes_to_use = {}
+        # Link enzyme to mc value
+        for i, j in enumerate(args.enzymes):
+            enzymes_to_use[enzyme.get_enz_name(ALL_ENZYMES, j)] = args.theoretical[i]
+        # Compute the results
+        digest.theoretical_peptides(results_digestion, enzymes_to_use)
     # Output results
     core.output_results(output_file, results_digestion, args.fmt, args.quiet,
                         args.verbose)
diff --git a/rpg/digest.py b/rpg/digest.py
index 0ea2e64..f6a0dff 100644
--- a/rpg/digest.py
+++ b/rpg/digest.py
@@ -550,3 +550,60 @@ def digest_from_input(input_data, input_type, enz, mode, aa_pka, aa_mass, water_
                            input_type, 0, "Input ")
     # Return all peptides
     return results_digestion
+
+def theoretical_peptides(results_digestion, mc_val):
+    """ Add in results_digestion the resulting peptides of mis cleaved
+    positions
+
+    :param results_digestion: result of digestions
+    :param mc_val: maxima number of mis cleavage per enzyme
+    :type results_digestion: list(list(:py:class:`ResultOneDigestion`))
+    :type mc_val: dict(enzyme_name: value)
+    """
+    # For each digested sequence
+    for a_seq in results_digestion:
+        # For each digestion results
+        for a_dig_res in a_seq:
+            # This result of digestion need to be undigested
+            if mc_val[a_dig_res.enzyme_name] != 0:
+                # Initial list of peptides (ordered by construction)
+                cur_peptides = list(a_dig_res.peptides)
+                # Id of the last peptide of this digestion
+                cur_id_pep = cur_peptides[-1].nb_peptide
+                # Current number of mis cleavage accepted
+                cur_mc_val = 1
+                # We still need to merge peptide
+                while cur_mc_val <= mc_val[a_dig_res.enzyme_name]:
+                    # For each peptide
+                    for i in range(len(cur_peptides) - cur_mc_val):
+                        # Get the peptides to merge
+                        some_pep = cur_peptides[i:i+cur_mc_val+1]
+                        # Take the header of the first peptide
+                        header = some_pep[0].header
+                        # Take the enzyme name of the first peptide
+                        enzyme_name = some_pep[0].enzyme_name
+                        # Take the pka of the first peptide
+                        aa_pka = some_pep[0].aa_pka
+                        # Take the aa masses of the first peptide
+                        aa_mass = some_pep[0].aa_mass
+                        # Take the water mass of the first peptide
+                        water_mass = some_pep[0].water_mass
+                        # Next nb_pep
+                        cur_id_pep += 1
+                        nb_peptide = cur_id_pep
+                        # Take the cleavage position of the last peptide
+                        position = some_pep[-1].position
+                        # Merge the sequences
+                        seq = "".join(x.sequence for x in some_pep)
+                        # New peptide
+                        new_pep = sequence.Peptide(header,
+                                                   seq,
+                                                   enzyme_name,
+                                                   aa_pka,
+                                                   aa_mass,
+                                                   water_mass,
+                                                   nb_peptide,
+                                                   position)
+                        a_dig_res.peptides.append(new_pep)
+                    # Increase number of mis cleavage
+                    cur_mc_val += 1
diff --git a/rpg/enzyme.py b/rpg/enzyme.py
index cb34adc..4c496b9 100644
--- a/rpg/enzyme.py
+++ b/rpg/enzyme.py
@@ -93,6 +93,31 @@ class Enzyme:
                 core.handle_errors("'%s' can't be open in '%s' mode" %
                                    (enz_file, "a"), 0, "File ")
 
+def get_enz_name(all_enzymes, name):
+    """ Get the proper name of an enzyme
+
+    :param all_enzymes: all already existing enzymes
+    :param name: name or id of the enzyme
+    :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
+    :type name: str
+
+    :return: The real name of an enzyme
+    :rtype: str
+    """
+    enz_name = None
+    # Get the name
+    for i in all_enzymes:
+        # Get the name of this enzyme
+        if (str(name).isdigit() and i.id_ == int(name)) or \
+           i.name.casefold() == str(name).casefold():
+            enz_name = i.name.strip()
+            break
+    # Enzyme not found
+    if not enz_name:
+        core.handle_errors(f"Not able to find enzyme {name}.", 0)
+    # Return the correct name of this enzyme
+    return enz_name
+
 def check_enzyme_name(name_new_enz, all_name_enz):
     """Validate the name of a new enzyme.
 
@@ -105,7 +130,7 @@ def check_enzyme_name(name_new_enz, all_name_enz):
     :rtype: bool
 
     Enzyme name should not contains whitespace character (' ', \\\\t,
-    \\\\n, \\\\r, \\\\f, \\\\v), be empty or be already used.
+    \\\\n, \\\\r, \\\\f, \\\\v), be empty, a digit or be already used.
     """
 
     ret = True
@@ -114,6 +139,12 @@ def check_enzyme_name(name_new_enz, all_name_enz):
         core.handle_errors("This name exist, please choose another name.", 2)
         ret = False
 
+    # Does it contain only digit character?
+    if name_new_enz.isdigit():
+        core.handle_errors("Enzyme name can't be only digits, please choose"\
+                           " another name.", 2)
+        ret = False
+
     # Does it contain ' ' character?
     res = re.search(" ", name_new_enz)
     if res:
@@ -393,7 +424,7 @@ def delete_enzyme(all_enzymes, name):
     :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
     :type name: str
 
-    .. warning:: Partially tested, remove bu ID can't be tested
+    .. warning:: Partially tested, remove by ID can't be tested
     """
     user_content = ""
     # Get the whole content of user file
@@ -401,17 +432,8 @@ def delete_enzyme(all_enzymes, name):
         for line in user_file:
             user_content += line
 
-    # We have an id, not a name of enzyme
-    if name.isdigit():
-        # Get the name
-        for i in all_enzymes:
-            # Get the name of this enzyme
-            if i.id_ == int(name):
-                enz_name = i.name.strip()
-                break
-    else:
-        # Use the given name
-        enz_name = name.strip()
+    # Get the name of the enzyme
+    enz_name = get_enz_name(all_enzymes, name)
 
     # Beginning of what to remove
     beg_to_detect = f"# User-defined enzyme {re.escape(enz_name)}\n"
diff --git a/tests/test_digest.py b/tests/test_digest.py
index 5c42a08..edec331 100644
--- a/tests/test_digest.py
+++ b/tests/test_digest.py
@@ -554,3 +554,7 @@ def test_digest_part(tmpdir):
     # We have a ValueError
     assert pytest_wrapped_e.type == ValueError
     assert str(pytest_wrapped_e.value) == "input file format not recognized (,)."
+
+def test_theoretical_peptides(capsys):
+    """Test function 'theoretical_peptides(results_digestion, mc_val)'"""
+    pass
\ No newline at end of file
diff --git a/tests/test_enzyme.py b/tests/test_enzyme.py
index a617aa7..2c8365f 100644
--- a/tests/test_enzyme.py
+++ b/tests/test_enzyme.py
@@ -162,8 +162,23 @@ def test_delete_enzyme(capsys):
         for line in usr_file:
             nb_line += 1
     assert nb_line == 27
+    # Virtually add enzyme Pwet-42
+    tmp = list(AVAILABLE_ENZYMES)
+    ENZ = []
+
+    C_1 = rule.Rule(0, "C", True, 1) # Always cleaves after C, except...
+    C_1E1 = rule.Rule(1, "E", False, -1) # Never cleaves after C, followed by E, except...
+    C_1.rules.append(C_1E1)
+    ENZ.append(C_1)
+
+    D_1 = rule.Rule(0, "D", True, 1) # Always cleaves after D, except...
+    ENZ.append(D_1)
+
+    ENZYME = enzyme.Enzyme(44, "Pwet-42", ENZ, 0)
+    # Add it to available enzymes
+    tmp.append(ENZYME)
     # Delete it
-    enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet-42")
+    enzyme.delete_enzyme(tmp, "Pwet-42")
 
     # Normal process
     all_enz = AVAILABLE_ENZYMES
@@ -183,8 +198,20 @@ def test_delete_enzyme(capsys):
         for line in usr_file:
             nb_line += 1
     assert nb_line == 22
+
+    # Virtually add enzyme Pwet
+    tmp = list(AVAILABLE_ENZYMES)
+    ENZ = []
+
+    R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except...
+    ENZ.append(R_1)
+
+    ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0)
+    # Add it to available enzymes
+    tmp.append(ENZYME)
+
     # Delete it
-    enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet")
+    enzyme.delete_enzyme(tmp, "Pwet")
     # We should be back at 11 lines
     nb_line = 1
     with open(user_file) as usr_file:
@@ -192,7 +219,7 @@ def test_delete_enzyme(capsys):
             nb_line += 1
     assert nb_line == 9
 
-    # Wrong name
+    # Wrong name to delete
     name = "Pwet"
     rules = ["(R,)"]
     # Number of line of user file before inserting an enzyme
@@ -209,14 +236,26 @@ def test_delete_enzyme(capsys):
         for line in usr_file:
             nb_line += 1
     assert nb_line == 22
+
+    # Virtually add enzyme Pwet
+    tmp = list(AVAILABLE_ENZYMES)
+    ENZ = []
+
+    R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except...
+    ENZ.append(R_1)
+
+    ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0)
+    # Add it to available enzymes
+    tmp.append(ENZYME)
+
     # Delete it
     with pytest.raises(SystemExit) as pytest_wrapped_e:
         enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwett")
     _, err = capsys.readouterr()
-    assert err == "Error: Not able to remove enzyme Pwett, please remove manually\n"
+    assert err == "Error: Not able to find enzyme Pwett.\n"
     assert pytest_wrapped_e.type == SystemExit
     assert pytest_wrapped_e.value.code == 1
-    enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwet")
+    enzyme.delete_enzyme(tmp, "Pwet")
     # We should be back at 11 lines
     nb_line = 1
     with open(user_file) as usr_file:
@@ -241,11 +280,40 @@ def test_delete_enzyme(capsys):
         for line in usr_file:
             nb_line += 1
     assert nb_line == 22
+    # Virtually add enzyme Pwet
+    tmp = list(AVAILABLE_ENZYMES)
+    ENZ = []
+
+    R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except...
+    ENZ.append(R_1)
+
+    ENZYME = enzyme.Enzyme(44, "Pwe.[t", ENZ, 0)
+    # Add it to available enzymes
+    tmp.append(ENZYME)
     # Delete it
-    enzyme.delete_enzyme(AVAILABLE_ENZYMES, "Pwe.[t")
+    enzyme.delete_enzyme(tmp, "Pwe.[t")
     # We should be back at 11 lines
     nb_line = 1
     with open(user_file) as usr_file:
         for line in usr_file:
             nb_line += 1
     assert nb_line == 9
+
+def test_get_enz_name(capsys):
+    """Test function 'get_enz_name()'."""
+    # Enzyme does not exist
+    with pytest.raises(SystemExit) as pytest_wrapped_e:
+        enzyme.get_enz_name(AVAILABLE_ENZYMES, "Pwett")
+    _, err = capsys.readouterr()
+    assert err == "Error: Not able to find enzyme Pwett.\n"
+    assert pytest_wrapped_e.type == SystemExit
+    assert pytest_wrapped_e.value.code == 1
+
+    # Normal behavior
+    assert enzyme.get_enz_name(AVAILABLE_ENZYMES, 42) == "Trypsin"
+
+    # Normal behavior
+    assert enzyme.get_enz_name(AVAILABLE_ENZYMES, "42") == "Trypsin"
+
+    # Normal behavior
+    assert enzyme.get_enz_name(AVAILABLE_ENZYMES, "trypsin") == "Trypsin"
\ No newline at end of file
diff --git a/tests/test_functional.py b/tests/test_functional.py
index 8cda12d..aaa9e53 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -7,6 +7,8 @@ from pathlib import Path
 from io import StringIO
 from .context import rpg
 from rpg import RapidPeptidesGenerator
+from rpg import enzyme
+from rpg import rule
 
 @pytest.fixture
 def truth():
@@ -550,6 +552,58 @@ def res_dig_1_42():
            ">Input_8_Trypsin_47_3_407.46678_3.74\n"\
            "EFL\n"
 
+@pytest.fixture
+def res_dig_1_42_2():
+    """ Result for digestion with 1 and 42 and IPC2 """
+    return ">Input_0_Trypsin_2_2_289.29138_5.98\n"\
+           "DR\n"\
+           ">Input_1_Trypsin_10_8_935.00128_4.29\n"\
+           "EALDSSWK\n"\
+           ">Input_2_Trypsin_11_1_146.18938_8.06\n"\
+           "K\n"\
+           ">Input_3_Trypsin_13_2_287.36218_9.71\n"\
+           "LR\n"\
+           ">Input_4_Trypsin_19_6_503.51548_9.71\n"\
+           "SGAGGR\n"\
+           ">Input_5_Trypsin_20_1_146.18938_8.06\n"\
+           "K\n"\
+           ">Input_6_Trypsin_25_5_529.59668_9.71\n"\
+           "NAGIR\n"\
+           ">Input_7_Trypsin_44_19_2283.62998_4.3\n"\
+           "LVLWMLDFDAHQPDSVLQR\n"\
+           ">Input_8_Trypsin_47_3_407.46678_3.74\n"\
+           "EFL\n"\
+           ">Input_9_Trypsin_10_10_1206.27738_4.49\n"\
+           "DREALDSSWK\n"\
+           ">Input_10_Trypsin_11_9_1063.17538_6.12\n"\
+           "EALDSSWKK\n"\
+           ">Input_11_Trypsin_13_3_415.53628_9.93\n"\
+           "KLR\n"\
+           ">Input_12_Trypsin_19_8_772.86238_11.49\n"\
+           "LRSGAGGR\n"\
+           ">Input_13_Trypsin_20_7_631.68958_9.93\n"\
+           "SGAGGRK\n"\
+           ">Input_14_Trypsin_25_6_657.77078_9.93\n"\
+           "KNAGIR\n"\
+           ">Input_15_Trypsin_44_24_2795.21138_5.46\n"\
+           "NAGIRLVLWMLDFDAHQPDSVLQR\n"\
+           ">Input_16_Trypsin_47_22_2673.08148_4.12\n"\
+           "LVLWMLDFDAHQPDSVLQREFL\n"\
+           ">Input_17_Trypsin_11_11_1334.45148_6.16\n"\
+           "DREALDSSWKK\n"\
+           ">Input_18_Trypsin_13_11_1332.52228_7.79\n"\
+           "EALDSSWKKLR\n"\
+           ">Input_19_Trypsin_19_9_901.03648_11.49\n"\
+           "KLRSGAGGR\n"\
+           ">Input_20_Trypsin_20_9_901.03648_11.49\n"\
+           "LRSGAGGRK\n"\
+           ">Input_21_Trypsin_25_12_1143.27098_11.49\n"\
+           "SGAGGRKNAGIR\n"\
+           ">Input_22_Trypsin_44_25_2923.38548_7.08\n"\
+           "KNAGIRLVLWMLDFDAHQPDSVLQR\n"\
+           ">Input_23_Trypsin_47_27_3184.66288_4.61\n"\
+           "NAGIRLVLWMLDFDAHQPDSVLQREFL\n"
+
 @pytest.fixture
 def res_dig_1_42_ipc():
     """ Result for digestion with 1 and 42 """
@@ -1106,20 +1160,51 @@ def test_na_option(capsys):
             nb_line += 1
     assert nb_line == 24
 
-    # Test -b behavior
-    with pytest.raises(SystemExit) as pytest_wrapped_e:
-        with unittest.mock.patch("sys.argv", ["func_test",
-                                              "-b", "Pwet"]):
-            RapidPeptidesGenerator.main()
-    assert pytest_wrapped_e.value.code == 0
-    # Output
-    captured = capsys.readouterr()
-    # Check result
-    user_file = str(Path.home()) + "/rpg_user.py"
+    # Virtually add enzyme Pwet
+    tmp = []
+    ENZ = []
 
-    # Number of line of user file after inserting enzyme in previous test
+    R_1 = rule.Rule(0, "R", True, 1) # Always cleaves after R, except...
+    ENZ.append(R_1)
+
+    ENZYME = enzyme.Enzyme(44, "Pwet", ENZ, 0)
+    # Add it to available enzymes
+    tmp.append(ENZYME)
+    # Delete the enzyme
+    enzyme.delete_enzyme(tmp, "Pwet")
+    # We should be back at 11 lines
     nb_line = 1
     with open(user_file) as usr_file:
         for line in usr_file:
             nb_line += 1
     assert nb_line == 9
+
+def test_theoretical(capsys, monkeypatch, list_enz, res_dig_1_42_2):
+    """ Test correct behavior """
+    with unittest.mock.patch("sys.argv", ["func_test",
+                                          "-s", "DREALDSSWKKLRSgagGRKNAGI"\
+                                                "RLVLWMLDFDAHQPDSVLQREFL",
+                                          "-e", "42",
+                                          "-t", "1", "2", "3"]):
+        RapidPeptidesGenerator.main()
+    captured = capsys.readouterr()
+    assert "Too much miscleavage values. Last values will be ignored.\n" in captured.err
+
+
+    with unittest.mock.patch("sys.argv", ["func_test",
+                                          "-s", "DREALDSSWKKLRSgagGRKNAGI"\
+                                                "RLVLWMLDFDAHQPDSVLQREFL",
+                                          "-e", "42",
+                                          "-t", "2"]):
+        RapidPeptidesGenerator.main()
+    captured = capsys.readouterr()
+    assert res_dig_1_42_2 in captured.out
+
+    with unittest.mock.patch("sys.argv", ["func_test",
+                                          "-s", "DREALDSSWKKLRSgagGRKNAGI"\
+                                                "RLVLWMLDFDAHQPDSVLQREFL",
+                                          "-e", "42", "2",
+                                          "-t", "2"]):
+        RapidPeptidesGenerator.main()
+    captured = capsys.readouterr()
+    assert res_dig_1_42_2 in captured.out
-- 
GitLab