diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f339b4e614cdaad772c6256a8f87db11a4842178..a2a4a48dbd1849796c4f7fe0dda7182dce0018a8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,11 @@ ========= CHANGELOG ========= +- 1.0.7 + Adding choice for pKa values (option -p) + + Fixing alphabetic order for enzymes + - 1.0.6 No default output file, only stdout diff --git a/docs/conf.py b/docs/conf.py index 6c548c4ab8751ef92dde0b5f67483d598d1493f0..7f9bc8e5f576e329e44771e3eb595fcbf9df2887 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,13 +20,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), # -- Project information ----------------------------------------------------- project = 'RapidPeptidesGenerator' -copyright = '2018, Nicolas Maillet' +copyright = '2019, Nicolas Maillet' author = 'Nicolas Maillet' # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '1.0.6' +release = '1.0.7' # -- General configuration --------------------------------------------------- diff --git a/docs/enzymes.rst b/docs/enzymes.rst index 0d1e86e86750681016784c5679980674ead84cb2..7c345c9726bdad1b6fb85221071a44943ed20bbc 100644 --- a/docs/enzymes.rst +++ b/docs/enzymes.rst @@ -36,8 +36,8 @@ Available enzymes 28: :ref:`lysc` 29: :ref:`lysn` 30: :ref:`neut` 31: :ref:`ntcb` 32: :ref:`pap` 33: :ref:`peps13` 34: :ref:`peps2` 35: :ref:`prol` 36: :ref:`protk` -37: :ref:`staphI` 38: :ref:`tev` 39: :ref:`therm` -40: :ref:`throm` 41: :ref:`thromsg` 42: :ref:`tryps` +37: :ref:`staphI` 38: :ref:`therm` 39: :ref:`throm` +40: :ref:`thromsg` 41: :ref:`tev` 42: :ref:`tryps` ================== ================== ================== .. _arg-c: @@ -747,23 +747,6 @@ More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.ht -.. _tev: - -Tobacco etch virus protease -........................... - -Tobacco etch virus protease (TEV) preferentially cleaves after Q (`P1`) when followed by G or S in `P1'` and preceded by Y in `P3` and E in `P6`. - -**RPG definition:** - -cleaving rule: - -* ``(E)()()(Y)()(Q,)(G or S)`` - -More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV - - - .. _therm: Thermolysin @@ -836,6 +819,23 @@ More information: see thrombin cleavage kits of +.. _tev: + +Tobacco etch virus protease +........................... + +Tobacco etch virus protease (TEV) preferentially cleaves after Q (`P1`) when followed by G or S in `P1'` and preceded by Y in `P3` and E in `P6`. + +**RPG definition:** + +cleaving rule: + +* ``(E)()()(Y)()(Q,)(G or S)`` + +More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV + + + .. _tryps: Trypsin diff --git a/rpg/RapidPeptidesGenerator.py b/rpg/RapidPeptidesGenerator.py index a8b1edc32bbaa083cfc65eba5a2c70d3ad4f5ce1..a6025668322e36472c39080378987beb6c043ddc 100644 --- a/rpg/RapidPeptidesGenerator.py +++ b/rpg/RapidPeptidesGenerator.py @@ -29,9 +29,9 @@ necessary functions """ -__version_info__ = ('1', '0', '6') +__version_info__ = ('1', '0', '7') __version__ = '.'.join(__version_info__) -__revision_date__ = "2018-11-19" +__revision_date__ = "2019-01-21" __author__ = "Nicolas Maillet" import argparse @@ -275,6 +275,9 @@ def main(): "error(s) (--quiet enable, overwrite -v). If output " "filename already exists, output file will be " "overwritten.") + parser.add_argument("-p", "--pka", metavar="", choices=['ipc', 'stryer'], + default="ipc", help="Define pKa values. Either 'ipc' " + "or 'stryer (default: ipc)") group_output = parser.add_mutually_exclusive_group() group_output.add_argument("-o", "--outputfile", type=str, metavar="", default="", help="Optional result file " @@ -303,6 +306,11 @@ def main(): mode = "concurrent" args.miscleavage = [] # No miscleavage on concurrent, infinite time + # --pka option + aa_pka = core.AA_PKA_IPC + if args.pka == "stryer": + aa_pka = core.AA_PKA_S + # --list option if args.list: list_enzyme() @@ -371,7 +379,7 @@ def main(): # Make the actual digestion of input data results_digestion = digest.digest_from_input(args.inputdata, - enzymes_to_use, mode) + enzymes_to_use, mode, aa_pka) # Output results core.output_results(output_file, results_digestion, args.fmt, args.quiet, diff --git a/rpg/core.py b/rpg/core.py index 8c3b0ddba69fdc90846133df28f491e1c4d5671c..5d4f2ec428fb0834186272eb0fc04c63c860aac4 100644 --- a/rpg/core.py +++ b/rpg/core.py @@ -64,26 +64,27 @@ WATER_MASS = 18.01528 """Mass of a water molecule.""" # Biochemistry Stryer 7th -#AA_PKA = {"Nterm" : 8.0, -# "C" : 8.3, -# "D" : 4.1, -# "E" : 4.1, -# "H" : 6.0, -# "K" : 10.8, -# "R" : 12.5, -# "Y" : 10.9, -# "Cterm" : 3.1} +AA_PKA_S = {"Nterm" : 8.0, + "C" : 8.3, + "D" : 4.1, + "E" : 4.1, + "H" : 6.0, + "K" : 10.8, + "R" : 12.5, + "Y" : 10.9, + "Cterm" : 3.1} +"""pKa of important amino acid to compute pI (from Stryer).""" # IPC_peptide -AA_PKA = {"Nterm" : 9.564, - "C" : 8.297, - "D" : 3.887, - "E" : 4.317, - "H" : 6.018, - "K" : 10.517, - "R" : 12.503, - "Y" : 10.071, - "Cterm" : 2.383} -"""pKa of important amino acid to compute pI.""" +AA_PKA_IPC = {"Nterm" : 9.564, + "C" : 8.297, + "D" : 3.887, + "E" : 4.317, + "H" : 6.018, + "K" : 10.517, + "R" : 12.503, + "Y" : 10.071, + "Cterm" : 2.383} +"""pKa of important amino acid to compute pI (from IPC).""" def handle_errors(message="", err=1, error_type=""): """Custom handling of errors and warnings. diff --git a/rpg/digest.py b/rpg/digest.py index 26d0068d7c299eb5cdeb9a5bd5decc28f4c4ab16..649ed3ec6a91bf7c65d05a5b80b19be713f81bfd 100644 --- a/rpg/digest.py +++ b/rpg/digest.py @@ -205,13 +205,15 @@ class ResultOneDigestion: ret += "C terminal peptide: " + self.peptides[-1].sequence return ret -def one_digest(pep, enz): +def one_digest(pep, enz, aa_pka): """Digest a peptide with an enzyme. :param pep: peptide to digest :param enz: enzyme to digest with + :param aa_pka: pKa values (IPC / Stryer) :type pep: :py:class:`~rpg.sequence.Peptide` :type enz: :py:class:`~rpg.enzyme.Enzyme` + :type aa_pka: str :return: result of the digestion :rtype: :py:class:`ResultOneDigestion` @@ -235,7 +237,7 @@ def one_digest(pep, enz): after = False tmp_seq = pep.sequence[previous_pos:pos] tmp_peptide = sequence.Peptide(pep.header, tmp_seq, enzyme_name, - cpt, pos + original_pos) + aa_pka, cpt, pos + original_pos) ret.add_peptide(tmp_peptide) cpt += 1 a_cut_occurs = True @@ -272,7 +274,8 @@ def one_digest(pep, enz): if previous_pos != pos: tmp_seq = pep.sequence[previous_pos:pos] tmp_peptide = sequence.Peptide(pep.header, tmp_seq, - enzyme_name, cpt, + enzyme_name, aa_pka, + cpt, pos + original_pos) ret.add_peptide(tmp_peptide) cpt += 1 @@ -291,45 +294,49 @@ def one_digest(pep, enz): if a_cut_occurs: tmp_pos = len(pep.sequence) # Last portion of protein tmp_seq = pep.sequence[previous_pos:] - tmp_peptide = sequence.Peptide(pep.header, tmp_seq, enzyme_name, cpt, - tmp_pos + original_pos) + tmp_peptide = sequence.Peptide(pep.header, tmp_seq, enzyme_name, + aa_pka, cpt, tmp_pos + original_pos) ret.add_peptide(tmp_peptide) # Not cut, don't change the peptide else: ret.add_peptide(pep) return ret -def digest_one_sequence(seq, enz, mode): +def digest_one_sequence(seq, enz, mode, aa_pka): """Launch a digest procedure on one sequence. :param sequence: sequence to digest :param enz: enzymes to digest with :param mode: digestion mode (concurrent / sequential) + :param aa_pka: pKa values (IPC / Stryer) :type sequence: :py:class:`~rpg.sequence.Sequence` :type enz: list(:py:class:`~rpg.enzyme.Enzyme`) :type mode: str + :type aa_pka: str :return: result of the digestion :rtype: list(:py:class:`ResultOneDigestion`) """ ret = None if mode == "sequential": - ret = sequential_digest(seq, enz) + ret = sequential_digest(seq, enz, aa_pka) elif mode == "concurrent": - ret = concurrent_digest(seq, enz) + ret = concurrent_digest(seq, enz, aa_pka) else: core.handle_errors("not able to understand digetion mode. Switching " "to 'sequential'.") - ret = sequential_digest(seq, enz) + ret = sequential_digest(seq, enz, aa_pka) return ret -def sequential_digest(seq, enz): +def sequential_digest(seq, enz, aa_pka): """Sequentially digest a sequence with all Enzymes, **one by one**. :param seq: sequence to digest :param enz: enzymes to digest with + :param aa_pka: pKa values (IPC / Stryer) :type seq: :py:class:`~rpg.sequence.Sequence` :type enz: list(:py:class:`~rpg.enzyme.Enzyme`) + :type aa_pka: str :return: result of the digestion :rtype: list(:py:class:`ResultOneDigestion`) @@ -338,19 +345,22 @@ def sequential_digest(seq, enz): # Check each enzymes for an_enz in enz: # Create a fake peptide from input sequence - fake_peptide = sequence.Peptide(seq.header, seq.sequence, an_enz.name) + fake_peptide = sequence.Peptide(seq.header, seq.sequence, an_enz.name, + aa_pka) # Digest it - ret.append(one_digest(fake_peptide, an_enz)) + ret.append(one_digest(fake_peptide, an_enz, aa_pka)) return ret -def concurrent_digest(seq, enz): +def concurrent_digest(seq, enz, aa_pka): """Concurrently digest a sequence with all Enzymes **at the same time**. :param seq: sequence to digest :param enz: enzymes to digest with + :param aa_pka: pKa values (IPC / Stryer) :type seq: :py:class:`~rpg.sequence.Sequence` :type enz: list(:py:class:`~rpg.enzyme.Enzyme`) + :type aa_pka: str :return: result of the digestion :rtype: list(:py:class:`ResultOneDigestion`) @@ -363,7 +373,7 @@ def concurrent_digest(seq, enz): enzymes_name_to_write = enzymes_name_to_write[:-1] # First peptide is the sequence itself fake_peptide = sequence.Peptide(seq.header, seq.sequence, - enzymes_name_to_write) + enzymes_name_to_write, aa_pka) # Result is currently just the sequence (list of one peptide) result = ResultOneDigestion(enzymes_name_to_write, [fake_peptide]) # Do we digest as much as we can? @@ -380,7 +390,8 @@ def concurrent_digest(seq, enz): # remove them from the global result for peptide in result.pop_peptides(): # Digest it, return a list of ResultOneDigestion - all_res_digestion_tmp.append(one_digest(peptide, an_enz)) + all_res_digestion_tmp.append(one_digest(peptide, an_enz, + aa_pka)) # Merge the result of digestion with previous result for i in all_res_digestion_tmp: result.merge(i) @@ -398,16 +409,18 @@ def concurrent_digest(seq, enz): # it will be one result by enzyme return [result] -def digest_from_input(input_data, enz, mode): +def digest_from_input(input_data, enz, mode, aa_pka): """Digest all sequences of input data with selected enzymes and mode. :param input_data: either a sequence or a file of sequence (fasta/fastq) :param enz: enzymes to digest with :param mode: digestion mode (concurrent / sequential) + :param aa_pka: pKa values (IPC / Stryer) :type input_data: str :type enz: list(:py:class:`~rpg.enzyme.Enzyme`) :type mode: str + :type aa_pka: str :return: result of digestions :rtype: list(list(:py:class:`ResultOneDigestion`)) @@ -436,7 +449,7 @@ def digest_from_input(input_data, enz, mode): sequence.check_sequence(seq)) # Digest sequence results_digestion.append(digest_one_sequence - (tmp_seq, enz, mode)) + (tmp_seq, enz, mode, aa_pka)) seq = "" header = tmp_line tmp_line = in_file.readline().strip() @@ -445,7 +458,7 @@ def digest_from_input(input_data, enz, mode): sequence.check_sequence(seq)) # Digest it results_digestion.append(digest_one_sequence(tmp_seq, enz, - mode)) + mode, aa_pka)) # Fastq file elif header_first_car == "@": header = in_file.readline().strip() @@ -454,9 +467,8 @@ def digest_from_input(input_data, enz, mode): tmp_seq = sequence.Sequence(header[1:], sequence.check_sequence(seq)) # Digest sequence - results_digestion.append(digest_one_sequence(tmp_seq, - enz, - mode)) + results_digestion.append(digest_one_sequence(tmp_seq, enz, + mode, aa_pka)) in_file.readline() in_file.readline() header = in_file.readline().strip() @@ -468,6 +480,7 @@ def digest_from_input(input_data, enz, mode): tmp_seq = sequence.Sequence("Input", sequence.check_sequence(input_data)) # Digest the sequence - results_digestion.append(digest_one_sequence(tmp_seq, enz, mode)) + results_digestion.append(digest_one_sequence(tmp_seq, enz, mode, + aa_pka)) # Return all peptides return results_digestion diff --git a/rpg/enzymes_definition.py b/rpg/enzymes_definition.py index a7a27463add9425e08be05246f667984362c3da1..39e67f312209bf23531a4ba542f8f87ec2aac16f 100644 --- a/rpg/enzymes_definition.py +++ b/rpg/enzymes_definition.py @@ -931,7 +931,7 @@ ENZ.append(AFTER_G) ENZ.append(AFTER_S) ENZ.append(AFTER_E) ENZ.append(AFTER_Y) -ENZYME = enzyme.Enzyme(CPT_ENZ, "Papain", ENZ, 0) +ENZYME = enzyme.Enzyme(CPT_ENZ, "Ficin", ENZ, 0) # Add it to available enzymes AVAILABLE_ENZYMES.append(ENZYME) CPT_ENZ += 1 @@ -1398,42 +1398,6 @@ CPT_ENZ += 1 -# Tobacco etch virus protease -# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV -# RULES: cleaves between Q (P1) and G or S in P1' when Y in P3 and E in P6. -# RULES: cleaves after E-Xaa-Xaa-Y-Xaa-Q-(G/S) -ENZ = [] - -# Cutting rule -AFTER_Q = rule.Rule(0, "Q", False, 1) # Never cleaves after Q, except... - -# Exceptions -EXECPT_QG = rule.Rule(1, "G", False, -1) # Never cleaves after Q, followed by G -EXECPT_QS = rule.Rule(1, "S", False, -1) # Never cleaves after Q, followed by S - -EXECPT_Y_Qx = rule.Rule(-2, "Y", False, -1) # Never cleaves after Q, followed by G/S, preceded by Y - -EXECPT_E__Y_Qx = rule.Rule(-5, "E", True, -1) # Always cleaves after Q, followed by G/S, preceded by Y and preceded by E - -# Add exception to cutting rules: -EXECPT_Y_Qx.rules.append(EXECPT_E__Y_Qx) - -EXECPT_QG.rules.append(EXECPT_Y_Qx) -EXECPT_QS.rules.append(EXECPT_Y_Qx) - -# Add exception to cutting rules -AFTER_Q.rules.append(EXECPT_QG) -AFTER_Q.rules.append(EXECPT_QS) - -# Add rules to enzyme -ENZ.append(AFTER_Q) -ENZYME = enzyme.Enzyme(CPT_ENZ, "Tobacco-Etch-Virus", ENZ, 0) -# Add it to available enzymes -AVAILABLE_ENZYMES.append(ENZYME) -CPT_ENZ += 1 - - - # Thermolysin # https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#Therm # RULES: cleaves before A,F,I,L,M or V (P1') not preceded by D or E in P1 and not followed by P in P2' @@ -1693,6 +1657,42 @@ CPT_ENZ += 1 +# Tobacco etch virus protease +# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV +# RULES: cleaves between Q (P1) and G or S in P1' when Y in P3 and E in P6. +# RULES: cleaves after E-Xaa-Xaa-Y-Xaa-Q-(G/S) +ENZ = [] + +# Cutting rule +AFTER_Q = rule.Rule(0, "Q", False, 1) # Never cleaves after Q, except... + +# Exceptions +EXECPT_QG = rule.Rule(1, "G", False, -1) # Never cleaves after Q, followed by G +EXECPT_QS = rule.Rule(1, "S", False, -1) # Never cleaves after Q, followed by S + +EXECPT_Y_Qx = rule.Rule(-2, "Y", False, -1) # Never cleaves after Q, followed by G/S, preceded by Y + +EXECPT_E__Y_Qx = rule.Rule(-5, "E", True, -1) # Always cleaves after Q, followed by G/S, preceded by Y and preceded by E + +# Add exception to cutting rules: +EXECPT_Y_Qx.rules.append(EXECPT_E__Y_Qx) + +EXECPT_QG.rules.append(EXECPT_Y_Qx) +EXECPT_QS.rules.append(EXECPT_Y_Qx) + +# Add exception to cutting rules +AFTER_Q.rules.append(EXECPT_QG) +AFTER_Q.rules.append(EXECPT_QS) + +# Add rules to enzyme +ENZ.append(AFTER_Q) +ENZYME = enzyme.Enzyme(CPT_ENZ, "Tobacco-Etch-Virus", ENZ, 0) +# Add it to available enzymes +AVAILABLE_ENZYMES.append(ENZYME) +CPT_ENZ += 1 + + + # Trypsin # https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#Tryps # RULES: after K except if next aa is P. This rule doesn't apply if W is before K diff --git a/rpg/sequence.py b/rpg/sequence.py index dfc389a5c6315acf8b0aa8e6f0752792d58f31db..3371ed0c1f4238c11243a7b87f48e1342c83fab8 100644 --- a/rpg/sequence.py +++ b/rpg/sequence.py @@ -33,11 +33,13 @@ class Peptide: :param header: header of the peptide :param sequence: sequence in amino acids :param enzyme_name: name of the enzyme used + :param aa_pka: pKa values (IPC / Stryer) :param nb_peptide: number of this peptide (default: 0) :param position: position of cleavage on the original sequence (default: 0) :type header: str :type sequence: str :type enzyme_name: str + :type aa_pka: str :type nb_peptide: int :type position: int @@ -48,10 +50,12 @@ class Peptide: :vartype mass: float :vartype p_i: float """ - def __init__(self, header, sequence, enzyme_name, nb_peptide=0, position=0): + def __init__(self, header, sequence, enzyme_name, aa_pka, nb_peptide=0, + position=0): self.header = header # header of this peptide self.sequence = sequence # peptide sequence self.enzyme_name = enzyme_name # name of the enzyme used + self.aa_pka = aa_pka # pKa values for pI calculation self.nb_peptide = nb_peptide # number of this peptide self.position = position # position of cleavage self.size = len(sequence) # size of the peptide @@ -64,11 +68,15 @@ class Peptide: # self representation for print def __repr__(self): + pka = "IPC" + if self.aa_pka == core.AA_PKA_S: + pka = "Stryer" return "Original header: " + self.header + "\nNo. peptide: " + \ str(self.nb_peptide) + "\nEnzyme: " + self.enzyme_name + \ "\nCleav. pos: " + str(self.position) + "\nPep. size: " + \ - str(self.size) + "\nPep. mass: " + str(self.mass) + "\nPep. pI: " \ - + str(self.p_i) + "\nSequence: " + self.sequence + "\n" + str(self.size) + "\nPep. mass: " + str(self.mass) + \ + "\npKa values from: " + pka + "\nPep. pI: " + str(self.p_i) +\ + "\nSequence: " + self.sequence + "\n" # Equality between two Peptides def __eq__(self, other): @@ -117,22 +125,22 @@ class Peptide: # While we are not precise enough while (ph_val-ph_min > precision) or (ph_max-ph_val > precision): # Compute the pI - qn1 = -1.0 / (1.0 + pow(10, (core.AA_PKA["Cterm"] - ph_val))) - qn2 = -self.sequence.count('D') / (1.0 + pow(10, (core.AA_PKA["D"]- + qn1 = -1.0 / (1.0 + pow(10, (self.aa_pka["Cterm"] - ph_val))) + qn2 = -self.sequence.count('D') / (1.0 + pow(10, (self.aa_pka["D"]- ph_val))) - qn3 = -self.sequence.count('E') / (1.0 + pow(10, (core.AA_PKA["E"]- + qn3 = -self.sequence.count('E') / (1.0 + pow(10, (self.aa_pka["E"]- ph_val))) - qn4 = -self.sequence.count('C') / (1.0 + pow(10, (core.AA_PKA["C"]- + qn4 = -self.sequence.count('C') / (1.0 + pow(10, (self.aa_pka["C"]- ph_val))) - qn5 = -self.sequence.count('Y') / (1.0 + pow(10, (core.AA_PKA["Y"]- + qn5 = -self.sequence.count('Y') / (1.0 + pow(10, (self.aa_pka["Y"]- ph_val))) qp1 = self.sequence.count('H') / (1.0 + pow(10, (ph_val - - core.AA_PKA["H"]))) - qp2 = 1.0 / (1.0 + pow(10, (ph_val - core.AA_PKA["Nterm"]))) + self.aa_pka["H"]))) + qp2 = 1.0 / (1.0 + pow(10, (ph_val - self.aa_pka["Nterm"]))) qp3 = self.sequence.count('K') / (1.0 + pow(10, (ph_val - - core.AA_PKA["K"]))) + self.aa_pka["K"]))) qp4 = self.sequence.count('R') / (1.0 + pow(10, (ph_val - - core.AA_PKA["R"]))) + self.aa_pka["R"]))) nq_final = qn1 + qn2 + qn3 + qn4 + qn5 + qp1 + qp2 + qp3 + qp4 # We are below solution, good pH value must be smaller if nq_final < 0.0: diff --git a/setup.py b/setup.py index ec9795783162e92b307e92d481b85c3b70e4ea88..b5f3e20da0405dd3619abb7aedbf238f2f9b8c16 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ from setuptools import setup, find_packages _MAJOR = 1 _MINOR = 0 -_MICRO = 6 +_MICRO = 7 version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO) release = '%d.%d' % (_MAJOR, _MINOR) diff --git a/tests/test_RapidPeptidesGenerator.py b/tests/test_RapidPeptidesGenerator.py index df7ef32b0ba2b91520b039bd34236fd4c4002274..1f1cbf5484e7dc625ea41d03217f39abb8280a5e 100644 --- a/tests/test_RapidPeptidesGenerator.py +++ b/tests/test_RapidPeptidesGenerator.py @@ -49,17 +49,15 @@ def test_list_enzyme(capsys): RapidPeptidesGenerator.list_enzyme() out, _ = capsys.readouterr() assert out == "1: Arg-C\n2: Asp-N\n3: BNPS-Skatole\n4: Bromelain\n5: Casp"\ - "ase-1\n6: Caspase-2\n7: Caspase-3\n8: Caspase-4\n9: Caspas"\ - "e-5\n10: Caspase-6\n11: Caspase-7\n12: Caspase-8\n13: Casp"\ - "ase-9\n14: Caspase-10\n15: Chymotrypsin-high\n16: Chymotry"\ - "psin-low\n17: Clostripain\n18: CNBr\n19: Enterokinase\n20:"\ - " Factor-Xa\n21: Papain\n22: Formic-acid\n23: Glu-C\n24: Gl"\ - "utamyl-endopeptidase\n25: Granzyme-B\n26: Hydroxylamine\n2"\ - "7: Iodosobenzoic-acid\n28: Lys-C\n29: Lys-N\n30: Neutrophi"\ - "l-elastase\n31: NTCB\n32: Papain\n33: Pepsin-pH1.3\n34: Pe"\ - "psin-pH>=2\n35: Proline-endopeptidase\n36: Proteinase-K\n3"\ - "7: Staphylococcal-peptidase-I\n38: Tobacco-Etch-Virus\n39:"\ - " Thermolysin\n40: Thrombin\n41: Thrombin-SG\n42: Trypsin\n" + "ase-1\n6: Caspase-2\n7: Caspase-3\n8: Caspase-4\n9: Caspase-5\n10: Caspa"\ + "se-6\n11: Caspase-7\n12: Caspase-8\n13: Caspase-9\n14: Caspase-10\n15: C"\ + "hymotrypsin-high\n16: Chymotrypsin-low\n17: Clostripain\n18: CNBr\n19: E"\ + "nterokinase\n20: Factor-Xa\n21: Ficin\n22: Formic-acid\n23: Glu-C\n24: G"\ + "lutamyl-endopeptidase\n25: Granzyme-B\n26: Hydroxylamine\n27: Iodosobenz"\ + "oic-acid\n28: Lys-C\n29: Lys-N\n30: Neutrophil-elastase\n31: NTCB\n32: P"\ + "apain\n33: Pepsin-pH1.3\n34: Pepsin-pH>=2\n35: Proline-endopeptidase\n36"\ + ": Proteinase-K\n37: Staphylococcal-peptidase-I\n38: Thermolysin\n39: Thr"\ + "ombin\n40: Thrombin-SG\n41: Tobacco-Etch-Virus\n42: Trypsin\n" def test_create_enzymes_to_use(capsys): """Test function 'create_enzymes_to_use(enzymes, miscleavage)'""" diff --git a/tests/test_core.py b/tests/test_core.py index e5cb68ad10343cc4e0d72b984d5dc6a9ce01801b..75a3720dd9ccc2076eaed46a65c8ab161fb7083c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -75,7 +75,8 @@ def test_output_results(capsys, tmpdir): # CSV output seq = "WQSDESDFZQSDESDF" - all_seq_digested = digest.digest_from_input(seq, enzymes, mode) + aa_pka = core.AA_PKA_IPC + all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = False @@ -127,7 +128,7 @@ def test_output_results(capsys, tmpdir): # CSV output in quiet seq = "WQSDESDFZQSDESDF" - all_seq_digested = digest.digest_from_input(seq, enzymes, mode) + all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = True @@ -148,7 +149,7 @@ def test_output_results(capsys, tmpdir): # CSV output in verbose > 2 seq = "WQSDESDFZQSDESDF" - all_seq_digested = digest.digest_from_input(seq, enzymes, mode) + all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka) output_file = tmpdir.join("test_result.csv") fmt = "csv" quiet = False @@ -176,23 +177,25 @@ def test_peptide(): header = "Test" seq = "QWSDESDF" enz_name = "fake_enzyme" - pep0 = sequence.Peptide(header, seq, enz_name, 1, 3) + aa_pka = core.AA_PKA_IPC + pep0 = sequence.Peptide(header, seq, enz_name, aa_pka, 1, 3) # Test function '__repr__()' print_res = pep0.__repr__() assert print_res == "Original header: Test\nNo. peptide: 1\nEnzyme: fake_"\ "enzyme\nCleav. pos: 3\nPep. size: 8\nPep. mass: 1012"\ - ".98488\nPep. pI: 2.91\nSequence: QWSDESDF\n" + ".98488\npKa values from: IPC\nPep. pI: 2.91\nSequenc"\ + "e: QWSDESDF\n" # Test function '__eq__()' - pep1 = sequence.Peptide(header, seq, enz_name, 1, 3) + pep1 = sequence.Peptide(header, seq, enz_name, aa_pka, 1, 3) assert pep0 == pep1 # Test function '__ne__()' - pep2 = sequence.Peptide(header, seq, enz_name, 1, 2) - pep3 = sequence.Peptide(header, seq, enz_name, 2, 3) - pep4 = sequence.Peptide(header, seq, enz_name + "A", 1, 3) - pep5 = sequence.Peptide(header, seq + "A", enz_name, 1, 3) - pep6 = sequence.Peptide(header + "A", seq, enz_name, 1, 3) + pep2 = sequence.Peptide(header, seq, enz_name, aa_pka, 1, 2) + pep3 = sequence.Peptide(header, seq, enz_name, aa_pka, 2, 3) + pep4 = sequence.Peptide(header, seq, enz_name + "A", aa_pka, 1, 3) + pep5 = sequence.Peptide(header, seq + "A", enz_name, aa_pka, 1, 3) + pep6 = sequence.Peptide(header + "A", seq, enz_name, aa_pka, 1, 3) assert pep0 != pep2 assert pep0 != pep3 assert pep0 != pep4 diff --git a/tests/test_digest.py b/tests/test_digest.py index 49667d198320b6b1cbdc45a8983de900543f25c2..9735df5cce6059196c38f84d7eb3beede73a6053 100644 --- a/tests/test_digest.py +++ b/tests/test_digest.py @@ -5,6 +5,7 @@ from .context import rpg from rpg import digest from rpg import enzyme from rpg import rule +from rpg import core from sequence import Peptide, Sequence def test_resultonedigestion(): @@ -12,8 +13,9 @@ def test_resultonedigestion(): header = "Test" seq = "QWSDESDF" enz_name = "fake_enzyme" - pep0 = Peptide(header, seq, enz_name, 0, 3) - pep1 = Peptide(header, seq, enz_name, 1, 4) + aa_pka = core.AA_PKA_IPC + pep0 = Peptide(header, seq, enz_name, aa_pka, 0, 3) + pep1 = Peptide(header, seq, enz_name, aa_pka, 1, 4) peptides = [pep0, pep1] nb_cleav = 1 pos_mc = [2, 3] @@ -26,11 +28,12 @@ def test_resultonedigestion(): "66\nPeptides: [Original header: Test\nNo. "\ "peptide: 0\nEnzyme: fake_enzyme\nCleav. po"\ "s: 3\nPep. size: 8\nPep. mass: 1012.98488"\ - "\nPep. pI: 2.91\nSequence: QWSDESDF\n, Ori"\ - "ginal header: Test\nNo. peptide: 1\nEnzyme"\ - ": fake_enzyme\nCleav. pos: 4\nPep. size: 8"\ - "\nPep. mass: 1012.98488\nPep. pI: 2.91\nSe"\ - "quence: QWSDESDF\n]\n" + "\npKa values from: IPC\nPep. pI: 2.91\nSeq"\ + "uence: QWSDESDF\n, Original header: Test\n"\ + "No. peptide: 1\nEnzyme: fake_enzyme\nCleav"\ + ". pos: 4\nPep. size: 8\nPep. mass: 1012.98"\ + "488\npKa values from: IPC\nPep. pI: 2.91\n"\ + "Sequence: QWSDESDF\n]\n" # Test function '__eq__()' res_dig1 = digest.ResultOneDigestion(enz_name, peptides, nb_cleav, pos_mc) @@ -130,23 +133,24 @@ def test_one_digest(): rule_dict[rule_exc] = False all_rules = rule.create_rules(rule_dict) enz = enzyme.Enzyme(-1, "fake_enzyme", all_rules) + aa_pka = core.AA_PKA_IPC # Input sequence - pep = Peptide("Test", "WASD", enz.name) + pep = Peptide("Test", "WASD", enz.name, aa_pka) # Results, two peptides: 'WAS' and 'D' - res_pep0 = Peptide("Test", "WAS", enz.name, 0, 3) - res_pep1 = Peptide("Test", "D", enz.name, 1, 4) + res_pep0 = Peptide("Test", "WAS", enz.name, aa_pka, 0, 3) + res_pep1 = Peptide("Test", "D", enz.name, aa_pka, 1, 4) # Test it! - res = digest.one_digest(pep, enz) + res = digest.one_digest(pep, enz, aa_pka) assert res.enzyme_name == "fake_enzyme" assert res.peptides[0].__repr__() == res_pep0.__repr__() assert res.peptides[1].__repr__() == res_pep1.__repr__() # Input sequence - pep = Peptide("Test", "WADSD", enz.name) + pep = Peptide("Test", "WADSD", enz.name, aa_pka) # Results, no cut - res_pep1 = Peptide("Test", "WADSD", enz.name, 0, 0) + res_pep1 = Peptide("Test", "WADSD", enz.name, aa_pka, 0, 0) # Test it! - res = digest.one_digest(pep, enz) + res = digest.one_digest(pep, enz, aa_pka) assert res.enzyme_name == "fake_enzyme" assert res.peptides[0].__repr__() == res_pep1.__repr__() @@ -157,12 +161,12 @@ def test_one_digest(): all_rules = rule.create_rules(rule_dict) enz = enzyme.Enzyme(-1, "fake_enzyme", all_rules) # Input sequence - pep = Peptide("Test", "WADS", enz.name) + pep = Peptide("Test", "WADS", enz.name, aa_pka) # Results, two peptides: 'WAS' and 'D' - res_pep0 = Peptide("Test", "WAD", enz.name, 0, 3) - res_pep1 = Peptide("Test", "S", enz.name, 1, 4) + res_pep0 = Peptide("Test", "WAD", enz.name, aa_pka, 0, 3) + res_pep1 = Peptide("Test", "S", enz.name, aa_pka, 1, 4) # Test it! - res = digest.one_digest(pep, enz) + res = digest.one_digest(pep, enz, aa_pka) assert res.enzyme_name == "fake_enzyme" assert res.peptides[0].__repr__() == res_pep0.__repr__() assert res.peptides[1].__repr__() == res_pep1.__repr__() @@ -179,6 +183,7 @@ def test_digest_one_sequence(): all_rules = rule.create_rules(rule_dict) enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) enzymes.append(enz1) + aa_pka = core.AA_PKA_IPC # Second enzyme: cut after S rule_dict = {} @@ -194,13 +199,13 @@ def test_digest_one_sequence(): # Sequential, only one enzyme will cut mode = "sequential" # Enz1 will not cut - res_pep0 = Peptide("Test", "WASDESDF", enzymes[0].name, 0, 0) + res_pep0 = Peptide("Test", "WASDESDF", enzymes[0].name, aa_pka, 0, 0) # Enz2 will cut, three peptides: 'WAS', 'DES' and 'DF' - res_pep1 = Peptide("Test", "WAS", enzymes[1].name, 0, 3) - res_pep2 = Peptide("Test", "DES", enzymes[1].name, 1, 6) - res_pep3 = Peptide("Test", "DF", enzymes[1].name, 2, 8) + res_pep1 = Peptide("Test", "WAS", enzymes[1].name, aa_pka, 0, 3) + res_pep2 = Peptide("Test", "DES", enzymes[1].name, aa_pka, 1, 6) + res_pep3 = Peptide("Test", "DF", enzymes[1].name, aa_pka, 2, 8) # Test it! - res = digest.digest_one_sequence(seq, enzymes, mode) + res = digest.digest_one_sequence(seq, enzymes, mode, aa_pka) assert res[0].enzyme_name == "fake_enzyme1" assert res[0].peptides[0].__repr__() == res_pep0.__repr__() assert res[1].enzyme_name == "fake_enzyme2" @@ -212,13 +217,13 @@ def test_digest_one_sequence(): mode = "concurrent" enzs_name = enzymes[0].name + "-" + enzymes[1].name # Results, five peptides: 'WAS', 'D', ES', 'D', and 'F' - res_pep0 = Peptide("Test", "WAS", enzs_name, 0, 3) - res_pep1 = Peptide("Test", "D", enzs_name, 1, 4) - res_pep2 = Peptide("Test", "ES", enzs_name, 2, 6) - res_pep3 = Peptide("Test", "D", enzs_name, 3, 7) - res_pep4 = Peptide("Test", "F", enzs_name, 4, 8) + res_pep0 = Peptide("Test", "WAS", enzs_name, aa_pka, 0, 3) + res_pep1 = Peptide("Test", "D", enzs_name, aa_pka, 1, 4) + res_pep2 = Peptide("Test", "ES", enzs_name, aa_pka, 2, 6) + res_pep3 = Peptide("Test", "D", enzs_name, aa_pka, 3, 7) + res_pep4 = Peptide("Test", "F", enzs_name, aa_pka, 4, 8) # Test it! - res = digest.digest_one_sequence(seq, enzymes, mode) + res = digest.digest_one_sequence(seq, enzymes, mode, aa_pka) assert res[0].enzyme_name == enzs_name assert res[0].peptides[0].__repr__() == res_pep0.__repr__() assert res[0].peptides[1].__repr__() == res_pep1.__repr__() @@ -229,13 +234,13 @@ def test_digest_one_sequence(): # Error, so sequential, only one enzyme will cut mode = "pwet" # Enz1 will not cut - res_pep0 = Peptide("Test", "WASDESDF", enzymes[0].name, 0, 0) + res_pep0 = Peptide("Test", "WASDESDF", enzymes[0].name, aa_pka, 0, 0) # Enz2 will cut, three peptides: 'WAS', 'DES' and 'DF' - res_pep1 = Peptide("Test", "WAS", enzymes[1].name, 0, 3) - res_pep2 = Peptide("Test", "DES", enzymes[1].name, 1, 6) - res_pep3 = Peptide("Test", "DF", enzymes[1].name, 2, 8) + res_pep1 = Peptide("Test", "WAS", enzymes[1].name, aa_pka, 0, 3) + res_pep2 = Peptide("Test", "DES", enzymes[1].name, aa_pka, 1, 6) + res_pep3 = Peptide("Test", "DF", enzymes[1].name, aa_pka, 2, 8) # Test it! - res = digest.digest_one_sequence(seq, enzymes, mode) + res = digest.digest_one_sequence(seq, enzymes, mode, aa_pka) assert res[0].enzyme_name == "fake_enzyme1" assert res[0].peptides[0].__repr__() == res_pep0.__repr__() assert res[1].enzyme_name == "fake_enzyme2" @@ -253,6 +258,7 @@ def test_sequential_digest(): all_rules = rule.create_rules(rule_dict) enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) enzymes.append(enz1) + aa_pka = core.AA_PKA_IPC # Second enzyme: cut after S rule_dict = {} @@ -266,15 +272,15 @@ def test_sequential_digest(): seq = Sequence("Test", "WASDESDF") # Enz1 will cut - res_pep0 = Peptide("Test", "WASD", enzymes[0].name, 0, 4) - res_pep1 = Peptide("Test", "ESD", enzymes[0].name, 1, 7) - res_pep2 = Peptide("Test", "F", enzymes[0].name, 2, 8) + res_pep0 = Peptide("Test", "WASD", enzymes[0].name, aa_pka, 0, 4) + res_pep1 = Peptide("Test", "ESD", enzymes[0].name, aa_pka, 1, 7) + res_pep2 = Peptide("Test", "F", enzymes[0].name, aa_pka, 2, 8) # Enz2 will cut, three peptides: 'WAS', 'DES' and 'DF' - res_pep3 = Peptide("Test", "WAS", enzymes[1].name, 0, 3) - res_pep4 = Peptide("Test", "DES", enzymes[1].name, 1, 6) - res_pep5 = Peptide("Test", "DF", enzymes[1].name, 2, 8) + res_pep3 = Peptide("Test", "WAS", enzymes[1].name, aa_pka, 0, 3) + res_pep4 = Peptide("Test", "DES", enzymes[1].name, aa_pka, 1, 6) + res_pep5 = Peptide("Test", "DF", enzymes[1].name, aa_pka, 2, 8) # Test it! - res = digest.sequential_digest(seq, enzymes) + res = digest.sequential_digest(seq, enzymes, aa_pka) assert res[0].enzyme_name == "fake_enzyme1" assert res[0].peptides[0].__repr__() == res_pep0.__repr__() assert res[0].peptides[1].__repr__() == res_pep1.__repr__() @@ -294,6 +300,7 @@ def test_concurrent_digest(): all_rules = rule.create_rules(rule_dict) enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) enzymes.append(enz1) + aa_pka = core.AA_PKA_S # Second enzyme: cut after S rule_dict = {} @@ -307,14 +314,14 @@ def test_concurrent_digest(): seq = Sequence("Test", "WASDESDF") enzs_name = enzymes[0].name + "-" + enzymes[1].name # Results - res_pep0 = Peptide("Test", "WAS", enzs_name, 0, 3) - res_pep1 = Peptide("Test", "D", enzs_name, 1, 4) - res_pep2 = Peptide("Test", "ES", enzs_name, 2, 6) - res_pep3 = Peptide("Test", "D", enzs_name, 3, 7) - res_pep4 = Peptide("Test", "F", enzs_name, 4, 8) + res_pep0 = Peptide("Test", "WAS", enzs_name, aa_pka, 0, 3) + res_pep1 = Peptide("Test", "D", enzs_name, aa_pka, 1, 4) + res_pep2 = Peptide("Test", "ES", enzs_name, aa_pka, 2, 6) + res_pep3 = Peptide("Test", "D", enzs_name, aa_pka, 3, 7) + res_pep4 = Peptide("Test", "F", enzs_name, aa_pka, 4, 8) # Test it! - res = digest.concurrent_digest(seq, enzymes) + res = digest.concurrent_digest(seq, enzymes, aa_pka) assert res[0].enzyme_name == enzs_name assert res[0].peptides[0].__repr__() == res_pep0.__repr__() assert res[0].peptides[1].__repr__() == res_pep1.__repr__() @@ -331,11 +338,12 @@ def test_digest_from_input(capsys, tmpdir): enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) enzymes = [enz1] mode = "sequential" + aa_pka = core.AA_PKA_IPC # Test wrong file with pytest.raises(SystemExit) as pytest_wrapped_e: digest.digest_from_input(str(Path.home()) + "/rpg_user.py", - enzymes, mode) + enzymes, mode, aa_pka) _, err = capsys.readouterr() assert err == "Input Error: input file format not recognized (f).\n" assert pytest_wrapped_e.type == SystemExit @@ -343,92 +351,103 @@ def test_digest_from_input(capsys, tmpdir): # Test input data seq = "WQSDESDFZQSDESDF" - res = digest.digest_from_input(seq, enzymes, mode) + res = digest.digest_from_input(seq, enzymes, mode, aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ "[Original header: Input\nNo. peptide: 0"\ "\nEnzyme: fake_enzyme1\nCleav. pos: 4\nPe"\ - "p. size: 4\nPep. mass: 534.52598\nPep. pI"\ - ": 3.14\nSequence: WQSD\n, Original header:"\ - " Input\nNo. peptide: 1\nEnzyme: fake_enzy"\ - "me1\nCleav. pos: 7\nPep. size: 3\nPep. ma"\ - "ss: 349.29758\nPep. pI: 3.04\nSequence: ES"\ - "D\n, Original header: Input\nNo. peptide:"\ - " 2\nEnzyme: fake_enzyme1\nCleav. pos: 12"\ - "\nPep. size: 5\nPep. mass: 495.48938\nPep"\ - ". pI: 3.14\nSequence: FZQSD\n, Original he"\ - "ader: Input\nNo. peptide: 3\nEnzyme: fake"\ - "_enzyme1\nCleav. pos: 15\nPep. size: 3\nP"\ - "ep. mass: 349.29758\nPep. pI: 3.04\nSequen"\ - "ce: ESD\n, Original header: Input\nNo. pe"\ - "ptide: 4\nEnzyme: fake_enzyme1\nCleav. po"\ - "s: 16\nPep. size: 1\nPep. mass: 165.19188"\ - "\nPep. pI: 5.97\nSequence: F\n]\n" + "p. size: 4\nPep. mass: 534.52598\npKa val"\ + "ues from: IPC\nPep. pI: 3.14\nSequence: W"\ + "QSD\n, Original header: Input\nNo. peptid"\ + "e: 1\nEnzyme: fake_enzyme1\nCleav. pos: 7"\ + "\nPep. size: 3\nPep. mass: 349.29758\npKa"\ + " values from: IPC\nPep. pI: 3.04\nSequenc"\ + "e: ESD\n, Original header: Input\nNo. pep"\ + "tide: 2\nEnzyme: fake_enzyme1\nCleav. pos"\ + ": 12\nPep. size: 5\nPep. mass: 495.48938"\ + "\npKa values from: IPC\nPep. pI: 3.14\nSe"\ + "quence: FZQSD\n, Original header: Input\n"\ + "No. peptide: 3\nEnzyme: fake_enzyme1\nCle"\ + "av. pos: 15\nPep. size: 3\nPep. mass: 349"\ + ".29758\npKa values from: IPC\nPep. pI: 3."\ + "04\nSequence: ESD\n, Original header: Inp"\ + "ut\nNo. peptide: 4\nEnzyme: fake_enzyme1"\ + "\nCleav. pos: 16\nPep. size: 1\nPep. mass"\ + ": 165.19188\npKa values from: IPC\nPep. p"\ + "I: 5.97\nSequence: F\n]\n" # Test fasta file fasta_file = tmpdir.join("test.fasta") fasta_file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET") - res = digest.digest_from_input(str(fasta_file), enzymes, mode) + res = digest.digest_from_input(str(fasta_file), enzymes, mode, aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ "[Original header: Fake1\nNo. peptide: 0\n"\ "Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\ - " size: 4\nPep. mass: 534.52598\nPep. pI: "\ - "3.14\nSequence: WQSD\n, Original header: F"\ - "ake1\nNo. peptide: 1\nEnzyme: fake_enzyme"\ - "1\nCleav. pos: 7\nPep. size: 3\nPep. mass"\ - ": 349.29758\nPep. pI: 3.04\nSequence: ESD"\ - "\n, Original header: Fake1\nNo. peptide: "\ - "2\nEnzyme: fake_enzyme1\nCleav. pos: 12\n"\ - "Pep. size: 5\nPep. mass: 495.48938\nPep. "\ - "pI: 3.14\nSequence: FZQSD\n, Original head"\ - "er: Fake1\nNo. peptide: 3\nEnzyme: fake_e"\ - "nzyme1\nCleav. pos: 15\nPep. size: 3\nPep"\ - ". mass: 349.29758\nPep. pI: 3.04\nSequence"\ - ": ESD\n, Original header: Fake1\nNo. pept"\ - "ide: 4\nEnzyme: fake_enzyme1\nCleav. pos:"\ - " 16\nPep. size: 1\nPep. mass: 165.19188\n"\ - "Pep. pI: 5.97\nSequence: F\n]\n" - assert res[1][0].__repr__() == "Number of cleavage: 0\nNumber of miscle"\ - "avage: 0\nPositions of miscleavage: []"\ - "\nRatio of miscleavage: 0\nPeptides: [O"\ - "riginal header: Fake2\nNo. peptide: 0\nEn"\ - "zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\ - "ize: 15\nPep. mass: 2014.35098\nPep. pI: "\ - "7.16\nSequence: NPHARDORCOMPLET\n]\n" + " size: 4\nPep. mass: 534.52598\npKa value"\ + "s from: IPC\nPep. pI: 3.14\nSequence: WQS"\ + "D\n, Original header: Fake1\nNo. peptide:"\ + " 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\ + "Pep. size: 3\nPep. mass: 349.29758\npKa v"\ + "alues from: IPC\nPep. pI: 3.04\nSequence:"\ + " ESD\n, Original header: Fake1\nNo. pepti"\ + "de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\ + "12\nPep. size: 5\nPep. mass: 495.48938\np"\ + "Ka values from: IPC\nPep. pI: 3.14\nSeque"\ + "nce: FZQSD\n, Original header: Fake1\nNo."\ + " peptide: 3\nEnzyme: fake_enzyme1\nCleav."\ + " pos: 15\nPep. size: 3\nPep. mass: 349.29"\ + "758\npKa values from: IPC\nPep. pI: 3.04"\ + "\nSequence: ESD\n, Original header: Fake1"\ + "\nNo. peptide: 4\nEnzyme: fake_enzyme1\nC"\ + "leav. pos: 16\nPep. size: 1\nPep. mass: 1"\ + "65.19188\npKa values from: IPC\nPep. pI: "\ + "5.97\nSequence: F\n]\n" + assert res[1][0].__repr__() == "Number of cleavage: 0\nNumber of miscleav"\ + "age: 0\nPositions of miscleavage: []\nRat"\ + "io of miscleavage: 0\nPeptides: [Original"\ + " header: Fake2\nNo. peptide: 0\nEnzyme: f"\ + "ake_enzyme1\nCleav. pos: 0\nPep. size: 15"\ + "\nPep. mass: 2014.35098\npKa values from:"\ + " IPC\nPep. pI: 7.16\nSequence: NPHARDORCO"\ + "MPLET\n]\n" # Test fastq file (same result) fastq_file = tmpdir.join("test.fastq") fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@Fa"\ "ke2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n") - res = digest.digest_from_input(str(fastq_file), enzymes, mode) + res = digest.digest_from_input(str(fastq_file), enzymes, mode, aa_pka) assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0.0\nPeptides: "\ "[Original header: Fake1\nNo. peptide: 0\n"\ "Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\ - " size: 4\nPep. mass: 534.52598\nPep. pI: "\ - "3.14\nSequence: WQSD\n, Original header: F"\ - "ake1\nNo. peptide: 1\nEnzyme: fake_enzyme"\ - "1\nCleav. pos: 7\nPep. size: 3\nPep. mass"\ - ": 349.29758\nPep. pI: 3.04\nSequence: ESD"\ - "\n, Original header: Fake1\nNo. peptide: "\ - "2\nEnzyme: fake_enzyme1\nCleav. pos: 12\n"\ - "Pep. size: 5\nPep. mass: 495.48938\nPep. "\ - "pI: 3.14\nSequence: FZQSD\n, Original head"\ - "er: Fake1\nNo. peptide: 3\nEnzyme: fake_e"\ - "nzyme1\nCleav. pos: 15\nPep. size: 3\nPep"\ - ". mass: 349.29758\nPep. pI: 3.04\nSequence"\ - ": ESD\n, Original header: Fake1\nNo. pept"\ - "ide: 4\nEnzyme: fake_enzyme1\nCleav. pos:"\ - " 16\nPep. size: 1\nPep. mass: 165.19188\n"\ - "Pep. pI: 5.97\nSequence: F\n]\n" + " size: 4\nPep. mass: 534.52598\npKa value"\ + "s from: IPC\nPep. pI: 3.14\nSequence: WQS"\ + "D\n, Original header: Fake1\nNo. peptide:"\ + " 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\ + "Pep. size: 3\nPep. mass: 349.29758\npKa v"\ + "alues from: IPC\nPep. pI: 3.04\nSequence:"\ + " ESD\n, Original header: Fake1\nNo. pepti"\ + "de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\ + "12\nPep. size: 5\nPep. mass: 495.48938\np"\ + "Ka values from: IPC\nPep. pI: 3.14\nSeque"\ + "nce: FZQSD\n, Original header: Fake1\nNo."\ + " peptide: 3\nEnzyme: fake_enzyme1\nCleav."\ + " pos: 15\nPep. size: 3\nPep. mass: 349.29"\ + "758\npKa values from: IPC\nPep. pI: 3.04"\ + "\nSequence: ESD\n, Original header: Fake1"\ + "\nNo. peptide: 4\nEnzyme: fake_enzyme1\nC"\ + "leav. pos: 16\nPep. size: 1\nPep. mass: 1"\ + "65.19188\npKa values from: IPC\nPep. pI: "\ + "5.97\nSequence: F\n]\n" assert res[1][0].__repr__() == "Number of cleavage: 0\nNumber of miscle"\ "avage: 0\nPositions of miscleavage: []"\ "\nRatio of miscleavage: 0\nPeptides: [O"\ "riginal header: Fake2\nNo. peptide: 0\nEn"\ "zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\ - "ize: 15\nPep. mass: 2014.35098\nPep. pI: "\ - "7.16\nSequence: NPHARDORCOMPLET\n]\n" + "ize: 15\nPep. mass: 2014.35098\npKa value"\ + "s from: IPC\nPep. pI: 7.16\nSequence: NPH"\ + "ARDORCOMPLET\n]\n"