Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Nicolas MAILLET
rpg
Commits
409509ee
Commit
409509ee
authored
Jan 21, 2019
by
Nicolas MAILLET
Browse files
Add option for pKa values (-p option) and correct order for enzymes
parent
51691249
Pipeline
#9127
passed with stages
in 58 seconds
Changes
12
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
CHANGELOG.rst
View file @
409509ee
=========
CHANGELOG
=========
- 1.0.7
Adding choice for pKa values (option -p)
Fixing alphabetic order for enzymes
- 1.0.6
No default output file, only stdout
...
...
docs/conf.py
View file @
409509ee
...
...
@@ -20,13 +20,13 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__),
# -- Project information -----------------------------------------------------
project
=
'RapidPeptidesGenerator'
copyright
=
'201
8
, Nicolas Maillet'
copyright
=
'201
9
, Nicolas Maillet'
author
=
'Nicolas Maillet'
# The short X.Y version
version
=
''
# The full version, including alpha/beta/rc tags
release
=
'1.0.
6
'
release
=
'1.0.
7
'
# -- General configuration ---------------------------------------------------
...
...
docs/enzymes.rst
View file @
409509ee
...
...
@@ -36,8 +36,8 @@ Available enzymes
28: :ref:`lysc` 29: :ref:`lysn` 30: :ref:`neut`
31: :ref:`ntcb` 32: :ref:`pap` 33: :ref:`peps13`
34: :ref:`peps2` 35: :ref:`prol` 36: :ref:`protk`
37: :ref:`staphI` 38: :ref:`t
ev`
39: :ref:`th
e
rm`
40: :ref:`throm
`
41: :ref:`t
hromsg`
42: :ref:`tryps`
37: :ref:`staphI` 38: :ref:`t
herm`
39: :ref:`thr
o
m`
40: :ref:`throm
sg`
41: :ref:`t
ev`
42: :ref:`tryps`
================== ================== ==================
.. _arg-c:
...
...
@@ -747,23 +747,6 @@ More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.ht
.. _tev:
Tobacco etch virus protease
...........................
Tobacco etch virus protease (TEV) preferentially cleaves after Q (`P1`) when followed by G or S in `P1'` and preceded by Y in `P3` and E in `P6`.
**RPG definition:**
cleaving rule:
* ``(E)()()(Y)()(Q,)(G or S)``
More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV
.. _therm:
Thermolysin
...
...
@@ -836,6 +819,23 @@ More information: see thrombin cleavage kits of
.. _tev:
Tobacco etch virus protease
...........................
Tobacco etch virus protease (TEV) preferentially cleaves after Q (`P1`) when followed by G or S in `P1'` and preceded by Y in `P3` and E in `P6`.
**RPG definition:**
cleaving rule:
* ``(E)()()(Y)()(Q,)(G or S)``
More information: https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV
.. _tryps:
Trypsin
...
...
rpg/RapidPeptidesGenerator.py
View file @
409509ee
...
...
@@ -29,9 +29,9 @@
necessary functions
"""
__version_info__
=
(
'1'
,
'0'
,
'
6
'
)
__version_info__
=
(
'1'
,
'0'
,
'
7
'
)
__version__
=
'.'
.
join
(
__version_info__
)
__revision_date__
=
"201
8-11-19
"
__revision_date__
=
"201
9-01-21
"
__author__
=
"Nicolas Maillet"
import
argparse
...
...
@@ -275,6 +275,9 @@ def main():
"error(s) (--quiet enable, overwrite -v). If output "
"filename already exists, output file will be "
"overwritten."
)
parser
.
add_argument
(
"-p"
,
"--pka"
,
metavar
=
""
,
choices
=
[
'ipc'
,
'stryer'
],
default
=
"ipc"
,
help
=
"Define pKa values. Either 'ipc' "
"or 'stryer (default: ipc)"
)
group_output
=
parser
.
add_mutually_exclusive_group
()
group_output
.
add_argument
(
"-o"
,
"--outputfile"
,
type
=
str
,
metavar
=
""
,
default
=
""
,
help
=
"Optional result file "
...
...
@@ -303,6 +306,11 @@ def main():
mode
=
"concurrent"
args
.
miscleavage
=
[]
# No miscleavage on concurrent, infinite time
# --pka option
aa_pka
=
core
.
AA_PKA_IPC
if
args
.
pka
==
"stryer"
:
aa_pka
=
core
.
AA_PKA_S
# --list option
if
args
.
list
:
list_enzyme
()
...
...
@@ -371,7 +379,7 @@ def main():
# Make the actual digestion of input data
results_digestion
=
digest
.
digest_from_input
(
args
.
inputdata
,
enzymes_to_use
,
mode
)
enzymes_to_use
,
mode
,
aa_pka
)
# Output results
core
.
output_results
(
output_file
,
results_digestion
,
args
.
fmt
,
args
.
quiet
,
...
...
rpg/core.py
View file @
409509ee
...
...
@@ -64,26 +64,27 @@ WATER_MASS = 18.01528
"""Mass of a water molecule."""
# Biochemistry Stryer 7th
#AA_PKA = {"Nterm" : 8.0,
# "C" : 8.3,
# "D" : 4.1,
# "E" : 4.1,
# "H" : 6.0,
# "K" : 10.8,
# "R" : 12.5,
# "Y" : 10.9,
# "Cterm" : 3.1}
AA_PKA_S
=
{
"Nterm"
:
8.0
,
"C"
:
8.3
,
"D"
:
4.1
,
"E"
:
4.1
,
"H"
:
6.0
,
"K"
:
10.8
,
"R"
:
12.5
,
"Y"
:
10.9
,
"Cterm"
:
3.1
}
"""pKa of important amino acid to compute pI (from Stryer)."""
# IPC_peptide
AA_PKA
=
{
"Nterm"
:
9.564
,
"C"
:
8.297
,
"D"
:
3.887
,
"E"
:
4.317
,
"H"
:
6.018
,
"K"
:
10.517
,
"R"
:
12.503
,
"Y"
:
10.071
,
"Cterm"
:
2.383
}
"""pKa of important amino acid to compute pI."""
AA_PKA
_IPC
=
{
"Nterm"
:
9.564
,
"C"
:
8.297
,
"D"
:
3.887
,
"E"
:
4.317
,
"H"
:
6.018
,
"K"
:
10.517
,
"R"
:
12.503
,
"Y"
:
10.071
,
"Cterm"
:
2.383
}
"""pKa of important amino acid to compute pI
(from IPC)
."""
def
handle_errors
(
message
=
""
,
err
=
1
,
error_type
=
""
):
"""Custom handling of errors and warnings.
...
...
rpg/digest.py
View file @
409509ee
...
...
@@ -205,13 +205,15 @@ class ResultOneDigestion:
ret
+=
"C terminal peptide: "
+
self
.
peptides
[
-
1
].
sequence
return
ret
def
one_digest
(
pep
,
enz
):
def
one_digest
(
pep
,
enz
,
aa_pka
):
"""Digest a peptide with an enzyme.
:param pep: peptide to digest
:param enz: enzyme to digest with
:param aa_pka: pKa values (IPC / Stryer)
:type pep: :py:class:`~rpg.sequence.Peptide`
:type enz: :py:class:`~rpg.enzyme.Enzyme`
:type aa_pka: dict
:return: result of the digestion
:rtype: :py:class:`ResultOneDigestion`
...
...
@@ -235,7 +237,7 @@ def one_digest(pep, enz):
after
=
False
tmp_seq
=
pep
.
sequence
[
previous_pos
:
pos
]
tmp_peptide
=
sequence
.
Peptide
(
pep
.
header
,
tmp_seq
,
enzyme_name
,
cpt
,
pos
+
original_pos
)
aa_pka
,
cpt
,
pos
+
original_pos
)
ret
.
add_peptide
(
tmp_peptide
)
cpt
+=
1
a_cut_occurs
=
True
...
...
@@ -272,7 +274,8 @@ def one_digest(pep, enz):
if
previous_pos
!=
pos
:
tmp_seq
=
pep
.
sequence
[
previous_pos
:
pos
]
tmp_peptide
=
sequence
.
Peptide
(
pep
.
header
,
tmp_seq
,
enzyme_name
,
cpt
,
enzyme_name
,
aa_pka
,
cpt
,
pos
+
original_pos
)
ret
.
add_peptide
(
tmp_peptide
)
cpt
+=
1
...
...
@@ -291,45 +294,49 @@ def one_digest(pep, enz):
if
a_cut_occurs
:
tmp_pos
=
len
(
pep
.
sequence
)
# Last portion of protein
tmp_seq
=
pep
.
sequence
[
previous_pos
:]
tmp_peptide
=
sequence
.
Peptide
(
pep
.
header
,
tmp_seq
,
enzyme_name
,
cpt
,
tmp_pos
+
original_pos
)
tmp_peptide
=
sequence
.
Peptide
(
pep
.
header
,
tmp_seq
,
enzyme_name
,
aa_pka
,
cpt
,
tmp_pos
+
original_pos
)
ret
.
add_peptide
(
tmp_peptide
)
# Not cut, don't change the peptide
else
:
ret
.
add_peptide
(
pep
)
return
ret
def
digest_one_sequence
(
seq
,
enz
,
mode
):
def
digest_one_sequence
(
seq
,
enz
,
mode
,
aa_pka
):
"""Launch a digest procedure on one sequence.
:param sequence: sequence to digest
:param enz: enzymes to digest with
:param mode: digestion mode (concurrent / sequential)
:param aa_pka: pKa values (IPC / Stryer)
:type sequence: :py:class:`~rpg.sequence.Sequence`
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type mode: str
:type aa_pka: dict
:return: result of the digestion
:rtype: list(:py:class:`ResultOneDigestion`)
"""
ret
=
None
if
mode
==
"sequential"
:
ret
=
sequential_digest
(
seq
,
enz
)
ret
=
sequential_digest
(
seq
,
enz
,
aa_pka
)
elif
mode
==
"concurrent"
:
ret
=
concurrent_digest
(
seq
,
enz
)
ret
=
concurrent_digest
(
seq
,
enz
,
aa_pka
)
else
:
core
.
handle_errors
(
"not able to understand digetion mode. Switching "
"to 'sequential'."
)
ret
=
sequential_digest
(
seq
,
enz
)
ret
=
sequential_digest
(
seq
,
enz
,
aa_pka
)
return
ret
def
sequential_digest
(
seq
,
enz
):
def
sequential_digest
(
seq
,
enz
,
aa_pka
):
"""Sequentially digest a sequence with all Enzymes, **one by one**.
:param seq: sequence to digest
:param enz: enzymes to digest with
:param aa_pka: pKa values (IPC / Stryer)
:type seq: :py:class:`~rpg.sequence.Sequence`
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type aa_pka: dict
:return: result of the digestion
:rtype: list(:py:class:`ResultOneDigestion`)
...
...
@@ -338,19 +345,22 @@ def sequential_digest(seq, enz):
# Check each enzymes
for
an_enz
in
enz
:
# Create a fake peptide from input sequence
fake_peptide
=
sequence
.
Peptide
(
seq
.
header
,
seq
.
sequence
,
an_enz
.
name
)
fake_peptide
=
sequence
.
Peptide
(
seq
.
header
,
seq
.
sequence
,
an_enz
.
name
,
aa_pka
)
# Digest it
ret
.
append
(
one_digest
(
fake_peptide
,
an_enz
))
ret
.
append
(
one_digest
(
fake_peptide
,
an_enz
,
aa_pka
))
return
ret
def
concurrent_digest
(
seq
,
enz
):
def
concurrent_digest
(
seq
,
enz
,
aa_pka
):
"""Concurrently digest a sequence with all Enzymes **at the same
time**.
:param seq: sequence to digest
:param enz: enzymes to digest with
:param aa_pka: pKa values (IPC / Stryer)
:type seq: :py:class:`~rpg.sequence.Sequence`
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type aa_pka: dict
:return: result of the digestion
:rtype: list(:py:class:`ResultOneDigestion`)
...
...
@@ -363,7 +373,7 @@ def concurrent_digest(seq, enz):
enzymes_name_to_write
=
enzymes_name_to_write
[:
-
1
]
# First peptide is the sequence itself
fake_peptide
=
sequence
.
Peptide
(
seq
.
header
,
seq
.
sequence
,
enzymes_name_to_write
)
enzymes_name_to_write
,
aa_pka
)
# Result is currently just the sequence (list of one peptide)
result
=
ResultOneDigestion
(
enzymes_name_to_write
,
[
fake_peptide
])
# Do we digest as much as we can?
...
...
@@ -380,7 +390,8 @@ def concurrent_digest(seq, enz):
# remove them from the global result
for
peptide
in
result
.
pop_peptides
():
# Digest it, return a list of ResultOneDigestion
all_res_digestion_tmp
.
append
(
one_digest
(
peptide
,
an_enz
))
all_res_digestion_tmp
.
append
(
one_digest
(
peptide
,
an_enz
,
aa_pka
))
# Merge the result of digestion with previous result
for
i
in
all_res_digestion_tmp
:
result
.
merge
(
i
)
...
...
@@ -398,16 +409,18 @@ def concurrent_digest(seq, enz):
# it will be one result by enzyme
return
[
result
]
def
digest_from_input
(
input_data
,
enz
,
mode
):
def
digest_from_input
(
input_data
,
enz
,
mode
,
aa_pka
):
"""Digest all sequences of input data with
selected enzymes and mode.
:param input_data: either a sequence or a file of sequence (fasta/fastq)
:param enz: enzymes to digest with
:param mode: digestion mode (concurrent / sequential)
:param aa_pka: pKa values (IPC / Stryer)
:type input_data: str
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type mode: str
:type aa_pka: dict
:return: result of digestions
:rtype: list(list(:py:class:`ResultOneDigestion`))
...
...
@@ -436,7 +449,7 @@ def digest_from_input(input_data, enz, mode):
sequence
.
check_sequence
(
seq
))
# Digest sequence
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
))
(
tmp_seq
,
enz
,
mode
,
aa_pka
))
seq
=
""
header
=
tmp_line
tmp_line
=
in_file
.
readline
().
strip
()
...
...
@@ -445,7 +458,7 @@ def digest_from_input(input_data, enz, mode):
sequence
.
check_sequence
(
seq
))
# Digest it
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
))
mode
,
aa_pka
))
# Fastq file
elif
header_first_car
==
"@"
:
header
=
in_file
.
readline
().
strip
()
...
...
@@ -454,9 +467,8 @@ def digest_from_input(input_data, enz, mode):
tmp_seq
=
sequence
.
Sequence
(
header
[
1
:],
sequence
.
check_sequence
(
seq
))
# Digest sequence
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
))
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
,
aa_pka
))
in_file
.
readline
()
in_file
.
readline
()
header
=
in_file
.
readline
().
strip
()
...
...
@@ -468,6 +480,7 @@ def digest_from_input(input_data, enz, mode):
tmp_seq
=
sequence
.
Sequence
(
"Input"
,
sequence
.
check_sequence
(
input_data
))
# Digest the sequence
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
))
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
,
aa_pka
))
# Return all peptides
return
results_digestion
rpg/enzymes_definition.py
View file @
409509ee
...
...
@@ -931,7 +931,7 @@ ENZ.append(AFTER_G)
ENZ
.
append
(
AFTER_S
)
ENZ
.
append
(
AFTER_E
)
ENZ
.
append
(
AFTER_Y
)
ENZYME
=
enzyme
.
Enzyme
(
CPT_ENZ
,
"
Papa
in"
,
ENZ
,
0
)
ENZYME
=
enzyme
.
Enzyme
(
CPT_ENZ
,
"
Fic
in"
,
ENZ
,
0
)
# Add it to available enzymes
AVAILABLE_ENZYMES
.
append
(
ENZYME
)
CPT_ENZ
+=
1
...
...
@@ -1398,42 +1398,6 @@ CPT_ENZ += 1
# Tobacco etch virus protease
# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV
# RULES: cleaves between Q (P1) and G or S in P1' when Y in P3 and E in P6.
# RULES: cleaves after E-Xaa-Xaa-Y-Xaa-Q-(G/S)
ENZ
=
[]
# Cutting rule
AFTER_Q
=
rule
.
Rule
(
0
,
"Q"
,
False
,
1
)
# Never cleaves after Q, except...
# Exceptions
EXECPT_QG
=
rule
.
Rule
(
1
,
"G"
,
False
,
-
1
)
# Never cleaves after Q, followed by G
EXECPT_QS
=
rule
.
Rule
(
1
,
"S"
,
False
,
-
1
)
# Never cleaves after Q, followed by S
EXECPT_Y_Qx
=
rule
.
Rule
(
-
2
,
"Y"
,
False
,
-
1
)
# Never cleaves after Q, followed by G/S, preceded by Y
EXECPT_E__Y_Qx
=
rule
.
Rule
(
-
5
,
"E"
,
True
,
-
1
)
# Always cleaves after Q, followed by G/S, preceded by Y and preceded by E
# Add exception to cutting rules:
EXECPT_Y_Qx
.
rules
.
append
(
EXECPT_E__Y_Qx
)
EXECPT_QG
.
rules
.
append
(
EXECPT_Y_Qx
)
EXECPT_QS
.
rules
.
append
(
EXECPT_Y_Qx
)
# Add exception to cutting rules
AFTER_Q
.
rules
.
append
(
EXECPT_QG
)
AFTER_Q
.
rules
.
append
(
EXECPT_QS
)
# Add rules to enzyme
ENZ
.
append
(
AFTER_Q
)
ENZYME
=
enzyme
.
Enzyme
(
CPT_ENZ
,
"Tobacco-Etch-Virus"
,
ENZ
,
0
)
# Add it to available enzymes
AVAILABLE_ENZYMES
.
append
(
ENZYME
)
CPT_ENZ
+=
1
# Thermolysin
# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#Therm
# RULES: cleaves before A,F,I,L,M or V (P1') not preceded by D or E in P1 and not followed by P in P2'
...
...
@@ -1693,6 +1657,42 @@ CPT_ENZ += 1
# Tobacco etch virus protease
# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#TEV
# RULES: cleaves between Q (P1) and G or S in P1' when Y in P3 and E in P6.
# RULES: cleaves after E-Xaa-Xaa-Y-Xaa-Q-(G/S)
ENZ
=
[]
# Cutting rule
AFTER_Q
=
rule
.
Rule
(
0
,
"Q"
,
False
,
1
)
# Never cleaves after Q, except...
# Exceptions
EXECPT_QG
=
rule
.
Rule
(
1
,
"G"
,
False
,
-
1
)
# Never cleaves after Q, followed by G
EXECPT_QS
=
rule
.
Rule
(
1
,
"S"
,
False
,
-
1
)
# Never cleaves after Q, followed by S
EXECPT_Y_Qx
=
rule
.
Rule
(
-
2
,
"Y"
,
False
,
-
1
)
# Never cleaves after Q, followed by G/S, preceded by Y
EXECPT_E__Y_Qx
=
rule
.
Rule
(
-
5
,
"E"
,
True
,
-
1
)
# Always cleaves after Q, followed by G/S, preceded by Y and preceded by E
# Add exception to cutting rules:
EXECPT_Y_Qx
.
rules
.
append
(
EXECPT_E__Y_Qx
)
EXECPT_QG
.
rules
.
append
(
EXECPT_Y_Qx
)
EXECPT_QS
.
rules
.
append
(
EXECPT_Y_Qx
)
# Add exception to cutting rules
AFTER_Q
.
rules
.
append
(
EXECPT_QG
)
AFTER_Q
.
rules
.
append
(
EXECPT_QS
)
# Add rules to enzyme
ENZ
.
append
(
AFTER_Q
)
ENZYME
=
enzyme
.
Enzyme
(
CPT_ENZ
,
"Tobacco-Etch-Virus"
,
ENZ
,
0
)
# Add it to available enzymes
AVAILABLE_ENZYMES
.
append
(
ENZYME
)
CPT_ENZ
+=
1
# Trypsin
# https://web.expasy.org/peptide_cutter/peptidecutter_enzymes.html#Tryps
# RULES: after K except if next aa is P. This rule doesn't apply if W is before K
...
...
rpg/sequence.py
View file @
409509ee
...
...
@@ -33,11 +33,13 @@ class Peptide:
:param header: header of the peptide
:param sequence: sequence in amino acids
:param enzyme_name: name of the enzyme used
:param aa_pka: pKa values (IPC / Stryer)
:param nb_peptide: number of this peptide (default: 0)
:param position: position of cleavage on the original sequence (default: 0)
:type header: str
:type sequence: str
:type enzyme_name: str
:type aa_pka: dict
:type nb_peptide: int
:type position: int
...
...
@@ -48,10 +50,12 @@ class Peptide:
:vartype mass: float
:vartype p_i: float
"""
def
__init__
(
self
,
header
,
sequence
,
enzyme_name
,
nb_peptide
=
0
,
position
=
0
):
def
__init__
(
self
,
header
,
sequence
,
enzyme_name
,
aa_pka
,
nb_peptide
=
0
,
position
=
0
):
self
.
header
=
header
# header of this peptide
self
.
sequence
=
sequence
# peptide sequence
self
.
enzyme_name
=
enzyme_name
# name of the enzyme used
self
.
aa_pka
=
aa_pka
# pKa values for pI calculation
self
.
nb_peptide
=
nb_peptide
# number of this peptide
self
.
position
=
position
# position of cleavage
self
.
size
=
len
(
sequence
)
# size of the peptide
...
...
@@ -64,11 +68,15 @@ class Peptide:
# self representation for print
def
__repr__
(
self
):
pka
=
"IPC"
if
self
.
aa_pka
==
core
.
AA_PKA_S
:
pka
=
"Stryer"
return
"Original header: "
+
self
.
header
+
"
\n
No. peptide: "
+
\
str
(
self
.
nb_peptide
)
+
"
\n
Enzyme: "
+
self
.
enzyme_name
+
\
"
\n
Cleav. pos: "
+
str
(
self
.
position
)
+
"
\n
Pep. size: "
+
\
str
(
self
.
size
)
+
"
\n
Pep. mass: "
+
str
(
self
.
mass
)
+
"
\n
Pep. pI: "
\
+
str
(
self
.
p_i
)
+
"
\n
Sequence: "
+
self
.
sequence
+
"
\n
"
str
(
self
.
size
)
+
"
\n
Pep. mass: "
+
str
(
self
.
mass
)
+
\
"
\n
pKa values from: "
+
pka
+
"
\n
Pep. pI: "
+
str
(
self
.
p_i
)
+
\
"
\n
Sequence: "
+
self
.
sequence
+
"
\n
"
# Equality between two Peptides
def
__eq__
(
self
,
other
):
...
...
@@ -117,22 +125,22 @@ class Peptide:
# While we are not precise enough
while
(
ph_val
-
ph_min
>
precision
)
or
(
ph_max
-
ph_val
>
precision
):
# Compute the pI
qn1
=
-
1.0
/
(
1.0
+
pow
(
10
,
(
core
.
AA_PKA
[
"Cterm"
]
-
ph_val
)))
qn2
=
-
self
.
sequence
.
count
(
'D'
)
/
(
1.0
+
pow
(
10
,
(
core
.
AA_PKA
[
"D"
]
-
qn1
=
-
1.0
/
(
1.0
+
pow
(
10
,
(
self
.
aa_pka
[
"Cterm"
]
-
ph_val
)))
qn2
=
-
self
.
sequence
.
count
(
'D'
)
/
(
1.0
+
pow
(
10
,
(
self
.
aa_pka
[
"D"
]
-
ph_val
)))
qn3
=
-
self
.
sequence
.
count
(
'E'
)
/
(
1.0
+
pow
(
10
,
(
core
.
AA_PKA
[
"E"
]
-
qn3
=
-
self
.
sequence
.
count
(
'E'
)
/
(
1.0
+
pow
(
10
,
(
self
.
aa_pka
[
"E"
]
-
ph_val
)))
qn4
=
-
self
.
sequence
.
count
(
'C'
)
/
(
1.0
+
pow
(
10
,
(
core
.
AA_PKA
[
"C"
]
-
qn4
=
-
self
.
sequence
.
count
(
'C'
)
/
(
1.0
+
pow
(
10
,
(
self
.
aa_pka
[
"C"
]
-
ph_val
)))
qn5
=
-
self
.
sequence
.
count
(
'Y'
)
/
(
1.0
+
pow
(
10
,
(
core
.
AA_PKA
[
"Y"
]
-
qn5
=
-
self
.
sequence
.
count
(
'Y'
)
/
(
1.0
+
pow
(
10
,
(
self
.
aa_pka
[
"Y"
]
-
ph_val
)))
qp1
=
self
.
sequence
.
count
(
'H'
)
/
(
1.0
+
pow
(
10
,
(
ph_val
-
core
.
AA_PKA
[
"H"
])))
qp2
=
1.0
/
(
1.0
+
pow
(
10
,
(
ph_val
-
core
.
AA_PKA
[
"Nterm"
])))
self
.
aa_pka
[
"H"
])))
qp2
=
1.0
/
(
1.0
+
pow
(
10
,
(
ph_val
-
self
.
aa_pka
[
"Nterm"
])))
qp3
=
self
.
sequence
.
count
(
'K'
)
/
(
1.0
+
pow
(
10
,
(
ph_val
-
core
.
AA_PKA
[
"K"
])))
self
.
aa_pka
[
"K"
])))
qp4
=
self
.
sequence
.
count
(
'R'
)
/
(
1.0
+
pow
(
10
,
(
ph_val
-
core
.
AA_PKA
[
"R"
])))
self
.
aa_pka
[
"R"
])))
nq_final
=
qn1
+
qn2
+
qn3
+
qn4
+
qn5
+
qp1
+
qp2
+
qp3
+
qp4
# We are below solution, good pH value must be smaller
if
nq_final
<
0.0
:
...
...
setup.py
View file @
409509ee
...
...
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
_MAJOR
=
1
_MINOR
=
0
_MICRO
=
6
_MICRO
=
7
version
=
'%d.%d.%d'
%
(
_MAJOR
,
_MINOR
,
_MICRO
)
release
=
'%d.%d'
%
(
_MAJOR
,
_MINOR
)
...
...
tests/test_RapidPeptidesGenerator.py
View file @
409509ee
...
...
@@ -49,17 +49,15 @@ def test_list_enzyme(capsys):
RapidPeptidesGenerator
.
list_enzyme
()
out
,
_
=
capsys
.
readouterr
()
assert
out
==
"1: Arg-C
\n
2: Asp-N
\n
3: BNPS-Skatole
\n
4: Bromelain
\n
5: Casp"
\
"ase-1
\n
6: Caspase-2
\n
7: Caspase-3
\n
8: Caspase-4
\n
9: Caspas"
\
"e-5
\n
10: Caspase-6
\n
11: Caspase-7
\n
12: Caspase-8
\n
13: Casp"
\
"ase-9
\n
14: Caspase-10
\n
15: Chymotrypsin-high
\n
16: Chymotry"
\
"psin-low
\n
17: Clostripain
\n
18: CNBr
\n
19: Enterokinase
\n
20:"
\
" Factor-Xa
\n
21: Papain
\n
22: Formic-acid
\n
23: Glu-C
\n
24: Gl"
\
"utamyl-endopeptidase
\n
25: Granzyme-B
\n
26: Hydroxylamine
\n
2"
\
"7: Iodosobenzoic-acid
\n
28: Lys-C
\n
29: Lys-N
\n
30: Neutrophi"
\
"l-elastase
\n
31: NTCB
\n
32: Papain
\n
33: Pepsin-pH1.3
\n
34: Pe"
\
"psin-pH>=2
\n
35: Proline-endopeptidase
\n
36: Proteinase-K
\n
3"
\
"7: Staphylococcal-peptidase-I
\n
38: Tobacco-Etch-Virus
\n
39:"
\
" Thermolysin
\n
40: Thrombin
\n
41: Thrombin-SG
\n
42: Trypsin
\n
"
"ase-1
\n
6: Caspase-2
\n
7: Caspase-3
\n
8: Caspase-4
\n
9: Caspase-5
\n
10: Caspa"
\
"se-6
\n
11: Caspase-7
\n
12: Caspase-8
\n
13: Caspase-9
\n
14: Caspase-10
\n
15: C"
\
"hymotrypsin-high
\n
16: Chymotrypsin-low
\n
17: Clostripain
\n
18: CNBr
\n
19: E"
\
"nterokinase
\n
20: Factor-Xa
\n
21: Ficin
\n
22: Formic-acid
\n
23: Glu-C
\n
24: G"
\
"lutamyl-endopeptidase
\n
25: Granzyme-B
\n
26: Hydroxylamine
\n
27: Iodosobenz"
\
"oic-acid
\n
28: Lys-C
\n
29: Lys-N
\n
30: Neutrophil-elastase
\n
31: NTCB
\n
32: P"
\
"apain
\n
33: Pepsin-pH1.3
\n
34: Pepsin-pH>=2
\n
35: Proline-endopeptidase
\n
36"
\
": Proteinase-K
\n
37: Staphylococcal-peptidase-I
\n
38: Thermolysin
\n
39: Thr"
\
"ombin
\n
40: Thrombin-SG
\n
41: Tobacco-Etch-Virus
\n
42: Trypsin
\n
"
def
test_create_enzymes_to_use
(
capsys
):
"""Test function 'create_enzymes_to_use(enzymes, miscleavage)'"""
...
...
tests/test_core.py
View file @
409509ee
...
...
@@ -75,7 +75,8 @@ def test_output_results(capsys, tmpdir):
# CSV output
seq
=
"WQSDESDFZQSDESDF"
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
)
aa_pka
=
core
.
AA_PKA_IPC
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
False
...
...
@@ -127,7 +128,7 @@ def test_output_results(capsys, tmpdir):
# CSV output in quiet
seq
=
"WQSDESDFZQSDESDF"
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
)
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
True
...
...
@@ -148,7 +149,7 @@ def test_output_results(capsys, tmpdir):
# CSV output in verbose > 2
seq
=
"WQSDESDFZQSDESDF"
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
)
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
False
...
...
@@ -176,23 +177,25 @@ def test_peptide():
header
=
"Test"
seq
=
"QWSDESDF"
enz_name
=
"fake_enzyme"
pep0
=
sequence
.
Peptide
(
header
,
seq
,
enz_name
,
1
,
3
)
aa_pka
=
core
.
AA_PKA_IPC
pep0
=
sequence
.
Peptide
(
header
,
seq
,
enz_name
,
aa_pka
,
1
,
3
)
# Test function '__repr__()'
print_res
=
pep0
.
__repr__
()
assert
print_res
==
"Original header: Test
\n
No. peptide: 1
\n
Enzyme: fake_"
\
"enzyme
\n
Cleav. pos: 3
\n
Pep. size: 8
\n
Pep. mass: 1012"
\