Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Nicolas MAILLET
rpg
Commits
df253c38
Commit
df253c38
authored
Apr 03, 2019
by
Nicolas MAILLET
Browse files
v 1.1.0 - change input option -i (file only) and add -s (sequence)
parent
54c61dd0
Pipeline
#10913
passed with stages
in 58 seconds
Changes
7
Pipelines
2
Hide whitespace changes
Inline
Side-by-side
CHANGELOG.rst
View file @
df253c38
=========
CHANGELOG
=========
- 1.1.0
Modify input. Now, option -i only takes files. Use option -s to input sequence
- 1.0.9
Correct a bug of random dict in the creation of new enzyme
...
...
docs/conf.py
View file @
df253c38
...
...
@@ -26,7 +26,7 @@ author = 'Nicolas Maillet'
# The short X.Y version
version
=
''
# The full version, including alpha/beta/rc tags
release
=
'1.
0.9
'
release
=
'1.
1.0
'
# -- General configuration ---------------------------------------------------
...
...
rpg/RapidPeptidesGenerator.py
View file @
df253c38
...
...
@@ -29,9 +29,9 @@
necessary functions
"""
__version_info__
=
(
'1'
,
'
0
'
,
'
9
'
)
__version_info__
=
(
'1'
,
'
1
'
,
'
0
'
)
__version__
=
'.'
.
join
(
__version_info__
)
__revision_date__
=
"2019-0
3
-0
7
"
__revision_date__
=
"2019-0
4
-0
3
"
__author__
=
"Nicolas Maillet"
import
argparse
...
...
@@ -260,10 +260,11 @@ def main():
help
=
"Output file format. Either 'fasta', 'csv', or "
"'tsv' (default: fasta)"
)
group_launch
.
add_argument
(
"-i"
,
"--inputdata"
,
metavar
=
""
,
help
=
"Input file, in fasta / fastq format or a "
"single protein sequence without commentary"
)
help
=
"Input file, in fasta / fastq format"
)
group_launch
.
add_argument
(
"-l"
,
"--list"
,
action
=
"store_true"
,
help
=
"Display the list of available enzymes"
)
group_launch
.
add_argument
(
"-s"
,
"--sequence"
,
metavar
=
""
,
help
=
"Input a single protein sequence without commentary"
)
parser
.
add_argument
(
"-m"
,
"--miscleavage"
,
metavar
=
""
,
default
=
[],
nargs
=
'+'
,
type
=
restricted_float
,
help
=
"Percentage of miscleavage, between 0 and 100,"
...
...
@@ -321,6 +322,20 @@ def main():
args
.
quiet
=
1
args
.
verbose
=
0
# input data
input_data
=
None
input_type
=
None
if
args
.
inputdata
:
if
os
.
path
.
isfile
(
args
.
inputdata
):
input_data
=
args
.
inputdata
input_type
=
"file"
else
:
core
.
handle_errors
(
"file not found (%s)."
%
args
.
inputdata
,
0
,
"I"
\
"nput "
)
elif
args
.
sequence
:
input_data
=
args
.
sequence
input_type
=
"sequence"
# --outputfile / --randomname options
output_file
=
""
# No output file (default)
if
args
.
randomname
:
...
...
@@ -369,7 +384,7 @@ def main():
# Output options
if
args
.
verbose
:
print
(
"Input: "
+
args
.
inputdata
)
print
(
"Input: "
+
input
_
data
)
print
(
"Enzyme(s) used: "
+
str
([
enz
.
name
for
enz
in
enzymes_to_use
]))
print
(
"Mode: "
+
mode
)
print
(
"miscleavage ratio: "
+
...
...
@@ -378,7 +393,7 @@ def main():
print
(
"Output file: "
+
os
.
path
.
abspath
(
output_file
))
# Make the actual digestion of input data
results_digestion
=
digest
.
digest_from_input
(
args
.
inputdata
,
results_digestion
=
digest
.
digest_from_input
(
input
_
data
,
input_type
,
enzymes_to_use
,
mode
,
aa_pka
)
# Output results
...
...
rpg/digest.py
View file @
df253c38
...
...
@@ -323,7 +323,7 @@ def digest_one_sequence(seq, enz, mode, aa_pka):
elif
mode
==
"concurrent"
:
ret
=
concurrent_digest
(
seq
,
enz
,
aa_pka
)
else
:
core
.
handle_errors
(
"not able to understand digetion mode. Switching "
core
.
handle_errors
(
"not able to understand dige
s
tion mode. Switching "
"to 'sequential'."
)
ret
=
sequential_digest
(
seq
,
enz
,
aa_pka
)
return
ret
...
...
@@ -409,15 +409,17 @@ def concurrent_digest(seq, enz, aa_pka):
# it will be one result by enzyme
return
[
result
]
def
digest_from_input
(
input_data
,
enz
,
mode
,
aa_pka
):
def
digest_from_input
(
input_data
,
input_type
,
enz
,
mode
,
aa_pka
):
"""Digest all sequences of input data with
selected enzymes and mode.
:param input_data: either a sequence or a file of sequence (fasta/fastq)
:param input_data: either a sequence or the path of a file of sequence (fasta/fastq)
:param input_type: either 'sequence' or 'file'
:param enz: enzymes to digest with
:param mode: digestion mode (concurrent / sequential)
:param aa_pka: pKa values (IPC / Stryer)
:type input_data: str
:type input_type: str
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type mode: str
:type aa_pka: str
...
...
@@ -428,7 +430,7 @@ def digest_from_input(input_data, enz, mode, aa_pka):
# Results of digestion
results_digestion
=
[]
# Input is a file?
if
os
.
path
.
isfile
(
input_data
)
:
if
input_type
==
"file"
:
with
open
(
input_data
)
as
in_file
:
header_first_car
=
in_file
.
read
(
1
)
in_file
.
seek
(
0
)
...
...
@@ -476,11 +478,15 @@ def digest_from_input(input_data, enz, mode, aa_pka):
core
.
handle_errors
(
"input file format not recognized (%s)."
%
header_first_car
,
0
,
"Input "
)
# input is a single sequence
el
se
:
el
if
input_type
==
"sequence"
:
tmp_seq
=
sequence
.
Sequence
(
"Input"
,
sequence
.
check_sequence
(
input_data
))
# Digest the sequence
results_digestion
.
append
(
digest_one_sequence
(
tmp_seq
,
enz
,
mode
,
aa_pka
))
# bad input
else
:
core
.
handle_errors
(
"input type not recognized (%s)."
%
input_type
,
0
,
"Input "
)
# Return all peptides
return
results_digestion
setup.py
View file @
df253c38
...
...
@@ -3,8 +3,8 @@ import os
from
setuptools
import
setup
,
find_packages
_MAJOR
=
1
_MINOR
=
0
_MICRO
=
9
_MINOR
=
1
_MICRO
=
0
version
=
'%d.%d.%d'
%
(
_MAJOR
,
_MINOR
,
_MICRO
)
release
=
'%d.%d'
%
(
_MAJOR
,
_MINOR
)
...
...
tests/test_core.py
View file @
df253c38
...
...
@@ -76,7 +76,8 @@ def test_output_results(capsys, tmpdir):
# CSV output
seq
=
"WQSDESDFZQSDESDF"
aa_pka
=
core
.
AA_PKA_IPC
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
"sequence"
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
False
...
...
@@ -86,12 +87,12 @@ def test_output_results(capsys, tmpdir):
assert
out
==
output_file
.
read
()
assert
output_file
.
read
()
==
"Original_header,No_peptide,Enzyme,Cleaving_"
\
"pos,Peptide_size,Peptide_mass,pI,Sequence
\n
"
\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS
D
"
\
"
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,
ES
"
\
"D
\n
Input,2,fake_enzyme1,12,5,495.48938,3.
14,
"
\
"FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297
58,
"
\
"3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165.
1918
"
\
"8,5.97,F
\n
"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"
\
"
D
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,"
\
"
ES
D
\n
Input,2,fake_enzyme1,12,5,495.48938,3."
\
"
14,
FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297"
\
"
58,
3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165."
\
"
1918
8,5.97,F
\n
"
# TSV output
output_file
=
tmpdir
.
join
(
"test_result.tsv"
)
...
...
@@ -104,12 +105,12 @@ def test_output_results(capsys, tmpdir):
assert
output_file
.
read
()
==
"Original_header
\t
No_peptide
\t
Enzyme
\t
Cleavi"
\
"ng_pos
\t
Peptide_size
\t
Peptide_mass
\t
pI
\t
Seq"
\
"uence
\n
Input
\t
0
\t
fake_enzyme1
\t
4
\t
4
\t
534.52"
\
"598
\t
3.14
\t
WQSD
\n
Input
\t
1
\t
fake_enzyme1
\t
7
\t
"
\
"3
\t
349.29758
\t
3.04
\t
ESD
\n
Input
\t
2
\t
fake_e
nzy
"
\
"me1
\t
12
\t
5
\t
495.48938
\t
3.14
\t
FZQSD
\n
Inpu
t
\t
3
"
\
"
\t
fake_enzyme1
\t
15
\t
3
\t
349.29758
\t
3.04
\t
ESD
"
\
"
\n
Input
\t
4
\t
fake_enzyme1
\t
16
\t
1
\t
165.1
9188
"
\
"
\t
5.97
\t
F
\n
"
"598
\t
3.14
\t
WQSD
\n
Input
\t
1
\t
fake_enzyme1
\t
7"
\
"
\t
3
\t
349.29758
\t
3.04
\t
ESD
\n
Input
\t
2
\t
fake_e"
\
"
nzy
me1
\t
12
\t
5
\t
495.48938
\t
3.14
\t
FZQSD
\n
Inpu"
\
"
t
\t
3
\t
fake_enzyme1
\t
15
\t
3
\t
349.29758
\t
3.04"
\
"
\
t
ESD
\
n
Input
\t
4
\t
fake_enzyme1
\t
16
\t
1
\t
165.1"
\
"
9188
\t
5.97
\t
F
\n
"
# Fasta output
output_file
=
tmpdir
.
join
(
"test_result.fasta"
)
...
...
@@ -119,16 +120,17 @@ def test_output_results(capsys, tmpdir):
core
.
output_results
(
str
(
output_file
),
all_seq_digested
,
fmt
,
quiet
,
verbose
)
out
,
err
=
capsys
.
readouterr
()
assert
out
==
output_file
.
read
()
assert
output_file
.
read
()
==
">Input_0_fake_enzyme1_4_4_534.52598_3.14
\n
W
Q
"
\
"SD
\n
>Input_1_fake_enzyme1_7_3_349.29758_3.
04
"
\
"
\n
ESD
\n
>Input_2_fake_enzyme1_12_5_495.489
38
"
\
"_3.14
\n
FZQSD
\n
>Input_3_fake_enzyme1_15_3_
349
"
\
".29758_3.04
\n
ESD
\n
>Input_4_fake_enzyme1_
16_1
"
\
"_165.19188_5.97
\n
F
\n
"
assert
output_file
.
read
()
==
">Input_0_fake_enzyme1_4_4_534.52598_3.14
\n
W"
\
"
Q
SD
\n
>Input_1_fake_enzyme1_7_3_349.29758_3."
\
"
04
\n
ESD
\n
>Input_2_fake_enzyme1_12_5_495.489"
\
"
38
_3.14
\n
FZQSD
\n
>Input_3_fake_enzyme1_15_3_"
\
"
349
.29758_3.04
\n
ESD
\n
>Input_4_fake_enzyme1_"
\
"
16_1
_165.19188_5.97
\n
F
\n
"
# CSV output in quiet
seq
=
"WQSDESDFZQSDESDF"
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
"sequence"
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
True
...
...
@@ -140,16 +142,17 @@ def test_output_results(capsys, tmpdir):
assert
out
==
""
assert
output_file
.
read
()
==
"Original_header,No_peptide,Enzyme,Cleaving_"
\
"pos,Peptide_size,Peptide_mass,pI,Sequence
\n
"
\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS
D
"
\
"
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,
ES
"
\
"D
\n
Input,2,fake_enzyme1,12,5,495.48938,3.
14,
"
\
"FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297
58,
"
\
"3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165.
1918
"
\
"8,5.97,F
\n
"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"
\
"
D
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,"
\
"
ES
D
\n
Input,2,fake_enzyme1,12,5,495.48938,3."
\
"
14,
FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297"
\
"
58,
3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165."
\
"
1918
8,5.97,F
\n
"
# CSV output in verbose > 2
seq
=
"WQSDESDFZQSDESDF"
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
all_seq_digested
=
digest
.
digest_from_input
(
seq
,
"sequence"
,
enzymes
,
mode
,
aa_pka
)
output_file
=
tmpdir
.
join
(
"test_result.csv"
)
fmt
=
"csv"
quiet
=
False
...
...
@@ -159,12 +162,12 @@ def test_output_results(capsys, tmpdir):
out
,
err
=
capsys
.
readouterr
()
assert
output_file
.
read
()
==
"Original_header,No_peptide,Enzyme,Cleaving_"
\
"pos,Peptide_size,Peptide_mass,pI,Sequence
\n
"
\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS
D
"
\
"
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,
ES
"
\
"D
\n
Input,2,fake_enzyme1,12,5,495.48938,3.
14,
"
\
"FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297
58,
"
\
"3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165.
1918
"
\
"8,5.97,F
\n
"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"
\
"
D
\n
Input,1,fake_enzyme1,7,3,349.29758,3.04,"
\
"
ES
D
\n
Input,2,fake_enzyme1,12,5,495.48938,3."
\
"
14,
FZQSD
\n
Input,3,fake_enzyme1,15,3,349.297"
\
"
58,
3.04,ESD
\n
Input,4,fake_enzyme1,16,1,165."
\
"
1918
8,5.97,F
\n
"
# Verbose > 2
assert
out
==
"
\n
Number of cleavage: 4
\n
Cleavage position: 4, 7, 12, 15
\n
"
\
"Number of miscleavage: 0
\n
miscleavage position:
\n
mis"
\
...
...
tests/test_digest.py
View file @
df253c38
...
...
@@ -342,7 +342,7 @@ def test_digest_from_input(capsys, tmpdir):
# Test wrong file
with
pytest
.
raises
(
SystemExit
)
as
pytest_wrapped_e
:
digest
.
digest_from_input
(
str
(
Path
.
home
())
+
"/rpg_user.py"
,
digest
.
digest_from_input
(
str
(
Path
.
home
())
+
"/rpg_user.py"
,
"file"
,
enzymes
,
mode
,
aa_pka
)
_
,
err
=
capsys
.
readouterr
()
assert
err
==
"Input Error: input file format not recognized (f).
\n
"
...
...
@@ -351,7 +351,7 @@ def test_digest_from_input(capsys, tmpdir):
# Test input data
seq
=
"WQSDESDFZQSDESDF"
res
=
digest
.
digest_from_input
(
seq
,
enzymes
,
mode
,
aa_pka
)
res
=
digest
.
digest_from_input
(
seq
,
"sequence"
,
enzymes
,
mode
,
aa_pka
)
assert
res
[
0
][
0
].
__repr__
()
==
"Number of cleavage: 4
\n
Number of miscle"
\
"avage: 0
\n
Positions of miscleavage: []"
\
"
\n
Ratio of miscleavage: 0.0
\n
Peptides: "
\
...
...
@@ -380,7 +380,8 @@ def test_digest_from_input(capsys, tmpdir):
# Test fasta file
fasta_file
=
tmpdir
.
join
(
"test.fasta"
)
fasta_file
.
write
(
">Fake1
\n
WQSDESDFZQS
\n
DESDF
\n
>Fake2
\n
NPHARDORCOMPLET"
)
res
=
digest
.
digest_from_input
(
str
(
fasta_file
),
enzymes
,
mode
,
aa_pka
)
res
=
digest
.
digest_from_input
(
str
(
fasta_file
),
"file"
,
enzymes
,
mode
,
aa_pka
)
assert
res
[
0
][
0
].
__repr__
()
==
"Number of cleavage: 4
\n
Number of miscle"
\
"avage: 0
\n
Positions of miscleavage: []"
\
"
\n
Ratio of miscleavage: 0.0
\n
Peptides: "
\
...
...
@@ -416,9 +417,10 @@ def test_digest_from_input(capsys, tmpdir):
# Test fastq file (same result)
fastq_file
=
tmpdir
.
join
(
"test.fastq"
)
fastq_file
.
write
(
"@Fake1
\n
WQSDESDFZQSDESDF
\n
+Fake1
\n
nWQSDESDFZQSDESDF
\n
@Fa"
\
"ke2
\n
NPHARDORCOMPLET
\n
+Fake2
\n
nNPHARDORCOMPLET
\n
"
)
res
=
digest
.
digest_from_input
(
str
(
fastq_file
),
enzymes
,
mode
,
aa_pka
)
fastq_file
.
write
(
"@Fake1
\n
WQSDESDFZQSDESDF
\n
+Fake1
\n
nWQSDESDFZQSDESDF
\n
@F"
\
"ake2
\n
NPHARDORCOMPLET
\n
+Fake2
\n
nNPHARDORCOMPLET
\n
"
)
res
=
digest
.
digest_from_input
(
str
(
fastq_file
),
"file"
,
enzymes
,
mode
,
aa_pka
)
assert
res
[
0
][
0
].
__repr__
()
==
"Number of cleavage: 4
\n
Number of miscle"
\
"avage: 0
\n
Positions of miscleavage: []"
\
"
\n
Ratio of miscleavage: 0.0
\n
Peptides: "
\
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment