Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
R
rpg
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Nicolas MAILLET
rpg
Commits
2c070bd1
Commit
2c070bd1
authored
Feb 3, 2021
by
Nicolas MAILLET
Browse files
Options
Downloads
Patches
Plain Diff
Add parallel execution
parent
94c9225e
No related branches found
No related tags found
No related merge requests found
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
rpg/RapidPeptidesGenerator.py
+9
-8
9 additions, 8 deletions
rpg/RapidPeptidesGenerator.py
rpg/core.py
+94
-0
94 additions, 0 deletions
rpg/core.py
with
103 additions
and
8 deletions
rpg/RapidPeptidesGenerator.py
+
9
−
8
View file @
2c070bd1
...
@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
...
@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
:return: list of enzyme
'
s id with associated miscleavage values
:return: list of enzyme
'
s id with associated miscleavage values
:rtype: list(int)
:rtype: list(int)
.. warning:: Not tested
"""
"""
# Get the correct Enzymes inputed
# Get the correct Enzymes inputed
...
@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
...
@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
return
enzymes_to_use
return
enzymes_to_use
# Not tested
# Not tested
def
main
():
def
main
():
"""
Launcher of RapidPeptidesGenerator
"""
Launcher of RapidPeptidesGenerator
"""
.. warning:: Not tested
"""
parser
=
argparse
.
ArgumentParser
(
description
=
"
This software takes protein
"
parser
=
argparse
.
ArgumentParser
(
description
=
"
This software takes protein
"
"
sequences as input (-i optio
"
"
sequences as input (-i optio
"
"
n). All sequences will be cl
"
"
n). All sequences will be cl
"
...
@@ -285,6 +280,8 @@ def main():
...
@@ -285,6 +280,8 @@ def main():
"
to output result peptides.
"
)
"
to output result peptides.
"
)
group_output
.
add_argument
(
"
-r
"
,
"
--randomname
"
,
action
=
"
store_true
"
,
group_output
.
add_argument
(
"
-r
"
,
"
--randomname
"
,
action
=
"
store_true
"
,
help
=
"
Random (not used) output file name
"
)
help
=
"
Random (not used) output file name
"
)
parser
.
add_argument
(
"
-c
"
,
"
--processes
"
,
type
=
int
,
metavar
=
""
,
default
=
1
,
help
=
"
Number of parallel processes to use (default: 1)
"
)
group_verbose
=
parser
.
add_mutually_exclusive_group
()
group_verbose
=
parser
.
add_mutually_exclusive_group
()
group_verbose
.
add_argument
(
"
-q
"
,
"
--quiet
"
,
action
=
"
store_true
"
,
group_verbose
.
add_argument
(
"
-q
"
,
"
--quiet
"
,
action
=
"
store_true
"
,
help
=
"
No standard output, only error(s)
"
)
help
=
"
No standard output, only error(s)
"
)
...
@@ -322,6 +319,10 @@ def main():
...
@@ -322,6 +319,10 @@ def main():
args
.
quiet
=
1
args
.
quiet
=
1
args
.
verbose
=
0
args
.
verbose
=
0
# Be sure to have at least 1 process
if
args
.
processes
<=
0
:
parser
.
error
(
"
argument -c/--processes should be greater than 0
"
)
# input data
# input data
input_data
=
None
input_data
=
None
input_type
=
None
input_type
=
None
...
@@ -394,13 +395,13 @@ def main():
...
@@ -394,13 +395,13 @@ def main():
# Make the actual digestion of input data
# Make the actual digestion of input data
results_digestion
=
digest
.
digest_from_input
(
input_data
,
input_type
,
results_digestion
=
digest
.
digest_from_input
(
input_data
,
input_type
,
enzymes_to_use
,
mode
,
aa_pka
)
enzymes_to_use
,
mode
,
aa_pka
,
args
.
processes
)
# Output results
# Output results
core
.
output_results
(
output_file
,
results_digestion
,
args
.
fmt
,
args
.
quiet
,
core
.
output_results
(
output_file
,
results_digestion
,
args
.
fmt
,
args
.
quiet
,
args
.
verbose
)
args
.
verbose
)
### Let'z go ###
### Let'z go ###
if
__name__
==
'
__main__
'
:
if
__name__
==
'
__main__
'
:
main
()
main
()
...
...
...
...
This diff is collapsed.
Click to expand it.
rpg/core.py
+
94
−
0
View file @
2c070bd1
...
@@ -24,6 +24,7 @@
...
@@ -24,6 +24,7 @@
"""
Contains generic functions and global variables used by RPG
"""
"""
Contains generic functions and global variables used by RPG
"""
import
sys
import
sys
import
gzip
AMINOACIDS
=
[
"
A
"
,
"
C
"
,
"
D
"
,
"
E
"
,
"
F
"
,
"
G
"
,
"
H
"
,
"
I
"
,
"
J
"
,
"
K
"
,
"
L
"
,
"
M
"
,
"
N
"
,
AMINOACIDS
=
[
"
A
"
,
"
C
"
,
"
D
"
,
"
E
"
,
"
F
"
,
"
G
"
,
"
H
"
,
"
I
"
,
"
J
"
,
"
K
"
,
"
L
"
,
"
M
"
,
"
N
"
,
"
O
"
,
"
P
"
,
"
Q
"
,
"
R
"
,
"
S
"
,
"
T
"
,
"
U
"
,
"
V
"
,
"
W
"
,
"
Y
"
,
"
B
"
,
"
X
"
,
"
Z
"
,
"
O
"
,
"
P
"
,
"
Q
"
,
"
R
"
,
"
S
"
,
"
T
"
,
"
U
"
,
"
V
"
,
"
W
"
,
"
Y
"
,
"
B
"
,
"
X
"
,
"
Z
"
,
...
@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose):
...
@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose):
except
IOError
:
except
IOError
:
handle_errors
(
output_file
+
"
can
'
t be open in
'
w
'
mode
"
,
0
,
handle_errors
(
output_file
+
"
can
'
t be open in
'
w
'
mode
"
,
0
,
"
File
"
)
"
File
"
)
def _read_text_line(handle):
    """ Read the next line of a binary handle as stripped UTF-8 text

    :param handle: file-like object opened in binary mode
    :return: the decoded line without surrounding whitespace, or an
             empty string at end of file (or on a blank line)
    :rtype: str
    """
    return handle.readline().decode('utf-8').strip()


def _fastq_records(in_file, offset_start, offset_end):
    """ Yield (header, sequence) for FASTQ records whose header line
    begins in [offset_start, offset_end)

    :param in_file: binary handle on the (uncompressed view of the) file
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type offset_start: int
    :type offset_end: int
    """
    # Go to starting offset
    in_file.seek(offset_start)
    # Offset of the beginning of the line about to be read
    beg_line_offset = offset_start
    # readline() (rather than iterating the handle) keeps tell() and
    # seek() well defined between reads
    for line in iter(in_file.readline, b""):
        # Consider this line as a header
        header = line.decode('utf-8').strip()
        if header.startswith("@"):
            # Header starts outside of the offset range, stop here
            if beg_line_offset >= offset_end:
                break
            sequence = _read_text_line(in_file)
            # Skip the '+' separator and the quality line; the quality
            # line may itself start with '@' and must not be seen as a
            # header
            in_file.readline()
            in_file.readline()
            yield (header, sequence.upper())
        # Offset of the next line to read
        beg_line_offset = in_file.tell()


def _fasta_records(in_file, offset_start, offset_end):
    """ Yield (header, sequence) for (multi)FASTA records whose header
    line begins in [offset_start, offset_end)

    Multi-line sequences are concatenated. A header directly followed
    by another header yields an empty sequence.

    :param in_file: binary handle on the (uncompressed view of the) file
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type offset_start: int
    :type offset_end: int
    """
    # Go to starting offset
    in_file.seek(offset_start)
    # Offset of the beginning of the line about to be read
    beg_line_offset = offset_start
    for line in iter(in_file.readline, b""):
        # Consider this line as a header
        header = line.decode('utf-8').strip()
        if header.startswith(">"):
            # Header starts outside of the offset range, stop here
            if beg_line_offset >= offset_end:
                break
            # Collect sequence lines, joined once at the end
            sequence_parts = []
            # Where to come back once the record is fully read
            resume_at = in_file.tell()
            chunk = _read_text_line(in_file)
            # Do not swallow the next header when the sequence is empty
            if not chunk.startswith(">"):
                while True:
                    sequence_parts.append(chunk)
                    resume_at = in_file.tell()
                    chunk = _read_text_line(in_file)
                    # End of file / blank line, or next record reached
                    if not chunk or chunk.startswith(">"):
                        break
            # Go back to the beginning of the line that ended the record
            in_file.seek(resume_at)
            yield (header, "".join(sequence_parts).upper())
        # Offset of the next line to read
        beg_line_offset = in_file.tell()


def next_read(file, offset_start, offset_end):
    """ Return each sequence between offsets range of a file
    as a tuple (header, seq) using a generator.
    Can be fasta or fastq, gzipped or not.

    The format is detected from the first character of the file
    ('@' for fastq, '>' for fasta). Only records whose header line
    starts at an offset strictly lower than `offset_end` are returned.
    For gzipped files, offsets refer to the uncompressed content.

    :param file: fasta/fastq file to read
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type file: str
    :type offset_start: int
    :type offset_end: int

    :return: generator of (header, upper-cased sequence)
    :rtype: generator(tuple(str, str))

    :raises ValueError: if the file is empty or starts with neither
                        '@' nor '>'
    """
    # Is it a GZIP file? Check the two magic bytes
    with open(file, "rb") as test_file:
        magic = test_file.read(2)

    # Open the file, GZIP or not
    opener = gzip.open if magic == b"\x1f\x8b" else open
    with opener(file, "rb") as in_file:
        first_line = in_file.readline().decode('utf-8')
        # FASTQ file
        if first_line.startswith("@"):
            yield from _fastq_records(in_file, offset_start, offset_end)
        # (multi?)FASTA file
        elif first_line.startswith(">"):
            yield from _fasta_records(in_file, offset_start, offset_end)
        # Not a valid file
        else:
            # Stop the generator with the error to show. Slicing keeps
            # an empty file from raising IndexError instead.
            raise ValueError("input file format not recognized (%s)"
                             "." % first_line[:1])
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment