Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Nicolas MAILLET
rpg
Commits
2c070bd1
Commit
2c070bd1
authored
Feb 03, 2021
by
Nicolas MAILLET
Browse files
Add parallel execution
parent
94c9225e
Changes
2
Hide whitespace changes
Inline
Side-by-side
rpg/RapidPeptidesGenerator.py
View file @
2c070bd1
...
...
@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
:return: list of enzyme's id with associated miscleavage values
:rtype: list(int)
.. warning:: Not tested
"""
# Get the correct enzymes given as input
...
...
@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
return
enzymes_to_use
# Not tested
def
main
():
"""Launcher of RapidPeptidesGenerator
.. warning:: Not tested
"""
"""Launcher of RapidPeptidesGenerator"""
parser
=
argparse
.
ArgumentParser
(
description
=
"This software takes protein "
"sequences as input (-i optio"
"n). All sequences will be cl"
...
...
@@ -285,6 +280,8 @@ def main():
"to output result peptides."
)
group_output
.
add_argument
(
"-r"
,
"--randomname"
,
action
=
"store_true"
,
help
=
"Random (not used) output file name"
)
parser
.
add_argument
(
"-c"
,
"--processes"
,
type
=
int
,
metavar
=
""
,
default
=
1
,
help
=
"Number of parallel processes to use (default: 1)"
)
group_verbose
=
parser
.
add_mutually_exclusive_group
()
group_verbose
.
add_argument
(
"-q"
,
"--quiet"
,
action
=
"store_true"
,
help
=
"No standard output, only error(s)"
)
...
...
@@ -322,6 +319,10 @@ def main():
args
.
quiet
=
1
args
.
verbose
=
0
# Be sure to have at least 1 process
if
args
.
processes
<=
0
:
parser
.
error
(
"argument -c/--processes should be greater than 0"
)
# input data
input_data
=
None
input_type
=
None
...
...
@@ -394,13 +395,13 @@ def main():
# Make the actual digestion of input data
results_digestion
=
digest
.
digest_from_input
(
input_data
,
input_type
,
enzymes_to_use
,
mode
,
aa_pka
)
enzymes_to_use
,
mode
,
aa_pka
,
args
.
processes
)
# Output results
core
.
output_results
(
output_file
,
results_digestion
,
args
.
fmt
,
args
.
quiet
,
args
.
verbose
)
### Let'z go ###
# Start the launcher only when this file is run as a script
if __name__ == "__main__":
    main()
...
...
rpg/core.py
View file @
2c070bd1
...
...
@@ -24,6 +24,7 @@
"""Contains generic functions and global variables used by RPG"""
import
sys
import
gzip
AMINOACIDS
=
[
"A"
,
"C"
,
"D"
,
"E"
,
"F"
,
"G"
,
"H"
,
"I"
,
"J"
,
"K"
,
"L"
,
"M"
,
"N"
,
"O"
,
"P"
,
"Q"
,
"R"
,
"S"
,
"T"
,
"U"
,
"V"
,
"W"
,
"Y"
,
"B"
,
"X"
,
"Z"
,
...
...
@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose):
except
IOError
:
handle_errors
(
output_file
+
" can't be open in 'w' mode"
,
0
,
"File "
)
def next_read(file, offset_start, offset_end):
    """Yield each sequence whose header lies within an offset range.

    The file can be FASTA or FASTQ, GZIP-compressed or not. Compression
    is detected from the two-byte GZIP magic number, and the format from
    the first character of the file ("@" for FASTQ, ">" for FASTA).

    Reading starts at `offset_start`: lines are skipped until one begins
    with the header marker. A record is yielded when the offset recorded
    just before reading its header line is strictly lower than
    `offset_end`; the first header found at or beyond `offset_end` stops
    the generator. For a GZIP file, offsets are applied to the stream
    returned by `gzip.open`.

    :param file: fasta/fastq file to read
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type file: str
    :type offset_start: int
    :type offset_end: int

    :return: generator of (header, sequence), the sequence in upper case
    :rtype: generator(tuple(str, str))

    :raises ValueError: if the file starts with neither "@" nor ">"
                        (an empty file included)
    """
    # Is it a GZIP file? The context manager closes the handle even if
    # reading the magic number fails
    with open(file, "rb") as test_file:
        magic = test_file.read(2)
    opener = gzip.open if magic == b"\x1f\x8b" else open

    # Open the file, GZIP or not
    with opener(file, "rb") as in_file:
        # Slicing (not indexing) keeps an empty file from raising
        # IndexError: it falls through to the ValueError below
        marker = in_file.readline().decode('utf-8')[:1]
        # Not a valid file
        if marker not in ("@", ">"):
            # Stop the generator with the error to show
            raise ValueError("input file format not recognized (%s)."
                             % marker)
        # Go to starting offset
        in_file.seek(offset_start)
        # Offset at which the line about to be read begins
        beg_line_offset = offset_start
        # Read each line from this point
        for line in in_file:
            # Consider this line as a header
            header = line.decode('utf-8').strip()
            # NOTE(review): a FASTQ quality line may itself begin with
            # "@"; if offset_start falls inside a record such a line would
            # be taken for a header -- confirm callers pass offsets that
            # sit on record boundaries
            if header.startswith(marker):
                # Out of offset, stop this loop
                if beg_line_offset >= offset_end:
                    break
                if marker == "@":
                    sequence = _read_fastq_sequence(in_file)
                else:
                    sequence = _read_fasta_sequence(in_file)
                # Return header and sequence and wait for the next one
                yield (header, sequence.upper())
            # Current offset, i.e. where the next line begins
            beg_line_offset = in_file.tell()


def _read_fastq_sequence(in_file):
    """Read the sequence line of a FASTQ record and skip its two
    following lines (separator and quality)."""
    sequence = in_file.readline().decode('utf-8').strip()
    # Skip the two next lines
    in_file.readline()
    in_file.readline()
    return sequence


def _read_fasta_sequence(in_file):
    """Read a possibly multi-line FASTA sequence and leave the file
    positioned at the beginning of the line that ended it."""
    parts = [in_file.readline().decode('utf-8').strip()]
    # Offset to come back to once the end of the sequence is found
    resume_offset = in_file.tell()
    next_l = in_file.readline().decode('utf-8').strip()
    # While this next line is neither empty nor a fasta header...
    while next_l and not next_l.startswith(">"):
        # ...it belongs to the sequence
        parts.append(next_l)
        resume_offset = in_file.tell()
        next_l = in_file.readline().decode('utf-8').strip()
    # Go back to the beginning of the line that stopped the sequence so
    # the caller reads it again as a potential header
    in_file.seek(resume_offset)
    return "".join(parts)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment