Skip to content
Snippets Groups Projects
Commit 2c070bd1 authored by Nicolas MAILLET
Browse files

Add parallel execution

parent 94c9225e
No related branches found
No related tags found
No related merge requests found
...@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage): ...@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
:return: list of enzyme's id with associated miscleavage values :return: list of enzyme's id with associated miscleavage values
:rtype: list(int) :rtype: list(int)
.. warning:: Not tested
""" """
# Get the correct Enzymes inputed # Get the correct Enzymes inputed
...@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage): ...@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
return enzymes_to_use return enzymes_to_use
# Not tested # Not tested
def main(): def main():
"""Launcher of RapidPeptidesGenerator """Launcher of RapidPeptidesGenerator"""
.. warning:: Not tested
"""
parser = argparse.ArgumentParser(description="This software takes protein " parser = argparse.ArgumentParser(description="This software takes protein "
"sequences as input (-i optio" "sequences as input (-i optio"
"n). All sequences will be cl" "n). All sequences will be cl"
...@@ -285,6 +280,8 @@ def main(): ...@@ -285,6 +280,8 @@ def main():
"to output result peptides.") "to output result peptides.")
group_output.add_argument("-r", "--randomname", action="store_true", group_output.add_argument("-r", "--randomname", action="store_true",
help="Random (not used) output file name") help="Random (not used) output file name")
parser.add_argument("-c", "--processes", type=int, metavar="", default=1,
help="Number of parallel processes to use (default: 1)")
group_verbose = parser.add_mutually_exclusive_group() group_verbose = parser.add_mutually_exclusive_group()
group_verbose.add_argument("-q", "--quiet", action="store_true", group_verbose.add_argument("-q", "--quiet", action="store_true",
help="No standard output, only error(s)") help="No standard output, only error(s)")
...@@ -322,6 +319,10 @@ def main(): ...@@ -322,6 +319,10 @@ def main():
args.quiet = 1 args.quiet = 1
args.verbose = 0 args.verbose = 0
# Be sure to have at least 1 process
if args.processes <= 0:
parser.error("argument -c/--processes should be greater than 0")
# input data # input data
input_data = None input_data = None
input_type = None input_type = None
...@@ -394,13 +395,13 @@ def main(): ...@@ -394,13 +395,13 @@ def main():
# Make the actual digestion of input data # Make the actual digestion of input data
results_digestion = digest.digest_from_input(input_data, input_type, results_digestion = digest.digest_from_input(input_data, input_type,
enzymes_to_use, mode, aa_pka) enzymes_to_use, mode, aa_pka,
args.processes)
# Output results # Output results
core.output_results(output_file, results_digestion, args.fmt, args.quiet, core.output_results(output_file, results_digestion, args.fmt, args.quiet,
args.verbose) args.verbose)
### Let'z go ### ### Let'z go ###
if __name__ == '__main__': if __name__ == '__main__':
main() main()
... ...
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
"""Contains generic functions and global variables used by RPG""" """Contains generic functions and global variables used by RPG"""
import sys import sys
import gzip
AMINOACIDS = ["A", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", AMINOACIDS = ["A", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "Y", "B", "X", "Z", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "Y", "B", "X", "Z",
...@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose): ...@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose):
except IOError: except IOError:
handle_errors(output_file + " can't be open in 'w' mode", 0, handle_errors(output_file + " can't be open in 'w' mode", 0,
"File ") "File ")
def next_read(file, offset_start, offset_end):
    """Yield each sequence whose header starts inside an offset range.

    The file format is detected from the first character of the file:
    ``@`` means FASTQ and ``>`` means (multi-)FASTA. The file is read as
    GZIP when its two first bytes are the GZIP magic number, otherwise
    as a plain file. Each record is yielded as a tuple
    ``(header, sequence)`` where the header is stripped and the
    sequence is upper-cased. For FASTA, consecutive sequence lines are
    concatenated until the next header, an empty line or end of file.

    A record is yielded only if the offset of the beginning of its
    header line is strictly lower than ``offset_end``; the first header
    found at or after this limit stops the generator.

    :param file: fasta/fastq file to read
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type file: str
    :type offset_start: int
    :type offset_end: int

    :return: generator of (header, sequence) tuples
    :rtype: generator(tuple(str, str))

    :raises ValueError: if the file is empty or does not start with
        ``@`` or ``>``

    .. note:: Offsets are the ones used by ``seek()``/``tell()`` of the
        opened file object. For a GZIP file this presumably means
        offsets in the uncompressed data -- to confirm against caller.
    """
    # Is it a GZIP file? Sniff the two first bytes; the context manager
    # guarantees the handle is released even if the read fails
    with open(file, "rb") as test_file:
        magic = test_file.read(2)

    # Open the file, GZIP or not
    with (gzip.open(file, "rb") if magic == b"\x1f\x8b"
          else open(file, "rb")) as in_file:
        first_line = in_file.readline().decode('utf-8')
        # FASTQ file
        if first_line.startswith("@"):
            # Go to starting offset
            in_file.seek(offset_start)
            # Offset of the beginning of the line about to be read
            beg_line_offset = offset_start
            # Read each line from this point
            for line in in_file:
                # Consider this line as a header
                header = line.decode('utf-8').strip()
                # It is a proper fastq header
                if header.startswith("@"):
                    # Out of offset range, stop this loop
                    if beg_line_offset >= offset_end:
                        break
                    # Get the sequence
                    sequence = in_file.readline().decode('utf-8').strip()
                    # Skip the two next lines ('+' separator and quality)
                    in_file.readline()
                    in_file.readline()
                    # Return header and sequence and wait for the next one
                    yield (header, sequence.upper())
                # Offset of the beginning of the next line
                beg_line_offset = in_file.tell()
        # (multi?)FASTA file
        elif first_line.startswith(">"):
            # Go to starting offset
            in_file.seek(offset_start)
            # Offset of the beginning of the line about to be read
            beg_line_offset = offset_start
            # Read each line from this point
            for line in in_file:
                # Consider this line as a header
                header = line.decode('utf-8').strip()
                # It is a proper fasta header
                if header.startswith(">"):
                    # Out of offset range, stop this loop
                    if beg_line_offset >= offset_end:
                        break
                    # The line right after the header always starts the
                    # sequence
                    seq_parts = [in_file.readline().decode('utf-8').strip()]
                    # Remember where the next line begins to be able to
                    # go back to it
                    current_offset = in_file.tell()
                    next_l = in_file.readline().decode('utf-8').strip()
                    # While this next line is not empty nor a fasta
                    # header, it belongs to the sequence
                    while next_l and not next_l.startswith(">"):
                        seq_parts.append(next_l)
                        current_offset = in_file.tell()
                        next_l = in_file.readline().decode('utf-8').strip()
                    # Go back to the beginning of the line that ended the
                    # sequence, so the main loop reads it again
                    in_file.seek(current_offset)
                    # Return header and sequence and wait for the next one
                    yield (header, "".join(seq_parts).upper())
                # Offset of the beginning of the next line
                beg_line_offset = in_file.tell()
        # Not a valid file
        else:
            # Stop the generator with the error to show. Slicing instead
            # of indexing keeps this a ValueError on an empty file
            raise ValueError("input file format not recognized (%s)"
                             "." % first_line[:1])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment