Commit 2c070bd1 authored by Nicolas  MAILLET's avatar Nicolas MAILLET
Browse files

Add parallel execution

parent 94c9225e
......@@ -163,8 +163,6 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
:return: list of enzyme's id with associated miscleavage values
:rtype: list(int)
.. warning:: Not tested
"""
# Get the correct enzymes given as input
......@@ -217,10 +215,7 @@ def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
return enzymes_to_use
# Not tested
def main():
"""Launcher of RapidPeptidesGenerator
.. warning:: Not tested
"""
"""Launcher of RapidPeptidesGenerator"""
parser = argparse.ArgumentParser(description="This software takes protein "
"sequences as input (-i optio"
"n). All sequences will be cl"
......@@ -285,6 +280,8 @@ def main():
"to output result peptides.")
group_output.add_argument("-r", "--randomname", action="store_true",
help="Random (not used) output file name")
parser.add_argument("-c", "--processes", type=int, metavar="", default=1,
help="Number of parallel processes to use (default: 1)")
group_verbose = parser.add_mutually_exclusive_group()
group_verbose.add_argument("-q", "--quiet", action="store_true",
help="No standard output, only error(s)")
......@@ -322,6 +319,10 @@ def main():
args.quiet = 1
args.verbose = 0
# Be sure to have at least 1 process
if args.processes <= 0:
parser.error("argument -c/--processes should be greater than 0")
# input data
input_data = None
input_type = None
......@@ -394,13 +395,13 @@ def main():
# Make the actual digestion of input data
results_digestion = digest.digest_from_input(input_data, input_type,
enzymes_to_use, mode, aa_pka)
enzymes_to_use, mode, aa_pka,
args.processes)
# Output results
core.output_results(output_file, results_digestion, args.fmt, args.quiet,
args.verbose)
### Let'z go ###
if __name__ == '__main__':
main()
......
......@@ -24,6 +24,7 @@
"""Contains generic functions and global variables used by RPG"""
import sys
import gzip
AMINOACIDS = ["A", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
"O", "P", "Q", "R", "S", "T", "U", "V", "W", "Y", "B", "X", "Z",
......@@ -204,3 +205,96 @@ def output_results(output_file, all_seq_digested, fmt, quiet, verbose):
except IOError:
handle_errors(output_file + " can't be open in 'w' mode", 0,
"File ")
def next_read(file, offset_start, offset_end):
    """Yield each sequence whose header begins inside an offset range.

    Every item is a tuple (header, seq) where seq is upper-cased.
    The file can be fasta or fastq, gzipped or not: GZIP is detected
    from the two leading magic bytes, the format from the first
    character of the first line ("@" for fastq, ">" for fasta).
    Offsets are handed as-is to ``seek()`` of the opened stream.

    :param file: fasta/fastq file to read
    :param offset_start: offset in the file from where to read
    :param offset_end: offset in the file until where to read
    :type file: str
    :type offset_start: int
    :type offset_end: int

    :return: generator of (header, sequence) tuples
    :rtype: generator(tuple(str, str))

    :raises ValueError: if the first line starts with neither "@" nor
                        ">" (this includes an empty file)
    """
    # Is it a GZIP file? Look at the two magic bytes; the context
    # manager closes the probe handle even if the read fails
    with open(file, "rb") as test_file:
        magic = test_file.read(2)
    # Open the file, GZIP or not
    opener = gzip.open if magic == b"\x1f\x8b" else open
    with opener(file, "rb") as in_file:
        first_line = in_file.readline().decode('utf-8')
        # FASTQ file
        if first_line.startswith("@"):
            yield from _next_fastq_read(in_file, offset_start, offset_end)
        # (multi?)FASTA file
        elif first_line.startswith(">"):
            yield from _next_fasta_read(in_file, offset_start, offset_end)
        # Not a valid file
        else:
            # Stop the generator with the error to show. Slicing (and
            # not indexing) keeps this a ValueError for an empty file,
            # where first_line is the empty string
            raise ValueError("input file format not recognized (%s)"
                             "." % first_line[:1])


def _next_fastq_read(in_file, offset_start, offset_end):
    """Yield (header, seq) for fastq records starting in the offset range.

    :param in_file: binary stream of the fastq file, already opened
    :param offset_start: offset in the stream from where to read
    :param offset_end: offset in the stream until where to read
    :type in_file: file object
    :type offset_start: int
    :type offset_end: int
    """
    # Go to starting offset
    in_file.seek(offset_start)
    # Offset of the beginning of the line about to be read
    beg_line_offset = offset_start
    for line in in_file:
        # Consider this line as a header
        header = line.decode('utf-8').strip()
        # It is a proper fastq header
        if header.startswith("@"):
            # Header begins out of the offset range, stop here
            if beg_line_offset >= offset_end:
                break
            # Get the sequence
            sequence = in_file.readline().decode('utf-8').strip()
            # Skip the two next lines of the record
            in_file.readline()
            in_file.readline()
            # Return header and sequence and wait for the next one
            yield (header, sequence.upper())
        # Updated for every line, header or not, so that the next
        # header is compared with its own starting offset
        beg_line_offset = in_file.tell()


def _next_fasta_read(in_file, offset_start, offset_end):
    """Yield (header, seq) for fasta records starting in the offset range.

    A sequence is the concatenation of the lines following its header,
    up to the next header, the first empty line or the end of file.

    :param in_file: binary stream of the fasta file, already opened
    :param offset_start: offset in the stream from where to read
    :param offset_end: offset in the stream until where to read
    :type in_file: file object
    :type offset_start: int
    :type offset_end: int
    """
    # Go to starting offset
    in_file.seek(offset_start)
    # Offset of the beginning of the line about to be read
    beg_line_offset = offset_start
    for line in in_file:
        # Consider this line as a header
        header = line.decode('utf-8').strip()
        # It is a proper fasta header
        if header.startswith(">"):
            # Header begins out of the offset range, stop here
            if beg_line_offset >= offset_end:
                break
            # First line of the sequence
            seq_parts = [in_file.readline().decode('utf-8').strip()]
            # Offset of the beginning of the next line
            current_offset = in_file.tell()
            next_l = in_file.readline().decode('utf-8').strip()
            # While this next line is not a fasta header (nor empty)...
            while next_l and not next_l.startswith(">"):
                # Add this to the sequence
                seq_parts.append(next_l)
                current_offset = in_file.tell()
                next_l = in_file.readline().decode('utf-8').strip()
            # The line just read belongs to the next record: go back to
            # its beginning so the main loop sees it as a header
            in_file.seek(current_offset)
            # Return header and sequence and wait for the next one
            yield (header, "".join(seq_parts).upper())
        # Updated for every line, header or not, so that the next
        # header is compared with its own starting offset
        beg_line_offset = in_file.tell()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment