Commit df253c38 authored by Nicolas MAILLET
Browse files

v 1.1.0 - change input option -i (file only) and add -s (sequence)

parent 54c61dd0
Pipeline #10913 passed with stages
in 58 seconds
=========
CHANGELOG
=========
- 1.1.0
Modify input: option -i now only takes files. Use option -s to input a single sequence.
- 1.0.9
Correct a bug of random dict in the creation of new enzyme
......
......@@ -26,7 +26,7 @@ author = 'Nicolas Maillet'
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
release = '1.0.9'
release = '1.1.0'
# -- General configuration ---------------------------------------------------
......
......@@ -29,9 +29,9 @@
necessary functions
"""
__version_info__ = ('1', '0', '9')
__version_info__ = ('1', '1', '0')
__version__ = '.'.join(__version_info__)
__revision_date__ = "2019-03-07"
__revision_date__ = "2019-04-03"
__author__ = "Nicolas Maillet"
import argparse
......@@ -260,10 +260,11 @@ def main():
help="Output file format. Either 'fasta', 'csv', or "
"'tsv' (default: fasta)")
group_launch.add_argument("-i", "--inputdata", metavar="",
help="Input file, in fasta / fastq format or a "
"single protein sequence without commentary")
help="Input file, in fasta / fastq format")
group_launch.add_argument("-l", "--list", action="store_true",
help="Display the list of available enzymes")
group_launch.add_argument("-s", "--sequence", metavar="",
help="Input a single protein sequence without commentary")
parser.add_argument("-m", "--miscleavage", metavar="", default=[],
nargs='+', type=restricted_float,
help="Percentage of miscleavage, between 0 and 100,"
......@@ -321,6 +322,20 @@ def main():
args.quiet = 1
args.verbose = 0
# input data
input_data = None
input_type = None
if args.inputdata:
if os.path.isfile(args.inputdata):
input_data = args.inputdata
input_type = "file"
else:
core.handle_errors("file not found (%s)." % args.inputdata, 0, "I"\
"nput ")
elif args.sequence:
input_data = args.sequence
input_type = "sequence"
# --outputfile / --randomname options
output_file = "" # No output file (default)
if args.randomname:
......@@ -369,7 +384,7 @@ def main():
# Output options
if args.verbose:
print("Input: " + args.inputdata)
print("Input: " + input_data)
print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use]))
print("Mode: " + mode)
print("miscleavage ratio: " +
......@@ -378,7 +393,7 @@ def main():
print("Output file: " + os.path.abspath(output_file))
# Make the actual digestion of input data
results_digestion = digest.digest_from_input(args.inputdata,
results_digestion = digest.digest_from_input(input_data, input_type,
enzymes_to_use, mode, aa_pka)
# Output results
......
......@@ -323,7 +323,7 @@ def digest_one_sequence(seq, enz, mode, aa_pka):
elif mode == "concurrent":
ret = concurrent_digest(seq, enz, aa_pka)
else:
core.handle_errors("not able to understand digetion mode. Switching "
core.handle_errors("not able to understand digestion mode. Switching "
"to 'sequential'.")
ret = sequential_digest(seq, enz, aa_pka)
return ret
......@@ -409,15 +409,17 @@ def concurrent_digest(seq, enz, aa_pka):
# it will be one result by enzyme
return [result]
def digest_from_input(input_data, enz, mode, aa_pka):
def digest_from_input(input_data, input_type, enz, mode, aa_pka):
"""Digest all sequences of input data with
selected enzymes and mode.
:param input_data: either a sequence or a file of sequence (fasta/fastq)
:param input_data: either a sequence or the path of a file of sequence (fasta/fastq)
:param input_type: either 'sequence' or 'file'
:param enz: enzymes to digest with
:param mode: digestion mode (concurrent / sequential)
:param aa_pka: pKa values (IPC / Stryer)
:type input_data: str
:type input_type: str
:type enz: list(:py:class:`~rpg.enzyme.Enzyme`)
:type mode: str
:type aa_pka: str
......@@ -428,7 +430,7 @@ def digest_from_input(input_data, enz, mode, aa_pka):
# Results of digestion
results_digestion = []
# Input is a file?
if os.path.isfile(input_data):
if input_type == "file":
with open(input_data) as in_file:
header_first_car = in_file.read(1)
in_file.seek(0)
......@@ -476,11 +478,15 @@ def digest_from_input(input_data, enz, mode, aa_pka):
core.handle_errors("input file format not recognized (%s)." %
header_first_car, 0, "Input ")
# input is a single sequence
else:
elif input_type == "sequence":
tmp_seq = sequence.Sequence("Input",
sequence.check_sequence(input_data))
# Digest the sequence
results_digestion.append(digest_one_sequence(tmp_seq, enz, mode,
aa_pka))
# bad input
else:
core.handle_errors("input type not recognized (%s)." %
input_type, 0, "Input ")
# Return all peptides
return results_digestion
......@@ -3,8 +3,8 @@ import os
from setuptools import setup, find_packages
_MAJOR = 1
_MINOR = 0
_MICRO = 9
_MINOR = 1
_MICRO = 0
version = '%d.%d.%d' % (_MAJOR, _MINOR, _MICRO)
release = '%d.%d' % (_MAJOR, _MINOR)
......
......@@ -76,7 +76,8 @@ def test_output_results(capsys, tmpdir):
# CSV output
seq = "WQSDESDFZQSDESDF"
aa_pka = core.AA_PKA_IPC
all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka)
all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode,
aa_pka)
output_file = tmpdir.join("test_result.csv")
fmt = "csv"
quiet = False
......@@ -86,12 +87,12 @@ def test_output_results(capsys, tmpdir):
assert out == output_file.read()
assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\
"pos,Peptide_size,Peptide_mass,pI,Sequence\n"\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\
"\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\
"D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\
"FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\
"3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\
"8,5.97,F\n"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\
"D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\
"ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\
"14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\
"58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\
"19188,5.97,F\n"
# TSV output
output_file = tmpdir.join("test_result.tsv")
......@@ -104,12 +105,12 @@ def test_output_results(capsys, tmpdir):
assert output_file.read() == "Original_header\tNo_peptide\tEnzyme\tCleavi"\
"ng_pos\tPeptide_size\tPeptide_mass\tpI\tSeq"\
"uence\nInput\t0\tfake_enzyme1\t4\t4\t534.52"\
"598\t3.14\tWQSD\nInput\t1\tfake_enzyme1\t7\t"\
"3\t349.29758\t3.04\tESD\nInput\t2\tfake_enzy"\
"me1\t12\t5\t495.48938\t3.14\tFZQSD\nInput\t3"\
"\tfake_enzyme1\t15\t3\t349.29758\t3.04\tESD"\
"\nInput\t4\tfake_enzyme1\t16\t1\t165.19188"\
"\t5.97\tF\n"
"598\t3.14\tWQSD\nInput\t1\tfake_enzyme1\t7"\
"\t3\t349.29758\t3.04\tESD\nInput\t2\tfake_e"\
"nzyme1\t12\t5\t495.48938\t3.14\tFZQSD\nInpu"\
"t\t3\tfake_enzyme1\t15\t3\t349.29758\t3.04"\
"\tESD\nInput\t4\tfake_enzyme1\t16\t1\t165.1"\
"9188\t5.97\tF\n"
# Fasta output
output_file = tmpdir.join("test_result.fasta")
......@@ -119,16 +120,17 @@ def test_output_results(capsys, tmpdir):
core.output_results(str(output_file), all_seq_digested, fmt, quiet, verbose)
out, err = capsys.readouterr()
assert out == output_file.read()
assert output_file.read() == ">Input_0_fake_enzyme1_4_4_534.52598_3.14\nWQ"\
"SD\n>Input_1_fake_enzyme1_7_3_349.29758_3.04"\
"\nESD\n>Input_2_fake_enzyme1_12_5_495.48938"\
"_3.14\nFZQSD\n>Input_3_fake_enzyme1_15_3_349"\
".29758_3.04\nESD\n>Input_4_fake_enzyme1_16_1"\
"_165.19188_5.97\nF\n"
assert output_file.read() == ">Input_0_fake_enzyme1_4_4_534.52598_3.14\nW"\
"QSD\n>Input_1_fake_enzyme1_7_3_349.29758_3."\
"04\nESD\n>Input_2_fake_enzyme1_12_5_495.489"\
"38_3.14\nFZQSD\n>Input_3_fake_enzyme1_15_3_"\
"349.29758_3.04\nESD\n>Input_4_fake_enzyme1_"\
"16_1_165.19188_5.97\nF\n"
# CSV output in quiet
seq = "WQSDESDFZQSDESDF"
all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka)
all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode,
aa_pka)
output_file = tmpdir.join("test_result.csv")
fmt = "csv"
quiet = True
......@@ -140,16 +142,17 @@ def test_output_results(capsys, tmpdir):
assert out == ""
assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\
"pos,Peptide_size,Peptide_mass,pI,Sequence\n"\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\
"\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\
"D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\
"FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\
"3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\
"8,5.97,F\n"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\
"D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\
"ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\
"14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\
"58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\
"19188,5.97,F\n"
# CSV output in verbose > 2
seq = "WQSDESDFZQSDESDF"
all_seq_digested = digest.digest_from_input(seq, enzymes, mode, aa_pka)
all_seq_digested = digest.digest_from_input(seq, "sequence", enzymes, mode,
aa_pka)
output_file = tmpdir.join("test_result.csv")
fmt = "csv"
quiet = False
......@@ -159,12 +162,12 @@ def test_output_results(capsys, tmpdir):
out, err = capsys.readouterr()
assert output_file.read() == "Original_header,No_peptide,Enzyme,Cleaving_"\
"pos,Peptide_size,Peptide_mass,pI,Sequence\n"\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQSD"\
"\nInput,1,fake_enzyme1,7,3,349.29758,3.04,ES"\
"D\nInput,2,fake_enzyme1,12,5,495.48938,3.14,"\
"FZQSD\nInput,3,fake_enzyme1,15,3,349.29758,"\
"3.04,ESD\nInput,4,fake_enzyme1,16,1,165.1918"\
"8,5.97,F\n"
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\
"D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\
"ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\
"14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\
"58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\
"19188,5.97,F\n"
# Verbose > 2
assert out == "\nNumber of cleavage: 4\nCleavage position: 4, 7, 12, 15\n"\
"Number of miscleavage: 0\nmiscleavage position: \nmis"\
......
......@@ -342,7 +342,7 @@ def test_digest_from_input(capsys, tmpdir):
# Test wrong file
with pytest.raises(SystemExit) as pytest_wrapped_e:
digest.digest_from_input(str(Path.home()) + "/rpg_user.py",
digest.digest_from_input(str(Path.home()) + "/rpg_user.py", "file",
enzymes, mode, aa_pka)
_, err = capsys.readouterr()
assert err == "Input Error: input file format not recognized (f).\n"
......@@ -351,7 +351,7 @@ def test_digest_from_input(capsys, tmpdir):
# Test input data
seq = "WQSDESDFZQSDESDF"
res = digest.digest_from_input(seq, enzymes, mode, aa_pka)
res = digest.digest_from_input(seq, "sequence", enzymes, mode, aa_pka)
assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0.0\nPeptides: "\
......@@ -380,7 +380,8 @@ def test_digest_from_input(capsys, tmpdir):
# Test fasta file
fasta_file = tmpdir.join("test.fasta")
fasta_file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET")
res = digest.digest_from_input(str(fasta_file), enzymes, mode, aa_pka)
res = digest.digest_from_input(str(fasta_file), "file", enzymes, mode,
aa_pka)
assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0.0\nPeptides: "\
......@@ -416,9 +417,10 @@ def test_digest_from_input(capsys, tmpdir):
# Test fastq file (same result)
fastq_file = tmpdir.join("test.fastq")
fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@Fa"\
"ke2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n")
res = digest.digest_from_input(str(fastq_file), enzymes, mode, aa_pka)
fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\
"ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n")
res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode,
aa_pka)
assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0.0\nPeptides: "\
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment