Commit ddf4a2a6 authored by Nicolas  MAILLET's avatar Nicolas MAILLET
Browse files

Increase global coverage and test for parallel functions

parent aceef805
"""Tests for core.py"""
import gzip
import pytest
from .context import rpg
from rpg import core
......@@ -175,6 +176,44 @@ def test_output_results(capsys, tmpdir):
"al peptide: WQSD\nC terminal peptide: F\n" + \
output_file.read()
# No output file
output_file = None
fmt = "csv"
quiet = False
verbose = 3
core.output_results(output_file, all_seq_digested, fmt, quiet,
verbose)
out, err = capsys.readouterr()
assert out == "\nNumber of cleavage: 4\nCleavage position: 4, 7, 12, 15\n"\
"Number of miscleavage: 0\nmiscleavage position: \nmis"\
"cleavage ratio: 0.00%\nSmallest peptide size: 1\nN termin"\
"al peptide: WQSD\nC terminal peptide: F\nOriginal_header,"\
"No_peptide,Enzyme,Cleaving_"\
"pos,Peptide_size,Peptide_mass,pI,Sequence\n"\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\
"D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\
"ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\
"14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\
"58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\
"19188,5.97,F\n"
# No output file and less verbose
output_file = None
fmt = "csv"
quiet = False
verbose = 1
core.output_results(output_file, all_seq_digested, fmt, quiet,
verbose)
out, err = capsys.readouterr()
assert out == "Original_header,No_peptide,Enzyme,Cleaving_"\
"pos,Peptide_size,Peptide_mass,pI,Sequence\n"\
"Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\
"D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\
"ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\
"14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\
"58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\
"19188,5.97,F\n"
def test_peptide():
"""Test class 'Peptide'"""
header = "Test"
......@@ -216,3 +255,79 @@ def test_peptide():
# Test function 'def get_isoelectric_point():'
assert pep3.get_isoelectric_point() == 2.91
def test_next_read(capsys, tmpdir):
    """Check 'next_read(files)' on plain and gzipped fasta/fastq inputs.

    Scenarios: a window limited to the start of the file, a window
    covering the whole file, gzip-compressed inputs, and a file whose
    first character is not a recognized format marker.
    """
    def pull(reader, count):
        """Take 'count' items from 'reader'; None once it is exhausted."""
        return [next(reader, None) for _ in range(count)]

    first_seq = "ACGTTATATGCTATGTG"

    # Multi-line fasta file holding two sequences
    fasta_path = tmpdir.join("test.fasta")
    fasta_path.write(">Fake1\nACGTTATATGCTA\nTGTG\n>Fake2\nCAGTACTAGCA")
    # Portion of the file (from 0 to 3): one read, then nothing
    assert pull(core.next_read(fasta_path, 0, 3), 2) == \
        [(">Fake1", first_seq), None]
    # Full file: first read, then second read
    assert pull(core.next_read(fasta_path, 0, 35), 2) == \
        [(">Fake1", first_seq), (">Fake2", "CAGTACTAGCA")]

    # Gzipped fasta file
    fastagz_path = tmpdir.join("test.fasta.gz")
    with gzip.open(fastagz_path, "wb") as handle:
        handle.write(b">Fake1\nACGTTATATGCTATGT\n")
    reader = core.next_read(fastagz_path, 0, 35)
    assert next(reader) == (">Fake1", "ACGTTATATGCTATGT")

    # Fastq file holding two sequences
    fastq_path = tmpdir.join("test.fastq")
    fastq_path.write("@Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n"
                     "@Fake2\nATGCATGCTG\n+Fake1\n8173*8173!")
    # Portion of the file (from 0 to 3): one read, then nothing
    assert pull(core.next_read(fastq_path, 0, 3), 2) == \
        [("@Fake1", first_seq), None]
    # Full file: first read, then second read
    assert pull(core.next_read(fastq_path, 0, 1000), 2) == \
        [("@Fake1", first_seq), ("@Fake2", "ATGCATGCTG")]

    # Gzipped fastq file
    fastqgz_path = tmpdir.join("test.fastq.gz")
    with gzip.open(fastqgz_path, "wb") as handle:
        handle.write(b"@Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n")
    reader = core.next_read(fastqgz_path, 0, 35)
    assert next(reader) == ("@Fake1", first_seq)

    # Gzipped fastq file starting with an unexpected character ('+')
    fastqgz_path = tmpdir.join("test.fastq.gz")
    with gzip.open(fastqgz_path, "wb") as handle:
        handle.write(b"+Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n")
    with pytest.raises(ValueError) as raised:
        reader = core.next_read(fastqgz_path, 0, 35)
        next(reader)
    assert raised.type == ValueError
    assert str(raised.value) == "input file format not recognized (+)."
......@@ -53,6 +53,7 @@ def test_resultonedigestion():
assert res_dig0 != res_dig3
assert res_dig0 != res_dig4
assert res_dig0 != res_dig5
assert res_dig0 != 42
# Test function '__format__()'
format_res = res_dig0.__format__("csv")
......@@ -171,7 +172,7 @@ def test_one_digest():
assert res.peptides[0].__repr__() == res_pep0.__repr__()
assert res.peptides[1].__repr__() == res_pep1.__repr__()
def test_digest_one_sequence():
def test_digest_one_sequence(capsys):
"""Test function 'def digest_one_sequence(seq, enz, mode)'"""
enzymes = []
# First enzyme: cut after D not preceded by S
......@@ -248,6 +249,8 @@ def test_digest_one_sequence():
assert res[1].peptides[1].__repr__() == res_pep2.__repr__()
assert res[1].peptides[2].__repr__() == res_pep3.__repr__()
capsys.readouterr()
def test_sequential_digest():
"""Test function 'sequential_digest(seq, enz)'"""
enzymes = []
......@@ -330,7 +333,8 @@ def test_concurrent_digest():
assert res[0].peptides[4].__repr__() == res_pep4.__repr__()
def test_digest_from_input(capsys, tmpdir):
""" Test function 'digest_from_input(input_data, enzymes, mode)'"""
""" Test function 'digest_from_input(input_data, input_type, enz,
mode, aa_pka, nb_proc=1)'"""
rule_dict = {}
rule_txt = "(S)(D,)"
rule_dict[rule_txt] = True
......@@ -346,7 +350,6 @@ def test_digest_from_input(capsys, tmpdir):
enzymes, mode, aa_pka)
_, err = capsys.readouterr()
assert err == "Input Error: input file format not recognized (f).\n"
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
# Test input data
......@@ -377,6 +380,14 @@ def test_digest_from_input(capsys, tmpdir):
": 165.19188\npKa values from: IPC\nPep. p"\
"I: 5.97\nSequence: F\n]\n"
# Test wrong input data
with pytest.raises(SystemExit) as pytest_wrapped_e:
seq = "WQSD2ESD"
res = digest.digest_from_input(seq, "sequence", enzymes, mode, aa_pka)
_, err = capsys.readouterr()
assert err == "Input Error: amino acid \"2\" in WQSD2ESD not recognized.\n"
assert pytest_wrapped_e.value.code == 1
# Test fasta file
fasta_file = tmpdir.join("test.fasta")
fasta_file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET")
......@@ -415,41 +426,113 @@ def test_digest_from_input(capsys, tmpdir):
" IPC\nPep. pI: 7.16\nSequence: NPHARDORCO"\
"MPLET\n]\n"
# Test fastq file (same result)
# Test fastq file (same result) with multiple proc
fastq_file = tmpdir.join("test.fastq")
fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\
"ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n")
res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode,
aa_pka)
assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0.0\nPeptides: "\
"[Original header: Fake1\nNo. peptide: 0\n"\
"Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\
" size: 4\nPep. mass: 534.52598\npKa value"\
"s from: IPC\nPep. pI: 3.14\nSequence: WQS"\
"D\n, Original header: Fake1\nNo. peptide:"\
" 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\
"Pep. size: 3\nPep. mass: 349.29758\npKa v"\
"alues from: IPC\nPep. pI: 3.04\nSequence:"\
" ESD\n, Original header: Fake1\nNo. pepti"\
"de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\
"12\nPep. size: 5\nPep. mass: 495.48938\np"\
"Ka values from: IPC\nPep. pI: 3.14\nSeque"\
"nce: FZQSD\n, Original header: Fake1\nNo."\
" peptide: 3\nEnzyme: fake_enzyme1\nCleav."\
" pos: 15\nPep. size: 3\nPep. mass: 349.29"\
"758\npKa values from: IPC\nPep. pI: 3.04"\
"\nSequence: ESD\n, Original header: Fake1"\
"\nNo. peptide: 4\nEnzyme: fake_enzyme1\nC"\
"leav. pos: 16\nPep. size: 1\nPep. mass: 1"\
"65.19188\npKa values from: IPC\nPep. pI: "\
"5.97\nSequence: F\n]\n"
assert res[1][0].__repr__() == "Number of cleavage: 0\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0\nPeptides: [O"\
"riginal header: Fake2\nNo. peptide: 0\nEn"\
"zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\
"ize: 15\nPep. mass: 2014.35098\npKa value"\
"s from: IPC\nPep. pI: 7.16\nSequence: NPH"\
"ARDORCOMPLET\n]\n"
aa_pka, 2)
# Multi proc, we can't predict which result will be first or second
results_unsorted = []
results_unsorted.append("Number of cleavage: 4\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0.0\nPeptides: "\
"[Original header: Fake1\nNo. peptide: 0\n"\
"Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\
" size: 4\nPep. mass: 534.52598\npKa value"\
"s from: IPC\nPep. pI: 3.14\nSequence: WQS"\
"D\n, Original header: Fake1\nNo. peptide:"\
" 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\
"Pep. size: 3\nPep. mass: 349.29758\npKa v"\
"alues from: IPC\nPep. pI: 3.04\nSequence:"\
" ESD\n, Original header: Fake1\nNo. pepti"\
"de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\
"12\nPep. size: 5\nPep. mass: 495.48938\np"\
"Ka values from: IPC\nPep. pI: 3.14\nSeque"\
"nce: FZQSD\n, Original header: Fake1\nNo."\
" peptide: 3\nEnzyme: fake_enzyme1\nCleav."\
" pos: 15\nPep. size: 3\nPep. mass: 349.29"\
"758\npKa values from: IPC\nPep. pI: 3.04"\
"\nSequence: ESD\n, Original header: Fake1"\
"\nNo. peptide: 4\nEnzyme: fake_enzyme1\nC"\
"leav. pos: 16\nPep. size: 1\nPep. mass: 1"\
"65.19188\npKa values from: IPC\nPep. pI: "\
"5.97\nSequence: F\n]\n")
results_unsorted.append("Number of cleavage: 0\nNumber of miscle"\
"avage: 0\nPositions of miscleavage: []"\
"\nRatio of miscleavage: 0\nPeptides: [O"\
"riginal header: Fake2\nNo. peptide: 0\nEn"\
"zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\
"ize: 15\nPep. mass: 2014.35098\npKa value"\
"s from: IPC\nPep. pI: 7.16\nSequence: NPH"\
"ARDORCOMPLET\n]\n")
assert len(res) == 2
assert res[0][0].__repr__() in results_unsorted
assert res[1][0].__repr__() in results_unsorted
# Test wrong fastq file
fastq_file = tmpdir.join("test.fastq")
fastq_file.write("?Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\
"ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n")
with pytest.raises(SystemExit) as pytest_wrapped_e:
res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode,
aa_pka, 4)
_, err = capsys.readouterr()
assert err == "Input Error: input file format not recognized (?).\n"
assert pytest_wrapped_e.value.code == 1
# Test wrong input type
with pytest.raises(SystemExit) as pytest_wrapped_e:
res = digest.digest_from_input(str(fastq_file), "42", enzymes, mode,
aa_pka, 4)
_, err = capsys.readouterr()
assert err == "Input Error: input type not recognized (42).\n"
assert pytest_wrapped_e.value.code == 1
def test_digest_part(tmpdir):
    """Check 'digest_part(offset_start, offset_end, file, enz, mode, aa_pka)'.

    A well-formed fasta file must give one result per sequence; a file
    whose first character is not recognized must raise ValueError.
    """
    def single_enzyme():
        """Build the one-enzyme list used by both scenarios."""
        rules = rule.create_rules({"(S)(D,)": True})
        return [enzyme.Enzyme(-1, "fake_enzyme1", rules)]

    # Offsets used to read the whole file in both scenarios
    start, end = 0, 1000

    # Well-formed fasta input with two sequences
    fasta_path = tmpdir.join("test.fasta")
    fasta_path.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET")
    digested = digest.digest_part(start, end, fasta_path, single_enzyme(),
                                  "sequential", core.AA_PKA_IPC)
    # Two results, one per sequence of the file
    assert len(digested) == 2
    second = digested[1][0]
    assert second.enzyme_name == "fake_enzyme1"
    assert second.nb_cleavage == 0

    # Malformed input: the first header starts with ',' instead of '>'
    fasta_path = tmpdir.join("test.fasta")
    fasta_path.write(",Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET")
    enzymes = single_enzyme()
    with pytest.raises(ValueError) as raised:
        digest.digest_part(start, end, fasta_path, enzymes, "sequential",
                           core.AA_PKA_IPC)
    # The error reports the offending character
    assert raised.type == ValueError
    assert str(raised.value) == "input file format not recognized (,)."
......@@ -39,6 +39,7 @@ def test_enzyme(tmpdir):
# Test function '__eq__()'
assert enz0 == enz1
assert enz0 != enz2
assert enz0 != 42
"""Test function
'write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE)'
......@@ -61,7 +62,7 @@ def test_enzyme(tmpdir):
's\nAVAILABLE_ENZYMES_USER.append(ENZYME)\nC'\
'PT_ENZ += 1\n'
def test_check_enzyme_name():
def test_check_enzyme_name(capsys):
"""Test function 'check_enzyme_name(name_new_enz, all_name_enz)'."""
# Already taken names
......@@ -90,21 +91,4 @@ def test_check_enzyme_name():
res = enzyme.check_enzyme_name(seq_name, all_name)
assert res is False
'''
def test_user_creation_enzyme(capsys, monkeypatch):
"""Test function 'user_creation_enzyme()'"""
monkeypatch.setattr(builtins.input, "Mark")
i = input("What is your name?")
assert i == "Mark"
inputs = [10, 'y']
input_generator = (i for i in inputs)
monkeypatch.setitem('__builtins__.input', lambda prompt: next(input_generator))
core.user_creation_enzyme()
out, err = capsys.readouterr()
print(out)
print(err)
assert input('how many?') == 10
assert input('you sure?') == 'y'
'''
capsys.readouterr()
......@@ -87,7 +87,7 @@ def test_rule():
'leaves before D, followed by Q, except...\nD_0.rules.appen'\
'd(D_0Q2)\nENZ.append(D_0)\n\n'
def test_check_rule():
def test_check_rule(capsys):
"""Test function 'check_rule(exprule)'."""
# Good
expr = "(,A or B,)"
......@@ -215,6 +215,8 @@ def test_check_rule():
res = rule.check_rule(expr)
assert res == expr
capsys.readouterr()
def test_split_complex_rule():
"""Test function 'split_complex_rule(rule)'."""
# No 'or'
......@@ -307,7 +309,6 @@ def test_create_rules():
all_rules = {}
# Simplest rule
# cleavage before A
rule1 = "(,A)"
all_rules[rule1] = True
# Truth for this rule
......@@ -375,7 +376,7 @@ def test_create_rules():
assert len(res) == 7
# Double comma and 'or'
# Cleaves after I or J, except if K is after, but cleaves if a L is before
# Cleaves before or after I or J, except if K is after, but cleaves if a L is before
all_rules = {}
rule6 = "(,I or J,)"
all_rules[rule6] = True
......@@ -402,6 +403,20 @@ def test_create_rules():
assert truth6_4 in res
assert len(res) == 4
# Similar complex rule
# Cleaves before G if there is an H after
# But doesn't cleave before G otherwise
all_rules = {}
rule7 = "(,G)(H)"
all_rules[rule7] = True
# Truth for rule7
truth7_1 = rule.Rule(1, "H", True, -1)
truth7 = rule.Rule(0, "G", False, 0) # Don't cleaves after G
truth7.rules.append(truth7_1)
res = rule.create_rules(all_rules)
assert truth7 in res
assert len(res) == 1
def test_handle_rule():
"""Test function 'handle_rule(seq, pos, a_rule, cleavage)'"""
......
"""Tests for sequence.py"""
import pytest
from .context import rpg
from rpg import core
from rpg import sequence
def test_peptide():
    """Test class 'Peptide': '__repr__()' output and (in)equality."""
    def make_peptide(residues):
        """Build a peptide differing from the others only by its sequence."""
        return sequence.Peptide("fake_sequence", residues, "Pwet",
                                core.AA_PKA_IPC, 42)

    reference = make_peptide("QWSDESDF")
    # Test function '__repr__()': one field per line
    assert repr(reference) == (
        "Original header: fake_sequence\n"
        "No. peptide: 42\n"
        "Enzyme: Pwet\n"
        "Cleav. pos: 0\n"
        "Pep. size: 8\n"
        "Pep. mass: 1012.98488\n"
        "pKa values from: IPC\n"
        "Pep. pI: 2.91\n"
        "Sequence: QWSDESDF\n"
    )

    same = make_peptide("QWSDESDF")
    other = make_peptide("QWSDESDW")
    # Same arguments compare equal
    assert reference == same
    # A different sequence, or a non-Peptide object, compares unequal
    assert reference != other
    assert reference != 42
def test_sequence():
"""Test class 'Sequence'"""
header = "fake_sequence"
......@@ -28,6 +62,7 @@ def test_sequence():
assert seq0 == seq1
assert seq0 != seq2
assert seq0 != seq3
assert seq0 != 42
def test_check_sequence(capsys):
""" Test function 'check_sequence(seq)'"""
......@@ -35,10 +70,7 @@ def test_check_sequence(capsys):
assert sequence.check_sequence("aiHZODHUoh") == "AIHZODHUOH"
# Bad symbol
with pytest.raises(SystemExit) as pytest_wrapped_e:
with pytest.raises(ValueError) as pytest_wrapped_e:
sequence.check_sequence("a%HZODHUoh")
_, err = capsys.readouterr()
assert err == "Sequence Error: amino acid \"%\" in A%HZODHUOH not recogni"\
"zed.\n"
assert pytest_wrapped_e.type == SystemExit
assert pytest_wrapped_e.value.code == 1
assert "amino acid \"%\" in A%HZODHUOH not "\
"recognized." in str(pytest_wrapped_e.value)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment