diff --git a/tests/test_core.py b/tests/test_core.py index fe264b7a9329d705b3d0a3cd491f9ec6de01d885..5d0543630ecc2c531e0ae95ea008325d58a9176f 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,5 @@ """Tests for core.py""" +import gzip import pytest from .context import rpg from rpg import core @@ -175,6 +176,44 @@ def test_output_results(capsys, tmpdir): "al peptide: WQSD\nC terminal peptide: F\n" + \ output_file.read() + # No output file + output_file = None + fmt = "csv" + quiet = False + verbose = 3 + core.output_results(output_file, all_seq_digested, fmt, quiet, + verbose) + out, err = capsys.readouterr() + assert out == "\nNumber of cleavage: 4\nCleavage position: 4, 7, 12, 15\n"\ + "Number of miscleavage: 0\nmiscleavage position: \nmis"\ + "cleavage ratio: 0.00%\nSmallest peptide size: 1\nN termin"\ + "al peptide: WQSD\nC terminal peptide: F\nOriginal_header,"\ + "No_peptide,Enzyme,Cleaving_"\ + "pos,Peptide_size,Peptide_mass,pI,Sequence\n"\ + "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\ + "D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\ + "ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\ + "14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\ + "58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\ + "19188,5.97,F\n" + + # No output file and less verbose + output_file = None + fmt = "csv" + quiet = False + verbose = 1 + core.output_results(output_file, all_seq_digested, fmt, quiet, + verbose) + out, err = capsys.readouterr() + assert out == "Original_header,No_peptide,Enzyme,Cleaving_"\ + "pos,Peptide_size,Peptide_mass,pI,Sequence\n"\ + "Input,0,fake_enzyme1,4,4,534.52598,3.14,WQS"\ + "D\nInput,1,fake_enzyme1,7,3,349.29758,3.04,"\ + "ESD\nInput,2,fake_enzyme1,12,5,495.48938,3."\ + "14,FZQSD\nInput,3,fake_enzyme1,15,3,349.297"\ + "58,3.04,ESD\nInput,4,fake_enzyme1,16,1,165."\ + "19188,5.97,F\n" + def test_peptide(): """Test class 'Peptide'""" header = "Test" @@ -216,3 +255,79 @@ def test_peptide(): # Test function 'def get_isoelectric_point():' assert 
pep3.get_isoelectric_point() == 2.91 + + +def test_next_read(capsys, tmpdir): + """ Test function 'next_read(files)'""" + # Test fasta (multi-line) file with two sequences + fasta_file = tmpdir.join("test.fasta") + fasta_file.write(">Fake1\nACGTTATATGCTA\nTGTG\n>Fake2\nCAGTACTAGCA") + # Only a portion of the file (from 0 to 3) + res = core.next_read(fasta_file, 0, 3) + # First read + a_read = next(res, None) + assert a_read == (">Fake1", "ACGTTATATGCTATGTG") + # No second read + a_read = next(res, None) + assert a_read is None + # Full file + res = core.next_read(fasta_file, 0, 35) + # First read + a_read = next(res, None) + assert a_read == (">Fake1", "ACGTTATATGCTATGTG") + # Second read + a_read = next(res, None) + assert a_read == (">Fake2", "CAGTACTAGCA") + + # Test gzipped fasta file + data = b">Fake1\nACGTTATATGCTATGT\n" + fastagz_file = tmpdir.join("test.fasta.gz") + with gzip.open(fastagz_file, "wb") as fil: + fil.write(data) + res = core.next_read(fastagz_file, 0, 35) + # First read + a_read = next(res) + assert a_read == (">Fake1", "ACGTTATATGCTATGT") + + # Test fastq file with two sequences + fastq_file = tmpdir.join("test.fastq") + fastq_file.write("@Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n"\ + "@Fake2\nATGCATGCTG\n+Fake1\n8173*8173!") + # First sequence only (from 0 to 3) + res = core.next_read(fastq_file, 0, 3) + # First read + a_read = next(res, None) + assert a_read == ("@Fake1", "ACGTTATATGCTATGTG") + # No second read + a_read = next(res, None) + assert a_read is None + # Full file + res = core.next_read(fastq_file, 0, 1000) + # First read + a_read = next(res, None) + assert a_read == ("@Fake1", "ACGTTATATGCTATGTG") + # Second read + a_read = next(res, None) + assert a_read == ("@Fake2", "ATGCATGCTG") + + # Test gzipped fastq file + data = b"@Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n" + fastqgz_file = tmpdir.join("test.fastq.gz") + with gzip.open(fastqgz_file, "wb") as fil: + fil.write(data) + res = 
core.next_read(fastqgz_file, 0, 35) + # First read + a_read = next(res) + assert a_read == ("@Fake1", "ACGTTATATGCTATGTG") + + # Test wrong gzipped fastq file + data = b"+Fake1\nACGTTATATGCTATGTG\n+Fake1\n5Q8D8=64DS-+DZ84!\n" + fastqgz_file = tmpdir.join("test.fastq.gz") + with gzip.open(fastqgz_file, "wb") as fil: + fil.write(data) + with pytest.raises(ValueError) as pytest_wrapped_e: + res = core.next_read(fastqgz_file, 0, 35) + a_read = next(res) + assert pytest_wrapped_e.type == ValueError + assert str(pytest_wrapped_e.value) == "input file format not recognized (+)." + diff --git a/tests/test_digest.py b/tests/test_digest.py index 89c28365d2d8ae6454ce6fabfe0affa4f9bfd5c1..60ba605a0c54fff75f8dc8abe2684ee4fb67cf9e 100644 --- a/tests/test_digest.py +++ b/tests/test_digest.py @@ -53,6 +53,7 @@ def test_resultonedigestion(): assert res_dig0 != res_dig3 assert res_dig0 != res_dig4 assert res_dig0 != res_dig5 + assert res_dig0 != 42 # Test function '__format__()' format_res = res_dig0.__format__("csv") @@ -171,7 +172,7 @@ def test_one_digest(): assert res.peptides[0].__repr__() == res_pep0.__repr__() assert res.peptides[1].__repr__() == res_pep1.__repr__() -def test_digest_one_sequence(): +def test_digest_one_sequence(capsys): """Test function 'def digest_one_sequence(seq, enz, mode)'""" enzymes = [] # First enzyme: cut after D not precedeed by S @@ -248,6 +249,8 @@ def test_digest_one_sequence(): assert res[1].peptides[1].__repr__() == res_pep2.__repr__() assert res[1].peptides[2].__repr__() == res_pep3.__repr__() + capsys.readouterr() + def test_sequential_digest(): """Test function 'sequential_digest(seq, enz)'""" enzymes = [] @@ -330,7 +333,8 @@ def test_concurrent_digest(): assert res[0].peptides[4].__repr__() == res_pep4.__repr__() def test_digest_from_input(capsys, tmpdir): - """ Test function 'digest_from_input(input_data, enzymes, mode)'""" + """ Test function 'digest_from_input(input_data, input_type, enz, + mode, aa_pka, nb_proc=1)'""" rule_dict = {} 
rule_txt = "(S)(D,)" rule_dict[rule_txt] = True @@ -346,7 +350,6 @@ def test_digest_from_input(capsys, tmpdir): enzymes, mode, aa_pka) _, err = capsys.readouterr() assert err == "Input Error: input file format not recognized (f).\n" - assert pytest_wrapped_e.type == SystemExit assert pytest_wrapped_e.value.code == 1 # Test input data @@ -377,6 +380,14 @@ def test_digest_from_input(capsys, tmpdir): ": 165.19188\npKa values from: IPC\nPep. p"\ "I: 5.97\nSequence: F\n]\n" + # Test wrong input data + with pytest.raises(SystemExit) as pytest_wrapped_e: + seq = "WQSD2ESD" + res = digest.digest_from_input(seq, "sequence", enzymes, mode, aa_pka) + _, err = capsys.readouterr() + assert err == "Input Error: amino acid \"2\" in WQSD2ESD not recognized.\n" + assert pytest_wrapped_e.value.code == 1 + # Test fasta file fasta_file = tmpdir.join("test.fasta") fasta_file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET") @@ -415,41 +426,113 @@ def test_digest_from_input(capsys, tmpdir): " IPC\nPep. pI: 7.16\nSequence: NPHARDORCO"\ "MPLET\n]\n" - # Test fastq file (same result) + # Test fastq file (same result) with multiple proc fastq_file = tmpdir.join("test.fastq") fastq_file.write("@Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\ "ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n") res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode, - aa_pka) - assert res[0][0].__repr__() == "Number of cleavage: 4\nNumber of miscle"\ - "avage: 0\nPositions of miscleavage: []"\ - "\nRatio of miscleavage: 0.0\nPeptides: "\ - "[Original header: Fake1\nNo. peptide: 0\n"\ - "Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\ - " size: 4\nPep. mass: 534.52598\npKa value"\ - "s from: IPC\nPep. pI: 3.14\nSequence: WQS"\ - "D\n, Original header: Fake1\nNo. peptide:"\ - " 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\ - "Pep. size: 3\nPep. mass: 349.29758\npKa v"\ - "alues from: IPC\nPep. pI: 3.04\nSequence:"\ - " ESD\n, Original header: Fake1\nNo. 
pepti"\ - "de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\ - "12\nPep. size: 5\nPep. mass: 495.48938\np"\ - "Ka values from: IPC\nPep. pI: 3.14\nSeque"\ - "nce: FZQSD\n, Original header: Fake1\nNo."\ - " peptide: 3\nEnzyme: fake_enzyme1\nCleav."\ - " pos: 15\nPep. size: 3\nPep. mass: 349.29"\ - "758\npKa values from: IPC\nPep. pI: 3.04"\ - "\nSequence: ESD\n, Original header: Fake1"\ - "\nNo. peptide: 4\nEnzyme: fake_enzyme1\nC"\ - "leav. pos: 16\nPep. size: 1\nPep. mass: 1"\ - "65.19188\npKa values from: IPC\nPep. pI: "\ - "5.97\nSequence: F\n]\n" - assert res[1][0].__repr__() == "Number of cleavage: 0\nNumber of miscle"\ - "avage: 0\nPositions of miscleavage: []"\ - "\nRatio of miscleavage: 0\nPeptides: [O"\ - "riginal header: Fake2\nNo. peptide: 0\nEn"\ - "zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\ - "ize: 15\nPep. mass: 2014.35098\npKa value"\ - "s from: IPC\nPep. pI: 7.16\nSequence: NPH"\ - "ARDORCOMPLET\n]\n" + aa_pka, 2) + # Multi proc, we can't predict which result will be first or second + results_unsorted = [] + results_unsorted.append("Number of cleavage: 4\nNumber of miscle"\ + "avage: 0\nPositions of miscleavage: []"\ + "\nRatio of miscleavage: 0.0\nPeptides: "\ + "[Original header: Fake1\nNo. peptide: 0\n"\ + "Enzyme: fake_enzyme1\nCleav. pos: 4\nPep."\ + " size: 4\nPep. mass: 534.52598\npKa value"\ + "s from: IPC\nPep. pI: 3.14\nSequence: WQS"\ + "D\n, Original header: Fake1\nNo. peptide:"\ + " 1\nEnzyme: fake_enzyme1\nCleav. pos: 7\n"\ + "Pep. size: 3\nPep. mass: 349.29758\npKa v"\ + "alues from: IPC\nPep. pI: 3.04\nSequence:"\ + " ESD\n, Original header: Fake1\nNo. pepti"\ + "de: 2\nEnzyme: fake_enzyme1\nCleav. pos: "\ + "12\nPep. size: 5\nPep. mass: 495.48938\np"\ + "Ka values from: IPC\nPep. pI: 3.14\nSeque"\ + "nce: FZQSD\n, Original header: Fake1\nNo."\ + " peptide: 3\nEnzyme: fake_enzyme1\nCleav."\ + " pos: 15\nPep. size: 3\nPep. mass: 349.29"\ + "758\npKa values from: IPC\nPep. pI: 3.04"\ + "\nSequence: ESD\n, Original header: Fake1"\ + "\nNo. 
peptide: 4\nEnzyme: fake_enzyme1\nC"\ + "leav. pos: 16\nPep. size: 1\nPep. mass: 1"\ + "65.19188\npKa values from: IPC\nPep. pI: "\ + "5.97\nSequence: F\n]\n") + results_unsorted.append("Number of cleavage: 0\nNumber of miscle"\ + "avage: 0\nPositions of miscleavage: []"\ + "\nRatio of miscleavage: 0\nPeptides: [O"\ + "riginal header: Fake2\nNo. peptide: 0\nEn"\ + "zyme: fake_enzyme1\nCleav. pos: 0\nPep. s"\ + "ize: 15\nPep. mass: 2014.35098\npKa value"\ + "s from: IPC\nPep. pI: 7.16\nSequence: NPH"\ + "ARDORCOMPLET\n]\n") + assert len(res) == 2 + assert res[0][0].__repr__() in results_unsorted + assert res[1][0].__repr__() in results_unsorted + + # Test wrong fastq file + fastq_file = tmpdir.join("test.fastq") + fastq_file.write("?Fake1\nWQSDESDFZQSDESDF\n+Fake1\nnWQSDESDFZQSDESDF\n@F"\ + "ake2\nNPHARDORCOMPLET\n+Fake2\nnNPHARDORCOMPLET\n") + with pytest.raises(SystemExit) as pytest_wrapped_e: + res = digest.digest_from_input(str(fastq_file), "file", enzymes, mode, + aa_pka, 4) + _, err = capsys.readouterr() + assert err == "Input Error: input file format not recognized (?).\n" + assert pytest_wrapped_e.value.code == 1 + + # Test wrong input type + with pytest.raises(SystemExit) as pytest_wrapped_e: + res = digest.digest_from_input(str(fastq_file), "42", enzymes, mode, + aa_pka, 4) + _, err = capsys.readouterr() + assert err == "Input Error: input type not recognized (42).\n" + assert pytest_wrapped_e.value.code == 1 + +def test_digest_part(tmpdir): + """ Test function 'digest_part(offset_start, offset_end, file, enz, + mode, aa_pka)'""" + # Fake input file + file = tmpdir.join("test.fasta") + file.write(">Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET") + rule_dict = {} + rule_txt = "(S)(D,)" + rule_dict[rule_txt] = True + all_rules = rule.create_rules(rule_dict) + enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) + enz = [enz1] + mode = "sequential" + aa_pka = core.AA_PKA_IPC + # Read the whole file + offset_start = 0 + offset_end = 1000 + + # Get the 
queries + results_digestion = digest.digest_part(offset_start, offset_end, file, enz, + mode, aa_pka) + # We have 2 res, one for each ref + assert len(results_digestion) == 2 + assert results_digestion[1][0].enzyme_name == "fake_enzyme1" + assert results_digestion[1][0].nb_cleavage == 0 + + # Fake false input file + file = tmpdir.join("test.fasta") + file.write(",Fake1\nWQSDESDFZQS\nDESDF\n>Fake2\nNPHARDORCOMPLET") + rule_dict = {} + rule_txt = "(S)(D,)" + rule_dict[rule_txt] = True + all_rules = rule.create_rules(rule_dict) + enz1 = enzyme.Enzyme(-1, "fake_enzyme1", all_rules) + enz = [enz1] + mode = "sequential" + aa_pka = core.AA_PKA_IPC + # Read the whole file + offset_start = 0 + offset_end = 1000 + # Get the queries + with pytest.raises(ValueError) as pytest_wrapped_e: + results_digestion = digest.digest_part(offset_start, offset_end, file, + enz, mode, aa_pka) + # We have a ValueError + assert pytest_wrapped_e.type == ValueError + assert str(pytest_wrapped_e.value) == "input file format not recognized (,)." 
diff --git a/tests/test_enzyme.py b/tests/test_enzyme.py index 8d87cde182f38cd9fc133aa3d44a3cb64d73a792..065622051d26d3abe9884c61791bf185d4624db3 100644 --- a/tests/test_enzyme.py +++ b/tests/test_enzyme.py @@ -39,6 +39,7 @@ def test_enzyme(tmpdir): # Test function '__eq__()' assert enz0 == enz1 assert enz0 != enz2 + assert enz0 != 42 """Test function 'write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE)' @@ -61,7 +62,7 @@ def test_enzyme(tmpdir): 's\nAVAILABLE_ENZYMES_USER.append(ENZYME)\nC'\ 'PT_ENZ += 1\n' -def test_check_enzyme_name(): +def test_check_enzyme_name(capsys): """Test function 'check_enzyme_name(name_new_enz, all_name_enz)'.""" # Already taken names @@ -90,21 +91,4 @@ def test_check_enzyme_name(): res = enzyme.check_enzyme_name(seq_name, all_name) assert res is False -''' -def test_user_creation_enzyme(capsys, monkeypatch): - """Test function 'user_creation_enzyme()'""" - monkeypatch.setattr(builtins.input, "Mark") - i = input("What is your name?") - assert i == "Mark" - - - inputs = [10, 'y'] - input_generator = (i for i in inputs) - monkeypatch.setitem('__builtins__.input', lambda prompt: next(input_generator)) - core.user_creation_enzyme() - out, err = capsys.readouterr() - print(out) - print(err) - assert input('how many?') == 10 - assert input('you sure?') == 'y' -''' + capsys.readouterr() diff --git a/tests/test_rule.py b/tests/test_rule.py index 2fc7cb203a83c7451d1ecf2042c70ce3427fab11..51f2d416f85263c1f61ff02b28c803b077b78476 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -87,7 +87,7 @@ def test_rule(): 'leaves before D, followed by Q, except...\nD_0.rules.appen'\ 'd(D_0Q2)\nENZ.append(D_0)\n\n' -def test_check_rule(): +def test_check_rule(capsys): """Test function 'check_rule(exprule)'.""" # Good expr = "(,A or B,)" @@ -215,6 +215,8 @@ def test_check_rule(): res = rule.check_rule(expr) assert res == expr + capsys.readouterr() + def test_split_complex_rule(): """Test function 'split_complex_rule(rule)'.""" # No 'or' @@ 
-307,7 +309,6 @@ def test_create_rules(): all_rules = {} # Simplest rule # cleavage before A - rule1 = "(,A)" all_rules[rule1] = True # Truth for this rule @@ -375,7 +376,7 @@ def test_create_rules(): assert len(res) == 7 # Double comma and 'or' - # Cleaves after I or J, except if K is after, but cleaves if a L is before + # Cleaves before or after I or J, except if K is after, but cleaves if a L is before all_rules = {} rule6 = "(,I or J,)" all_rules[rule6] = True @@ -402,6 +403,20 @@ def test_create_rules(): assert truth6_4 in res assert len(res) == 4 + # Similar complex rule + # Cleaves before G if there is a H after + # But doesn't cleave before G otherwise + all_rules = {} + rule7 = "(,G)(H)" + all_rules[rule7] = True + # Truth for rule7 + truth7_1 = rule.Rule(1, "H", True, -1) + truth7 = rule.Rule(0, "G", False, 0) # Doesn't cleave before G + truth7.rules.append(truth7_1) + res = rule.create_rules(all_rules) + assert truth7 in res + assert len(res) == 1 + def test_handle_rule(): """Test function 'handle_rule(seq, pos, a_rule, cleavage)'""" diff --git a/tests/test_sequence.py b/tests/test_sequence.py index 1dc8639c14db301ffd0944e816c6520312a4c9f1..63fe3332e148df82fbbc4f177ef150ae7ae9d0c6 100644 --- a/tests/test_sequence.py +++ b/tests/test_sequence.py @@ -1,8 +1,42 @@ """Tests for sequence.py""" import pytest from .context import rpg +from rpg import core from rpg import sequence +def test_peptide(): + """Test class 'Peptide'""" + header = "fake_sequence" + seq = "QWSDESDF" + enz_name = "Pwet" + aa_pka = core.AA_PKA_IPC + nb_peptide = 42 + pep0 = sequence.Peptide(header, seq, enz_name, aa_pka, nb_peptide) + + # Test function '__repr__()' + assert pep0.__repr__() == "Original header: fake_sequence\nNo. peptide: "\ + "42\nEnzyme: Pwet\nCleav. pos: 0\nPep. size: 8"\ + "\nPep. mass: 1012.98488\npKa values from: IPC"\ + "\nPep. 
pI: 2.91\nSequence: QWSDESDF\n" + + header = "fake_sequence" + seq = "QWSDESDF" + enz_name = "Pwet" + aa_pka = core.AA_PKA_IPC + nb_peptide = 42 + pep1 = sequence.Peptide(header, seq, enz_name, aa_pka, nb_peptide) + + header = "fake_sequence" + seq = "QWSDESDW" + enz_name = "Pwet" + aa_pka = core.AA_PKA_IPC + nb_peptide = 42 + pep2 = sequence.Peptide(header, seq, enz_name, aa_pka, nb_peptide) + + assert pep0 == pep1 + assert pep0 != pep2 + assert pep0 != 42 + def test_sequence(): """Test class 'Sequence'""" header = "fake_sequence" @@ -28,6 +62,7 @@ def test_sequence(): assert seq0 == seq1 assert seq0 != seq2 assert seq0 != seq3 + assert seq0 != 42 def test_check_sequence(capsys): """ Test function 'check_sequence(seq)'""" @@ -35,10 +70,7 @@ def test_check_sequence(capsys): assert sequence.check_sequence("aiHZODHUoh") == "AIHZODHUOH" # Bad symbol - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: sequence.check_sequence("a%HZODHUoh") - _, err = capsys.readouterr() - assert err == "Sequence Error: amino acid \"%\" in A%HZODHUOH not recogni"\ - "zed.\n" - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 1 + assert "amino acid \"%\" in A%HZODHUOH not "\ + "recognized." in str(pytest_wrapped_e.value)