diff --git a/frags/FindRecombinationsAmongGenomes.py b/frags/FindRecombinationsAmongGenomes.py index eb0cdcdfc703e7d8c7b0d220941bc6d8b86da9f8..1cc88d845b1a8d700f861b81480b91db5fcacef2 100644 --- a/frags/FindRecombinationsAmongGenomes.py +++ b/frags/FindRecombinationsAmongGenomes.py @@ -15,7 +15,7 @@ import subprocess from frags import core -__version_info__ = ('0', '1', '4') +__version_info__ = ('0', '1', '5') __version__ = '.'.join(__version_info__) __revision_date__ = "2020-12-17" __author__ = "Nicolas Maillet" @@ -40,8 +40,8 @@ def create_parser(): "genomes) and potential "\ "breakpoints (inserts "\ "between fragments). "\ - "Optionally, breakpoints can "\ - "then be Blasted again the "\ + "Optionally, breakpoints can"\ + " then be Blasted again the "\ "host genome, if provided.") parser.add_argument("-i", "--inputfiles", type=str, nargs='+', diff --git a/frags/core.py b/frags/core.py index 90bc8ce72eae6eaf24a91382d9bed01598e580be..d3556b56736c9413fc9875bdaee8353999eab323 100644 --- a/frags/core.py +++ b/frags/core.py @@ -162,9 +162,9 @@ def next_read(file, offset_start, offset_end): beg_line_offset = in_file.tell() # Not a valid file else: - print("File error: enable to understand type of file {} "\ - "({})".format(file, first_line[0])) - sys.exit(1) + # Stop the generator with the error to show + raise ValueError("File error: enable to understand type of file "\ + "{} ({})".format(file, first_line[0])) def build_graph(ref, k): """ Index each k-mers of a genome @@ -254,37 +254,39 @@ def get_recombinations(offset_start, offset_end, file, k, gap, graph1, graph2=No """ # Resulting Reads of current offset range all_queries = [] + try: + # Query each read, one by one, in the offset range + for header, sequence in next_read(file, offset_start, offset_end): + # Construct the Read + query = read.Read(header, sequence) - # Query each read, one by one, in the offset range - for header, sequence in next_read(file, offset_start, offset_end): - # Construct the Read - query = read.Read(header, sequence) - - # Find hits for this query - hits = find_hits(graph1, query.sequence) - # Transforms hits to matches - query.get_matches(hits, gap, k, 0, 1) - # Find hits for this query (Rev comp) - hits_rc = find_hits(graph1, reverse_complement(query.sequence)) - # Transforms hits to matches - query.get_matches(hits_rc, gap, k, 1, 1) - - # Two ref files - if graph2: # Find hits for this query - hits = find_hits(graph2, query.sequence) + hits = find_hits(graph1, query.sequence) # Transforms hits to matches - query.get_matches(hits, gap, k, 0, 2) + query.get_matches(hits, gap, k, 0, 1) # Find hits for this query (Rev comp) - hits_rc = find_hits(graph2,reverse_complement(query.sequence)) + hits_rc = find_hits(graph1, reverse_complement(query.sequence)) # Transforms hits to matches - query.get_matches(hits_rc, gap, k, 1, 2) - - # Create the breakpoints - query.get_breakpoints() - - # Add this query to the result - all_queries.append(query) + query.get_matches(hits_rc, gap, k, 1, 1) + + # Two ref files + if graph2: + # Find hits for this query + hits = find_hits(graph2, query.sequence) + # Transforms hits to matches + query.get_matches(hits, gap, k, 0, 2) + # Find hits for this query (Rev comp) + hits_rc = find_hits(graph2,reverse_complement(query.sequence)) + # Transforms hits to matches + query.get_matches(hits_rc, gap, k, 1, 2) + + # Create the breakpoints + query.get_breakpoints() + + # Add this query to the result + all_queries.append(query) + except ValueError as exc: + raise exc # Add the global result into the queue return all_queries @@ -311,26 +313,32 @@ def get_all_queries(file, nb_proc, k, gap, graph1, graph2=None): chunk_size = total_size // nb_proc # Starting offset offset_start = 0 - # Create the pool of process - pool = Pool() - # Partial function to fix all but firsts arguments - prod_recomb=partial(get_recombinations, file=file, k=k, gap=gap, - graph1=graph1, graph2=graph2) - # All tuples of offset_start, offset_end - all_offsets = [] - # For each thread/chunk - for _ in range(nb_proc - 1): - # Compute the ending offset for this chunk - offset_end = offset_start + chunk_size - # Add this couple of start/end - all_offsets.append((offset_start, offset_end)) - # Next start is where it stops - offset_start = offset_start + chunk_size - # Add the last chunk - all_offsets.append((offset_start, total_size)) - - # Launch all process (Results is a list of list) - results = pool.starmap(prod_recomb, all_offsets) + try: + # Create the pool of process + pool = Pool() + # Partial function to fix all but firsts arguments + prod_recomb=partial(get_recombinations, file=file, k=k, gap=gap, + graph1=graph1, graph2=graph2) + # All tuples of offset_start, offset_end + all_offsets = [] + # For each thread/chunk + for _ in range(nb_proc - 1): + # Compute the ending offset for this chunk + offset_end = offset_start + chunk_size + # Add this couple of start/end + all_offsets.append((offset_start, offset_end)) + # Next start is where it stops + offset_start = offset_start + chunk_size + # Add the last chunk + all_offsets.append((offset_start, total_size)) + + # Launch all process (Results is a list of list) + results = pool.starmap(prod_recomb, all_offsets) + except ValueError as exc: + print(exc) + pool.terminate() + sys.exit(1) + pool.terminate() # Get a flatten list all_queries = [] diff --git a/tests/test_core.py b/tests/test_core.py index 02296aad258da5b54b7f5b8aa12f5072fae6b9ca..7d371861cba77328e453a74b128cebfdf750029e 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -144,14 +144,13 @@ def test_next_read(capsys, tmpdir): fastqgz_file = tmpdir.join("test.fastq.gz") with gzip.open(fastqgz_file, "wb") as fil: fil.write(data) - with pytest.raises(SystemExit) as pytest_wrapped_e: + with pytest.raises(ValueError) as pytest_wrapped_e: res = core.next_read(fastqgz_file, 0, 35) a_read = next(res) - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 1 - captured = capsys.readouterr() - assert "File error: enable to understand type of file {} "\ - "(+)".format(fastqgz_file) in captured.out + assert pytest_wrapped_e.type == ValueError + assert str(pytest_wrapped_e.value) == "File error: enable to understand "\ + "type of file {} "\ + "(+)".format(fastqgz_file) def test_build_graph(capsys): """ Test function 'build_graph(ref, k)'""" @@ -243,6 +242,37 @@ def test_get_recombinations(tmpdir): assert all_queries[1].matches[0].__repr__() == "(1, 0, 11, 3, 14, 11, [0]"\ ", 2)" + """ Test function 'get_recombinations(offset_start, offset_end, + file, k, gap, graph1, + graph2=None)'""" + # Fake false input file + file = tmpdir.join("test.fasta") + file.write("?Fake1\nACGTTATATGTTGATTAGCTGA\nTGTG\n>Fake2\nCAGTACTAGCAGGTA"\ + "TGCTAGA") + # Fake ref + ref = "AAACGACGTTATATGTCGATTAGCTGAAAC" + # Fake second ref + ref2 = "TTTTGCTAGTACTGTTT" + # Gap size + gap = 1 + # Kmer size + k = 10 + # Read the whole file + offset_start = 0 + offset_end = 1000 + # Build graphs + graph1 = core.build_graph(ref, k) + graph2 = core.build_graph(ref2, k) + # Get the queries + with pytest.raises(ValueError) as pytest_wrapped_e: + all_queries = core.get_recombinations(offset_start, offset_end, file, + k, gap, graph1, graph2) + # We have a ValueError + assert pytest_wrapped_e.type == ValueError + assert str(pytest_wrapped_e.value) == "File error: enable to understand "\ + "type of file {} "\ + "(?)".format(file) + def test_get_all_queries(tmpdir): """ Test function 'get_all_queries(file, nb_proc, k, gap, graph1, graph2=None)'""" diff --git a/tests/test_functional.py b/tests/test_functional.py index 1b354c9524ff388130779d65bf5be48d0a4f5e48..3a337fe79ebbd515c1bc6dc1bdfb0250683b7f40 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -1,13 +1,14 @@ """Functional tests for FRAG.py""" import os +import pytest import unittest.mock from collections import defaultdict from shutil import copyfile from frags import FindRecombinationsAmongGenomes from .context import frags -def test_main(tmpdir): +def test_main(tmpdir, capsys): """ Test the functional behavior of FRAG """ # Output folder @@ -264,6 +265,81 @@ def test_main(tmpdir): "(+)(1)0:39|(+)(2)74:37|(+)(2)152:43\t(1)1792:39|(2)268:37|"\ "(2)607:43\t39|37|43\t0|0|0\t1|2\n") + # Input fastq file to analyze with error + false_file = tmpdir.join("false_functional_tests.fastq") + false_file.write("@10_insertions_in_contiguous_read\n"\ + "ATGGTGCGAAGAGTCTATTGAGCTAATTGGTAGTCCTCCGGCCAAACCCTTTGCCTGAA"\ + "TGCGGCTAATCCCAACTGCGGAGCAGATACCCACATGCCAGTGGGCAGTCT\n"\ + "+expected_results_A.csv\n"\ + "10_insertions_in_contiguous_read\t0\t0\tNone\t(+)(1)0:110\t"\ + "(1)413:110\t110\t10\tNone\n"\ + "?11_insertions_in_contiguous_read\n"\ + "ATGGTGCGAAGAGTCTATTGAGCTAATTGGTAGTCCTCCGGCCAAACCCTTTGGCCTGA"\ + "ATGCGGCTAATCCCAACTGCGGAGCAGATACCCACATGCCAGTGGGCAGTCT\n"\ + "+expected_results_A.csv\n"\ + "11_insertions_in_contiguous_read\t0\t1\t43:11\t(+)(1)0:43|"\ + "(+)(1)54:57\t(1)413:43|(1)456:57\t43|57\t0|0\tNone\n"\ + "@A_then_2_B_in_rc\n"\ + "TGTGAACAAGGTGTGAAGGCATCGCCGTGTTCGACGGCAACCAACGCAGCCTGGACCAC"\ + "CGTCGCCGGTGGGGAACACAGACTTACGCGTTACGACAGGCTAATTGCTGGATTGC\n"\ + "+expected_results_A_and_B.csv\n"\ + "A_then_2_B_in_rc\t2\t2\t40:0|75:0\t(+)(1)0:40|(-)(2)40:35|"\ + "(-)(2)75:40\t(1)1126:40|(2)336:35|(2)495:40\t40|35|40\t"\ + "0|0|0\tNone\n"\ + "@Nothing\n"\ + "AGAGAGAGAGAGAGAGAGAGAGAGAGAGAGAGGAGAGAGAGAGAGAGAGA\n"\ + "+expected_results_unmatched.csv\n"\ + "Nothing\n" + "@A_then_Blast_then_B_then_Blast_then_B\n"\ + "ATCACTTTGATGGGTATAAACAACAAGCGGTAGTGATTAACAAGAGGAGCAAGAACAAC"\ + "ATCAAGAGTCAGGGCTGGAATCTTCGATGCGTTGCGCTCAGCACTCAACCCCCTTCTTA"\ + "TTGTCATAACAGTGGTATCTGACTCCCACCTTCAATTGTTATCATAAAGCGAGTTGGAT"\ + "TGGCCATCCAGTGAGAAT\n"\ + "+expected_results_A_and_B.csv\n"\ + "A_then_Blast_then_B_then_Blast_then_B\t0\t2\t39:35|111:41\t"\ + "(+)(1)0:39|(+)(2)74:37|(+)(2)152:43\t(1)1792:39|(2)268:37|"\ + "(2)607:43\t39|37|43\t0|0|0\t1|2\n") + + # Input fastq file to analyze with error + false_fasta_file = tmpdir.join("false_functional_tests.fasta") + false_fasta_file.write("?10_insertions_in_contiguous_read\n"\ + "ATGGTGCGAAGAGTCTATTGAGCTAATTGGTAGTCCTCCGGCCAAACCCTTTGCCTGAA"\ + "TGCGGCTAATCCCAACTGCGGAGCAGATACCCACATGCCAGTGGGCAGTCT\n"\ + "?11_insertions_in_contiguous_read\n"\ + "ATGGTGCGAAGAGTCTATTGAGCTAATTGGTAGTCCTCCGGCCAAACCCTTTGGCCTGA"\ + "ATGCGGCTAATCCCAACTGCGGAGCAGATACCCACATGCCAGTGGGCAGTCT\n"\ + ">A_then_2_B_in_rc\n"\ + "TGTGAACAAGGTGTGAAGGCATCGCCGTGTTCGACGGCAACCAACGCAGCCTGGACCAC"\ + "CGTCGCCGGTGGGGAACACAGACTTACGCGTTACGACAGGCTAATTGCTGGATTGC\n"\ + ">A_then_Blast_then_B_then_Blast_then_B\n"\ + "ATCACTTTGATGGGTATAAACAACAAGCGGTAGTGATTAACAAGAGGAGCAAGAACAAC"\ + "ATCAAGAGTCAGGGCTGGAATCTTCGATGCGTTGCGCTCAGCACTCAACCCCCTTCTTA"\ + "TTGTCATAACAGTGGTATCTGACTCCCACCTTCAATTGTTATCATAAAGCGAGTTGGAT"\ + "TGGCCATCCAGTGAGAAT\n") + + # Try the full software with wrong fastq file + with unittest.mock.patch("sys.argv", ["func_test", "-i", str(false_file), + "-r", str(ref_a), str(ref_b), + "-o", str(output_folder), "-k", "30", + "-m", "10", "-p", "4", "-b", "-t", + str(h_file)]): + assert FindRecombinationsAmongGenomes.main() is None + + # Try the full software with wrong fasta file + with pytest.raises(SystemExit) as pytest_wrapped_e: + with unittest.mock.patch("sys.argv", ["func_test", "-i", + str(false_fasta_file), + "-r", str(ref_a), str(ref_b), + "-o", str(output_folder), "-k", + "30", "-m", "10", "-p", "4", + "-b", "-t", str(h_file)]): + FindRecombinationsAmongGenomes.main() + assert pytest_wrapped_e.type == SystemExit + # Error output + captured = capsys.readouterr() + assert "File error: enable to understand type of file {} "\ + "(?)".format(false_fasta_file) in captured.out + # Try the full software with unittest.mock.patch("sys.argv", ["func_test", "-i", str(f_file), "-r", str(ref_a), str(ref_b),