diff --git a/README.md b/README.md index 6547fd1d2ef2c00e2427dec587eddecab2c43bd6..c2d20756de2ca586447f4cee23d62d51dd64c592 100644 --- a/README.md +++ b/README.md @@ -48,5 +48,5 @@ Config parameters: ```bash snakemake -s Snakefile_data_simu --config n=10000 m=[4,6,8,10,12] m_dev=[0,0.5,1,2,3] - snakemake -s Snakefile_d2 --config input=[snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf]] + snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf] ``` diff --git a/deconvolution/d2_algorithms.py b/deconvolution/d2graph/d2_algorithms.py similarity index 98% rename from deconvolution/d2_algorithms.py rename to deconvolution/d2graph/d2_algorithms.py index 44ba08a0180926d6325f337d4b14f917795a3707..1391181e24ead30872b1af6e3665b86eff8f7bb8 100644 --- a/deconvolution/d2_algorithms.py +++ b/deconvolution/d2graph/d2_algorithms.py @@ -1,7 +1,6 @@ import networkx as nx -from itertools import combinations -from d2_path import Path, Unitig +from d2graph.d2_path import Unitig """ Remove unnecessary transitions diff --git a/deconvolution/d2_graph.py b/deconvolution/d2graph/d2_graph.py similarity index 96% rename from deconvolution/d2_graph.py rename to deconvolution/d2graph/d2_graph.py index 450e1cdd7a16031a0d9e20014b2b88a7fc439658..57bc14176dcc79d9eec8336a335e3d558c1377c4 100644 --- a/deconvolution/d2_graph.py +++ b/deconvolution/d2graph/d2_graph.py @@ -3,7 +3,7 @@ import itertools from bidict import bidict import sys -from d_graph import Dgraph, compute_all_max_d_graphs, filter_dominated, list_domination_filter +from dgraph.d_graph import Dgraph, compute_all_max_d_graphs, list_domination_filter class D2Graph(nx.Graph): @@ -56,7 +56,6 @@ class D2Graph(nx.Graph): def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None): - import debug_disct as dd # Compute all the d-graphs if verbose: print("Computing the unit d-graphs..") @@ -79,7 +78,7 @@ class D2Graph(nx.Graph): # Index all the d-graphs if verbose: - print("Compute the dmer index") + print("Compute the dmer dgraph") self.index = self.create_index_from_tuples(index_size, verbose=verbose) self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose) # Compute node distances for pair of dgraphs that share at least 1 dmer. @@ -209,7 +208,7 @@ class D2Graph(nx.Graph): to_remove = set() if verbose: - print("\tFilter dominated in index") + print("\tFilter dominated in dgraph") # Find dominated for dmer_idx, item in enumerate(self.index.items()): @@ -242,7 +241,7 @@ class D2Graph(nx.Graph): self.all_d_graphs.remove(r_dg) self.d_graphs_per_node[r_dg.center].remove(r_dg) - # Remove dominated in index + # Remove dominated in dgraph for dmer in itertools.combinations(r_dg.to_sorted_list(), tuple_size): if r_dg in self.index[dmer]: self.index[dmer] = list(filter(lambda x: x!=r_dg, self.index[dmer])) diff --git a/deconvolution/d2_path.py b/deconvolution/d2graph/d2_path.py similarity index 100% rename from deconvolution/d2_path.py rename to deconvolution/d2graph/d2_path.py diff --git a/deconvolution/d2_reduction.py b/deconvolution/d2graph/d2_reduction.py similarity index 96% rename from deconvolution/d2_reduction.py rename to deconvolution/d2graph/d2_reduction.py index 9eb85ff1bdb8f47f2bb42be8e72aecfeb818fded..dd534aca826800ccf8342eb8f939a66e0d22bab8 100755 --- a/deconvolution/d2_reduction.py +++ b/deconvolution/d2graph/d2_reduction.py @@ -4,8 +4,7 @@ import networkx as nx import argparse import sys -import d2_graph as d2 -import d2_algorithms as d2a +from d2graph import d2_algorithms as d2a, d2_graph as d2 def parse_arguments(): diff --git a/deconvolution/path_algorithms.py b/deconvolution/d2graph/path_algorithms.py similarity index 99% rename from deconvolution/path_algorithms.py rename to deconvolution/d2graph/path_algorithms.py index 251c28b23ce814bd9e1897bdde173af3e1e1aa58..e22c93c84661e7c0642086d5f1a8f585f4019a9a 100644 --- a/deconvolution/path_algorithms.py +++ b/deconvolution/d2graph/path_algorithms.py @@ -1,5 +1,4 @@ -import networkx as nx -from d2_path import Path +from d2graph.d2_path import Path """ Greedy algorithm. Start with th most probable unitig (ie lowest normalized penalty first and largest unitig for equalities). Then extends on both side to the nearest interesting unitig. diff --git a/deconvolution/path_optimization.py b/deconvolution/d2graph/path_optimization.py similarity index 98% rename from deconvolution/path_optimization.py rename to deconvolution/d2graph/path_optimization.py index 4b8e7681cb879f8a120a0df6c2004275e186f8c1..180f07b758dea2719e302a3e7eb9c6927e9dc154 100644 --- a/deconvolution/path_optimization.py +++ b/deconvolution/d2graph/path_optimization.py @@ -1,5 +1,5 @@ import random -from d2_path import Path +from d2graph.d2_path import Path import networkx as nx diff --git a/deconvolution/debug_disct.py b/deconvolution/debug_disct.py deleted file mode 100644 index 67f14303c78ed3e2033de31adbed58b0929d681e..0000000000000000000000000000000000000000 --- a/deconvolution/debug_disct.py +++ /dev/null @@ -1,53 +0,0 @@ - -def save(dict, filename): - with open(filename, "w") as fp: - for key, array in dict.items(): - fp.write(str(len(array)) + " " + key + "\n") - fp.write('\n'.join([str(sorted(x.nodes)) for x in array]) + "\n") - print(filename, "saved") - - -def load(filename): - d = {} - with open(filename) as fp: - value = None - nb_vals = 0 - for line in fp: - line = line.strip() - - if value == None: - first_space = line.find(' ') - nb_vals = int(line[:first_space]) - value = line[first_space+1:] - d[value] = [] - else: - d[value].append(line.strip()) - nb_vals -= 1 - if nb_vals == 0: - value = None - - print(filename, "loaded") - return d - - -def compare(d1, d2): - remaining = set(d2.keys()) - - for key in d1: - if key not in d2: - print(key, "not present in d2") - - remaining.remove(key) - l1 = sorted([str(sorted(x.nodes)) for x in d1[key]]) - l2 = sorted([str(x) for x in d2[key]]) - - if l1 != l2: - print(f"{key}: disimilar lists:") - s1 = set(l1) - s2 = set(l2) - print(s1 - s2) - print(s2 - s1) - - for key in remaining: - print(key, "not present in d1") - diff --git a/deconvolution/dgraph/AbstractDGIndex.py b/deconvolution/dgraph/AbstractDGIndex.py new file mode 100644 index 0000000000000000000000000000000000000000..db426e60068feb596421eff37b5e04aaddfadcad --- /dev/null +++ b/deconvolution/dgraph/AbstractDGIndex.py @@ -0,0 +1,59 @@ +from itertools import combinations + + +class AbstractDGIndex(dict): + + def __init__(self, fixed_size=False, size=3): + """ This class represent a d_graph dgraph. + Each key in the dgraph is a set of barcodes and each value a list of dgraphs containing these barcodes. + :param fixed_size: True if the keys in the dgraph have a fixed size. + :param size: The size of the key sets if fixed size, the min size otherwise. + """ + super(AbstractDGIndex, self).__init__() + self.fixed_size = fixed_size + self.size = size + + + def _add_value(self, key_set, dgraph): + """ Add the couple key (set of barcodes) and value (dgraph) at the right place in the dict + """ + # Test the key size + if self.fixed_size and len(key_set) != self.size: + raise ValueError("Wrong set size in the dgraph") + elif (not self.fixed_size) and len(key_set) < len(dgraph.node_set) - self.size: + raise ValueError("Wrong set size in the dgraph") + key_set = frozenset(key_set) + + # Add the key if not already present + if key_set not in self: + self[key_set] = set() + + # Associate the value with the key + self[key_set].add(dgraph) + + + def add_dgraph(self, dg): + """ Generate all the set needed for keys in the dgraph and push the d-graph as value. + For fixed size of the dgraph all the sets of this size will be generated as key. + Otherwise, all the set of size at least len(dg) - size will be generated. + """ + barcodes = dg.node_set + + if self.fixed_size: + for tup in combinations(barcodes, self.size): + self._add_value(frozenset(tup), dg) + else: + for size in range(len(barcodes)-self.size, len(barcodes)+1): + for tup in combinations(barcodes, size): + self._add_value(frozenset(tup), dg) + + + def __contains__(self, key): + key = frozenset(key) + return super(AbstractDGIndex, self).__contains__(key) + + def __getitem__(self, key): + return super(AbstractDGIndex, self).__getitem__(self.__keytransform__(key)) + + def __keytransform__(self, key): + return frozenset(key) diff --git a/tests/__init__.py b/deconvolution/dgraph/__init__.py similarity index 100% rename from tests/__init__.py rename to deconvolution/dgraph/__init__.py diff --git a/deconvolution/d_graph.py b/deconvolution/dgraph/d_graph.py similarity index 100% rename from deconvolution/d_graph.py rename to deconvolution/dgraph/d_graph.py diff --git a/deconvolution/graph_manipulator.py b/deconvolution/dgraph/graph_manipulator.py similarity index 100% rename from deconvolution/graph_manipulator.py rename to deconvolution/dgraph/graph_manipulator.py diff --git a/deconvolution/analyse_d2_tsv.py b/deconvolution/main/analyse_d2_tsv.py similarity index 100% rename from deconvolution/analyse_d2_tsv.py rename to deconvolution/main/analyse_d2_tsv.py diff --git a/deconvolution/d2_to_path.py b/deconvolution/main/d2_to_path.py similarity index 96% rename from deconvolution/d2_to_path.py rename to deconvolution/main/d2_to_path.py index e14ec936e89a9717674472166ca9bf70909b4c58..444a14fffabb85c94240efff14c4b9ab3fdfe0ab 100755 --- a/deconvolution/d2_to_path.py +++ b/deconvolution/main/d2_to_path.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 import networkx as nx -import path_optimization as po import argparse import sys -import d2_graph as d2 +from d2graph import d2_graph as d2, path_optimization as po def parse_arguments(): diff --git a/deconvolution/evaluate.py b/deconvolution/main/evaluate.py similarity index 99% rename from deconvolution/evaluate.py rename to deconvolution/main/evaluate.py index d05d75219b79bedf35a29fadad18ad27a40fc0d6..7b18f389e0b670341dfe5e921c79423c2f127167 100755 --- a/deconvolution/evaluate.py +++ b/deconvolution/main/evaluate.py @@ -6,8 +6,6 @@ import argparse from termcolor import colored import networkx as nx -from d2_graph import D2Graph - def parse_args(): parser = argparse.ArgumentParser(description='Process some integers.') diff --git a/deconvolution/generate_fake_barcode_graph.py b/deconvolution/main/generate_fake_barcode_graph.py similarity index 98% rename from deconvolution/generate_fake_barcode_graph.py rename to deconvolution/main/generate_fake_barcode_graph.py index 15ddba102df2637e69086cbc1fc8a70f038a08f3..f61631d8e6fef593c5961dcd179c823de22b7183 100755 --- a/deconvolution/generate_fake_barcode_graph.py +++ b/deconvolution/main/generate_fake_barcode_graph.py @@ -4,7 +4,7 @@ import networkx as nx import random import argparse -import graph_manipulator as gm +from dgraph import graph_manipulator as gm def parse_arguments(): diff --git a/deconvolution/generate_fake_molecule_graph.py b/deconvolution/main/generate_fake_molecule_graph.py similarity index 97% rename from deconvolution/generate_fake_molecule_graph.py rename to deconvolution/main/generate_fake_molecule_graph.py index a8455e96fab075c7adf41a1ffdb9fc82f9017510..a859ff53c08f761eaae2c8afcc21620f95f673a9 100755 --- a/deconvolution/generate_fake_molecule_graph.py +++ b/deconvolution/main/generate_fake_molecule_graph.py @@ -2,7 +2,7 @@ import argparse -import graph_manipulator as gm +from dgraph import graph_manipulator as gm def parse_arguments(): diff --git a/deconvolution/gexf_converter.py b/deconvolution/main/gexf_converter.py similarity index 100% rename from deconvolution/gexf_converter.py rename to deconvolution/main/gexf_converter.py diff --git a/deconvolution/to_d2_graph.py b/deconvolution/main/to_d2_graph.py similarity index 98% rename from deconvolution/to_d2_graph.py rename to deconvolution/main/to_d2_graph.py index b233393e4b46e8c2c1c4218853aa633326b6419b..4a2d1befe44d868aa7dbb66ec6dccca529534391 100755 --- a/deconvolution/to_d2_graph.py +++ b/deconvolution/main/to_d2_graph.py @@ -4,7 +4,7 @@ import networkx as nx import argparse import sys -import d2_graph as d2 +from d2graph import d2_graph as d2 def parse_arguments(): diff --git a/tests/Index_test.py b/tests/Index_test.py new file mode 100644 index 0000000000000000000000000000000000000000..1c7963f154cc81f862386b39ac1ef43a394a7459 --- /dev/null +++ b/tests/Index_test.py @@ -0,0 +1,66 @@ +import unittest + +from random import randint +from dgraph.AbstractDGIndex import AbstractDGIndex +from dgraph.d_graph import Dgraph +from dgraph.graph_manipulator import generate_d_graph_chain + + +class TestIndex(unittest.TestCase): + def test_construction(self): + for _ in range(10): + size = randint(1, 50) + index = AbstractDGIndex(size=size) + self.assertEqual(len(index), 0) + self.assertEqual(index.size, size) + + + def test_wrong_size_filling(self): + index = AbstractDGIndex(fixed_size=True, size=3) + key = frozenset({'A', 'B'}) + val = "Test" + with self.assertRaises(ValueError): + index._add_value(key, val) + + + def test_fill_static(self): + index = AbstractDGIndex(fixed_size=True, size=3) + key = frozenset({'A', 'B', 'C'}) + val = "Test" + index._add_value(key, val) + self.assertEqual(len(index), 1) + self.assertTrue(key in index) + self.assertEqual(index[key], {val}) + + + def test_fixed_size(self): + dg = _generate_dg(2) + index = AbstractDGIndex(fixed_size=True, size=2) + index.add_dgraph(dg) + print("\n".join([str(k) for k in index.keys()])) + self.assertEqual(len(index), 10) + + + def test_variable_size(self): + dg = _generate_dg(2) + index = AbstractDGIndex(fixed_size=False, size=2) + index.add_dgraph(dg) + print("\n".join([str(k) for k in index.keys()])) + self.assertEqual(len(index), 16) + + +def _generate_dg(d): + # nx graph construction + G = generate_d_graph_chain(2*d+1, d) + center = d + h1 = list(G.subgraph([x for x in range(d)]).nodes()) + h2 = list(G.subgraph([2*d-x for x in range(d)]).nodes()) + + # d-graph construction + dg = Dgraph(center) + dg.put_halves(h1, h2, G) + return dg + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/d2_algorithms_test.py b/tests/d2_algorithms_test.py index 052abf9a38afeda54571c931fb5bee1609249464..0516a8f21188d0b3d8f027aec02862872220f993 100644 --- a/tests/d2_algorithms_test.py +++ b/tests/d2_algorithms_test.py @@ -1,8 +1,5 @@ import unittest -import graph_manipulator as gm -from d2_graph import D2Graph - class TestD2Algorithms(unittest.TestCase): diff --git a/tests/d2_graph_test.py b/tests/d2_graph_test.py index e11cd35da3c33a4d15e069a9b161e12d16fc0f7c..5147a815ac4f756a3a61f195d04e49b3a7e1f4fa 100644 --- a/tests/d2_graph_test.py +++ b/tests/d2_graph_test.py @@ -3,11 +3,10 @@ import tempfile import networkx as nx from scipy.special import comb -from d2_graph import D2Graph -from d_graph import Dgraph -import graph_manipulator as gm +from d2graph.d2_graph import D2Graph +from dgraph import graph_manipulator as gm -from tests.d_graph_data import complete_graph +from d_graph_data import complete_graph class TestD2Graph(unittest.TestCase): @@ -22,7 +21,7 @@ class TestD2Graph(unittest.TestCase): else: self.assertEqual(0, len(candidates)) - # Evaluate the index + # Evaluate the dgraph self.assertEqual(13, len(d2.index)) overlap_key = ('A1', 'A2', 'B0', 'B1', 'B2', 'C') @@ -47,7 +46,7 @@ class TestD2Graph(unittest.TestCase): awaited_d_num = size - 2 * d self.assertEqual(awaited_d_num, len(d2.all_d_graphs)) - # Test index + # Test dgraph awaited_index_size = comb(2 * d + 1, index_k) + (size - (2 * d + 1)) * comb(2 * d, index_k - 1) if len(d2.index) != awaited_index_size: dmers = [list(x) for x in d2.index] diff --git a/tests/d_graph_test.py b/tests/d_graph_test.py index 0e714945ee609c5c2818f38bdb2ccf799ce95281..ddbc25903689b6d9f9c25a08041634379795e2d3 100644 --- a/tests/d_graph_test.py +++ b/tests/d_graph_test.py @@ -1,9 +1,8 @@ import unittest -from tests.d_graph_data import unit_d_graph -from d_graph import Dgraph -import graph_manipulator as gm - +from d_graph_data import unit_d_graph +from dgraph.d_graph import Dgraph +from dgraph import graph_manipulator as gm class TestDGraph(unittest.TestCase): diff --git a/tests/graph_manipulation_test.py b/tests/graph_manipulation_test.py index 033001aafd27e472a53ca51aec22172ad6a0780f..4ff4a38480133804c3c18071d0b3eea34c2a0e92 100644 --- a/tests/graph_manipulation_test.py +++ b/tests/graph_manipulation_test.py @@ -1,7 +1,6 @@ import unittest -import graph_manipulator as gm - +from dgraph import graph_manipulator as gm class TestGraphManipulation(unittest.TestCase):