From 5e05d845770e7692ce365cd1b60cd15f98fba814 Mon Sep 17 00:00:00 2001 From: Yoann Dufresne <yoann.dufresne0@gmail.com> Date: Thu, 9 Jan 2020 10:30:46 +0100 Subject: [PATCH] refactor directories --- README.md | 2 +- deconvolution/{ => d2graph}/d2_algorithms.py | 3 +- deconvolution/{ => d2graph}/d2_graph.py | 9 ++- deconvolution/{ => d2graph}/d2_path.py | 0 deconvolution/{ => d2graph}/d2_reduction.py | 3 +- .../{ => d2graph}/path_algorithms.py | 3 +- .../{ => d2graph}/path_optimization.py | 2 +- deconvolution/debug_disct.py | 53 --------------- deconvolution/dgraph/AbstractDGIndex.py | 59 +++++++++++++++++ {tests => deconvolution/dgraph}/__init__.py | 0 deconvolution/{ => dgraph}/d_graph.py | 0 .../{ => dgraph}/graph_manipulator.py | 0 deconvolution/{ => main}/analyse_d2_tsv.py | 0 deconvolution/{ => main}/d2_to_path.py | 3 +- deconvolution/{ => main}/evaluate.py | 2 - .../{ => main}/generate_fake_barcode_graph.py | 2 +- .../generate_fake_molecule_graph.py | 2 +- deconvolution/{ => main}/gexf_converter.py | 0 deconvolution/{ => main}/to_d2_graph.py | 2 +- tests/Index_test.py | 66 +++++++++++++++++++ tests/d2_algorithms_test.py | 3 - tests/d2_graph_test.py | 11 ++-- tests/d_graph_test.py | 7 +- tests/graph_manipulation_test.py | 3 +- 24 files changed, 147 insertions(+), 88 deletions(-) rename deconvolution/{ => d2graph}/d2_algorithms.py (98%) rename deconvolution/{ => d2graph}/d2_graph.py (96%) rename deconvolution/{ => d2graph}/d2_path.py (100%) rename deconvolution/{ => d2graph}/d2_reduction.py (96%) rename deconvolution/{ => d2graph}/path_algorithms.py (99%) rename deconvolution/{ => d2graph}/path_optimization.py (98%) delete mode 100644 deconvolution/debug_disct.py create mode 100644 deconvolution/dgraph/AbstractDGIndex.py rename {tests => deconvolution/dgraph}/__init__.py (100%) rename deconvolution/{ => dgraph}/d_graph.py (100%) rename deconvolution/{ => dgraph}/graph_manipulator.py (100%) rename deconvolution/{ => main}/analyse_d2_tsv.py (100%) rename deconvolution/{ => main}/d2_to_path.py (96%) rename deconvolution/{ => main}/evaluate.py (99%) rename deconvolution/{ => main}/generate_fake_barcode_graph.py (98%) rename deconvolution/{ => main}/generate_fake_molecule_graph.py (97%) rename deconvolution/{ => main}/gexf_converter.py (100%) rename deconvolution/{ => main}/to_d2_graph.py (98%) create mode 100644 tests/Index_test.py diff --git a/README.md b/README.md index 6547fd1..c2d2075 100644 --- a/README.md +++ b/README.md @@ -48,5 +48,5 @@ Config parameters: ```bash snakemake -s Snakefile_data_simu --config n=10000 m=[4,6,8,10,12] m_dev=[0,0.5,1,2,3] - snakemake -s Snakefile_d2 --config input=[snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf]] + snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf] ``` diff --git a/deconvolution/d2_algorithms.py b/deconvolution/d2graph/d2_algorithms.py similarity index 98% rename from deconvolution/d2_algorithms.py rename to deconvolution/d2graph/d2_algorithms.py index 44ba08a..1391181 100644 --- a/deconvolution/d2_algorithms.py +++ b/deconvolution/d2graph/d2_algorithms.py @@ -1,7 +1,6 @@ import networkx as nx -from itertools import combinations -from d2_path import Path, Unitig +from d2graph.d2_path import Unitig """ Remove unnecessary transitions diff --git a/deconvolution/d2_graph.py b/deconvolution/d2graph/d2_graph.py similarity index 96% rename from deconvolution/d2_graph.py rename to deconvolution/d2graph/d2_graph.py index 450e1cd..57bc141 100644 --- a/deconvolution/d2_graph.py +++ b/deconvolution/d2graph/d2_graph.py @@ -3,7 +3,7 @@ import itertools from bidict import bidict import sys -from d_graph import Dgraph, compute_all_max_d_graphs, filter_dominated, list_domination_filter +from dgraph.d_graph import Dgraph, compute_all_max_d_graphs, list_domination_filter class D2Graph(nx.Graph): @@ -56,7 +56,6 @@ class D2Graph(nx.Graph): def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None): - import debug_disct as dd # Compute all the d-graphs if verbose: print("Computing the unit d-graphs..") @@ -79,7 +78,7 @@ class D2Graph(nx.Graph): # Index all the d-graphs if verbose: - print("Compute the dmer index") + print("Compute the dmer dgraph") self.index = self.create_index_from_tuples(index_size, verbose=verbose) self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose) # Compute node distances for pair of dgraphs that share at least 1 dmer. @@ -209,7 +208,7 @@ class D2Graph(nx.Graph): to_remove = set() if verbose: - print("\tFilter dominated in index") + print("\tFilter dominated in dgraph") # Find dominated for dmer_idx, item in enumerate(self.index.items()): @@ -242,7 +241,7 @@ class D2Graph(nx.Graph): self.all_d_graphs.remove(r_dg) self.d_graphs_per_node[r_dg.center].remove(r_dg) - # Remove dominated in index + # Remove dominated in dgraph for dmer in itertools.combinations(r_dg.to_sorted_list(), tuple_size): if r_dg in self.index[dmer]: self.index[dmer] = list(filter(lambda x: x!=r_dg, self.index[dmer])) diff --git a/deconvolution/d2_path.py b/deconvolution/d2graph/d2_path.py similarity index 100% rename from deconvolution/d2_path.py rename to deconvolution/d2graph/d2_path.py diff --git a/deconvolution/d2_reduction.py b/deconvolution/d2graph/d2_reduction.py similarity index 96% rename from deconvolution/d2_reduction.py rename to deconvolution/d2graph/d2_reduction.py index 9eb85ff..dd534ac 100755 --- a/deconvolution/d2_reduction.py +++ b/deconvolution/d2graph/d2_reduction.py @@ -4,8 +4,7 @@ import networkx as nx import argparse import sys -import d2_graph as d2 -import d2_algorithms as d2a +from d2graph import d2_algorithms as d2a, d2_graph as d2 def parse_arguments(): diff --git a/deconvolution/path_algorithms.py b/deconvolution/d2graph/path_algorithms.py similarity index 99% rename from deconvolution/path_algorithms.py rename to deconvolution/d2graph/path_algorithms.py index 251c28b..e22c93c 100644 --- a/deconvolution/path_algorithms.py +++ b/deconvolution/d2graph/path_algorithms.py @@ -1,5 +1,4 @@ -import networkx as nx -from d2_path import Path +from d2graph.d2_path import Path """ Greedy algorithm. Start with th most probable unitig (ie lowest normalized penalty first and largest unitig for equalities). Then extends on both side to the nearest interesting unitig. diff --git a/deconvolution/path_optimization.py b/deconvolution/d2graph/path_optimization.py similarity index 98% rename from deconvolution/path_optimization.py rename to deconvolution/d2graph/path_optimization.py index 4b8e768..180f07b 100644 --- a/deconvolution/path_optimization.py +++ b/deconvolution/d2graph/path_optimization.py @@ -1,5 +1,5 @@ import random -from d2_path import Path +from d2graph.d2_path import Path import networkx as nx diff --git a/deconvolution/debug_disct.py b/deconvolution/debug_disct.py deleted file mode 100644 index 67f1430..0000000 --- a/deconvolution/debug_disct.py +++ /dev/null @@ -1,53 +0,0 @@ - -def save(dict, filename): - with open(filename, "w") as fp: - for key, array in dict.items(): - fp.write(str(len(array)) + " " + key + "\n") - fp.write('\n'.join([str(sorted(x.nodes)) for x in array]) + "\n") - print(filename, "saved") - - -def load(filename): - d = {} - with open(filename) as fp: - value = None - nb_vals = 0 - for line in fp: - line = line.strip() - - if value == None: - first_space = line.find(' ') - nb_vals = int(line[:first_space]) - value = line[first_space+1:] - d[value] = [] - else: - d[value].append(line.strip()) - nb_vals -= 1 - if nb_vals == 0: - value = None - - print(filename, "loaded") - return d - - -def compare(d1, d2): - remaining = set(d2.keys()) - - for key in d1: - if key not in d2: - print(key, "not present in d2") - - remaining.remove(key) - l1 = sorted([str(sorted(x.nodes)) for x in d1[key]]) - l2 = sorted([str(x) for x in d2[key]]) - - if l1 != l2: - print(f"{key}: disimilar lists:") - s1 = set(l1) - s2 = set(l2) - print(s1 - s2) - print(s2 - s1) - - for key in remaining: - print(key, "not present in d1") - diff --git a/deconvolution/dgraph/AbstractDGIndex.py b/deconvolution/dgraph/AbstractDGIndex.py new file mode 100644 index 0000000..db426e6 --- /dev/null +++ b/deconvolution/dgraph/AbstractDGIndex.py @@ -0,0 +1,59 @@ +from itertools import combinations + + +class AbstractDGIndex(dict): + + def __init__(self, fixed_size=False, size=3): + """ This class represent a d_graph dgraph. + Each key in the dgraph is a set of barcodes and each value a list of dgraphs containing these barcodes. + :param fixed_size: True if the keys in the dgraph have a fixed size. + :param size: The size of the key sets if fixed size, the min size otherwise. + """ + super(AbstractDGIndex, self).__init__() + self.fixed_size = fixed_size + self.size = size + + + def _add_value(self, key_set, dgraph): + """ Add the couple key (set of barcodes) and value (dgraph) at the right place in the dict + """ + # Test the key size + if self.fixed_size and len(key_set) != self.size: + raise ValueError("Wrong set size in the dgraph") + elif (not self.fixed_size) and len(key_set) < len(dgraph.node_set) - self.size: + raise ValueError("Wrong set size in the dgraph") + key_set = frozenset(key_set) + + # Add the key if not already present + if key_set not in self: + self[key_set] = set() + + # Associate the value with the key + self[key_set].add(dgraph) + + + def add_dgraph(self, dg): + """ Generate all the set needed for keys in the dgraph and push the d-graph as value. + For fixed size of the dgraph all the sets of this size will be generated as key. + Otherwise, all the set of size at least len(dg) - size will be generated. + """ + barcodes = dg.node_set + + if self.fixed_size: + for tup in combinations(barcodes, self.size): + self._add_value(frozenset(tup), dg) + else: + for size in range(len(barcodes)-self.size, len(barcodes)+1): + for tup in combinations(barcodes, size): + self._add_value(frozenset(tup), dg) + + + def __contains__(self, key): + key = frozenset(key) + return super(AbstractDGIndex, self).__contains__(key) + + def __getitem__(self, key): + return super(AbstractDGIndex, self).__getitem__(self.__keytransform__(key)) + + def __keytransform__(self, key): + return frozenset(key) diff --git a/tests/__init__.py b/deconvolution/dgraph/__init__.py similarity index 100% rename from tests/__init__.py rename to deconvolution/dgraph/__init__.py diff --git a/deconvolution/d_graph.py b/deconvolution/dgraph/d_graph.py similarity index 100% rename from deconvolution/d_graph.py rename to deconvolution/dgraph/d_graph.py diff --git a/deconvolution/graph_manipulator.py b/deconvolution/dgraph/graph_manipulator.py similarity index 100% rename from deconvolution/graph_manipulator.py rename to deconvolution/dgraph/graph_manipulator.py diff --git a/deconvolution/analyse_d2_tsv.py b/deconvolution/main/analyse_d2_tsv.py similarity index 100% rename from deconvolution/analyse_d2_tsv.py rename to deconvolution/main/analyse_d2_tsv.py diff --git a/deconvolution/d2_to_path.py b/deconvolution/main/d2_to_path.py similarity index 96% rename from deconvolution/d2_to_path.py rename to deconvolution/main/d2_to_path.py index e14ec93..444a14f 100755 --- a/deconvolution/d2_to_path.py +++ b/deconvolution/main/d2_to_path.py @@ -1,11 +1,10 @@ #!/usr/bin/env python3 import networkx as nx -import path_optimization as po import argparse import sys -import d2_graph as d2 +from d2graph import d2_graph as d2, path_optimization as po def parse_arguments(): diff --git a/deconvolution/evaluate.py b/deconvolution/main/evaluate.py similarity index 99% rename from deconvolution/evaluate.py rename to deconvolution/main/evaluate.py index d05d752..7b18f38 100755 --- a/deconvolution/evaluate.py +++ b/deconvolution/main/evaluate.py @@ -6,8 +6,6 @@ import argparse from termcolor import colored import networkx as nx -from d2_graph import D2Graph - def parse_args(): parser = argparse.ArgumentParser(description='Process some integers.') diff --git a/deconvolution/generate_fake_barcode_graph.py b/deconvolution/main/generate_fake_barcode_graph.py similarity index 98% rename from deconvolution/generate_fake_barcode_graph.py rename to deconvolution/main/generate_fake_barcode_graph.py index 15ddba1..f61631d 100755 --- a/deconvolution/generate_fake_barcode_graph.py +++ b/deconvolution/main/generate_fake_barcode_graph.py @@ -4,7 +4,7 @@ import networkx as nx import random import argparse -import graph_manipulator as gm +from dgraph import graph_manipulator as gm def parse_arguments(): diff --git a/deconvolution/generate_fake_molecule_graph.py b/deconvolution/main/generate_fake_molecule_graph.py similarity index 97% rename from deconvolution/generate_fake_molecule_graph.py rename to deconvolution/main/generate_fake_molecule_graph.py index a8455e9..a859ff5 100755 --- a/deconvolution/generate_fake_molecule_graph.py +++ b/deconvolution/main/generate_fake_molecule_graph.py @@ -2,7 +2,7 @@ import argparse -import graph_manipulator as gm +from dgraph import graph_manipulator as gm def parse_arguments(): diff --git a/deconvolution/gexf_converter.py b/deconvolution/main/gexf_converter.py similarity index 100% rename from deconvolution/gexf_converter.py rename to deconvolution/main/gexf_converter.py diff --git a/deconvolution/to_d2_graph.py b/deconvolution/main/to_d2_graph.py similarity index 98% rename from deconvolution/to_d2_graph.py rename to deconvolution/main/to_d2_graph.py index b233393..4a2d1be 100755 --- a/deconvolution/to_d2_graph.py +++ b/deconvolution/main/to_d2_graph.py @@ -4,7 +4,7 @@ import networkx as nx import argparse import sys -import d2_graph as d2 +from d2graph import d2_graph as d2 def parse_arguments(): diff --git a/tests/Index_test.py b/tests/Index_test.py new file mode 100644 index 0000000..1c7963f --- /dev/null +++ b/tests/Index_test.py @@ -0,0 +1,66 @@ +import unittest + +from random import randint +from dgraph.AbstractDGIndex import AbstractDGIndex +from dgraph.d_graph import Dgraph +from dgraph.graph_manipulator import generate_d_graph_chain + + +class TestIndex(unittest.TestCase): + def test_construction(self): + for _ in range(10): + size = randint(1, 50) + index = AbstractDGIndex(size=size) + self.assertEqual(len(index), 0) + self.assertEqual(index.size, size) + + + def test_wrong_size_filling(self): + index = AbstractDGIndex(fixed_size=True, size=3) + key = frozenset({'A', 'B'}) + val = "Test" + with self.assertRaises(ValueError): + index._add_value(key, val) + + + def test_fill_static(self): + index = AbstractDGIndex(fixed_size=True, size=3) + key = frozenset({'A', 'B', 'C'}) + val = "Test" + index._add_value(key, val) + self.assertEqual(len(index), 1) + self.assertTrue(key in index) + self.assertEqual(index[key], {val}) + + + def test_fixed_size(self): + dg = _generate_dg(2) + index = AbstractDGIndex(fixed_size=True, size=2) + index.add_dgraph(dg) + print("\n".join([str(k) for k in index.keys()])) + self.assertEqual(len(index), 10) + + + def test_variable_size(self): + dg = _generate_dg(2) + index = AbstractDGIndex(fixed_size=False, size=2) + index.add_dgraph(dg) + print("\n".join([str(k) for k in index.keys()])) + self.assertEqual(len(index), 16) + + +def _generate_dg(d): + # nx graph construction + G = generate_d_graph_chain(2*d+1, d) + center = d + h1 = list(G.subgraph([x for x in range(d)]).nodes()) + h2 = list(G.subgraph([2*d-x for x in range(d)]).nodes()) + + # d-graph construction + dg = Dgraph(center) + dg.put_halves(h1, h2, G) + return dg + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/d2_algorithms_test.py b/tests/d2_algorithms_test.py index 052abf9..0516a8f 100644 --- a/tests/d2_algorithms_test.py +++ b/tests/d2_algorithms_test.py @@ -1,8 +1,5 @@ import unittest -import graph_manipulator as gm -from d2_graph import D2Graph - class TestD2Algorithms(unittest.TestCase): diff --git a/tests/d2_graph_test.py b/tests/d2_graph_test.py index e11cd35..5147a81 100644 --- a/tests/d2_graph_test.py +++ b/tests/d2_graph_test.py @@ -3,11 +3,10 @@ import tempfile import networkx as nx from scipy.special import comb -from d2_graph import D2Graph -from d_graph import Dgraph -import graph_manipulator as gm +from d2graph.d2_graph import D2Graph +from dgraph import graph_manipulator as gm -from tests.d_graph_data import complete_graph +from d_graph_data import complete_graph class TestD2Graph(unittest.TestCase): @@ -22,7 +21,7 @@ class TestD2Graph(unittest.TestCase): else: self.assertEqual(0, len(candidates)) - # Evaluate the index + # Evaluate the dgraph self.assertEqual(13, len(d2.index)) overlap_key = ('A1', 'A2', 'B0', 'B1', 'B2', 'C') @@ -47,7 +46,7 @@ class TestD2Graph(unittest.TestCase): awaited_d_num = size - 2 * d self.assertEqual(awaited_d_num, len(d2.all_d_graphs)) - # Test index + # Test dgraph awaited_index_size = comb(2 * d + 1, index_k) + (size - (2 * d + 1)) * comb(2 * d, index_k - 1) if len(d2.index) != awaited_index_size: dmers = [list(x) for x in d2.index] diff --git a/tests/d_graph_test.py b/tests/d_graph_test.py index 0e71494..ddbc259 100644 --- a/tests/d_graph_test.py +++ b/tests/d_graph_test.py @@ -1,9 +1,8 @@ import unittest -from tests.d_graph_data import unit_d_graph -from d_graph import Dgraph -import graph_manipulator as gm - +from d_graph_data import unit_d_graph +from dgraph.d_graph import Dgraph +from dgraph import graph_manipulator as gm class TestDGraph(unittest.TestCase): diff --git a/tests/graph_manipulation_test.py b/tests/graph_manipulation_test.py index 033001a..4ff4a38 100644 --- a/tests/graph_manipulation_test.py +++ b/tests/graph_manipulation_test.py @@ -1,7 +1,6 @@ import unittest -import graph_manipulator as gm - +from dgraph import graph_manipulator as gm class TestGraphManipulation(unittest.TestCase): -- GitLab