diff --git a/README.md b/README.md
index 6547fd1d2ef2c00e2427dec587eddecab2c43bd6..b9f7294e5e08de101da4adbc51a148f1a2c8e393 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,16 @@
 
 Trying to deconvolve single tag assignment for multiple molecules
 
+## Installation
+
+Install the package from the root directory.
+```bash
+    # For users
+    pip install . --user
+    # For developers
+    pip install -e . --user
+```
+
 ## Scripts
 
 For the majority of the scripts, argparse is used.
@@ -48,5 +58,5 @@ Config parameters:
 
 ```bash
     snakemake -s Snakefile_data_simu --config n=10000 m=[4,6,8,10,12] m_dev=[0,0.5,1,2,3]
-    snakemake -s Snakefile_d2 --config input=[snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf]]
+    snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf]
 ```
diff --git a/Snakefile_d2 b/Snakefile_d2
index 14c096a22701b995d836ec40ca20ce4a71b20f0c..2b762533b39d2031a9e52f412fe06c9cb4417bad 100644
--- a/Snakefile_d2
+++ b/Snakefile_d2
@@ -17,15 +17,16 @@ rule compress_data:
     input:
         barcode_graph=f"{WORKDIR}/{{barcode_file}}.gexf",
         d2_raw=f"{WORKDIR}/{{barcode_file}}_d2_raw_maxclq.gexf",
-        simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf",
-        d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
-        simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
+        simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf"
+        # d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
+        # simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
     output:
         zip=f"{WORKDIR}/{{barcode_file}}.tar.gz"
     shell:
         f"cd {WORKDIR}/ ;"
         "if [ ! -d {wildcards.barcode_file} ]; then mkdir {wildcards.barcode_file} ; fi;"
-        "mv *.gexf {wildcards.barcode_file}/ ;"
+        "mv {wildcards.barcode_file}_* {wildcards.barcode_file}/ ;"
+        "cp {wildcards.barcode_file}.gexf {wildcards.barcode_file}/ ;"
         "tar czf {wildcards.barcode_file}.tar.gz {wildcards.barcode_file}/ ;"
         "rm -rf {wildcards.barcode_file}/ ;"
         "cd - ;"
@@ -37,7 +38,7 @@ rule d2_simplification:
     output:
         simplified_d2="{barcode_path}_d2_simplified_{method}.gexf"
     shell:
-        "python3 deconvolution/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
+        "python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
 
 
 rule d2_generation:
@@ -46,7 +47,7 @@ rule d2_generation:
     output:
         d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf"
     run:
-        shell(f"python3 deconvolution/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
+        shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
 
 
 rule setup_workdir:
@@ -58,4 +59,4 @@ rule setup_workdir:
         shell(f"if [ ! -d {WORKDIR} ]; then mkdir {WORKDIR}; fi;")
         shell(f"cp {{input}} {WORKDIR}")
         if input[-5:] != ".gexf":
-            shell("python3 deconvolution/gexf_converter.py {input}")
+            shell("python3 deconvolution/main/gexf_converter.py {input}")
diff --git a/Snakefile_data_simu b/Snakefile_data_simu
index 318918e5e041ec6f2fbddcf39c88c4ef87b91a40..2c720599c810ca13cfec0f7e7ff547d953df24c8 100644
--- a/Snakefile_data_simu
+++ b/Snakefile_data_simu
@@ -16,11 +16,11 @@ rule generate_barcodes:
     output:
         "{path}/simu_bar_{params}_m{m}-dev{md}.gexf"
     shell:
-        "python3 deconvolution/generate_fake_barcode_graph.py --merging_depth {wildcards.m} --deviation {wildcards.md} --input_graph {input} --output {output}"
+        "python3 deconvolution/main/generate_fake_barcode_graph.py --merging_depth {wildcards.m} --deviation {wildcards.md} --input_graph {input} --output {output}"
 
 rule generate_molecules:
     output:
         "{path}/simu_mol_n{n}_d{d}.gexf"
     shell:
-        "python3 deconvolution/generate_fake_molecule_graph.py --num_molecule {wildcards.n} --avg_depth {wildcards.d} --output {output}"
+        "python3 deconvolution/main/generate_fake_molecule_graph.py --num_molecule {wildcards.n} --avg_depth {wildcards.d} --output {output}"
 
diff --git a/tests/__init__.py b/deconvolution/d2graph/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to deconvolution/d2graph/__init__.py
diff --git a/deconvolution/d2_algorithms.py b/deconvolution/d2graph/d2_algorithms.py
similarity index 98%
rename from deconvolution/d2_algorithms.py
rename to deconvolution/d2graph/d2_algorithms.py
index 44ba08a0180926d6325f337d4b14f917795a3707..f54df1345325f49fb1881f0ddcbe48d860df4156 100644
--- a/deconvolution/d2_algorithms.py
+++ b/deconvolution/d2graph/d2_algorithms.py
@@ -1,7 +1,6 @@
 import networkx as nx
-from itertools import combinations
 
-from d2_path import Path, Unitig
+from deconvolution.d2graph.d2_path import Unitig
 
 
 """ Remove unnecessary transitions
diff --git a/deconvolution/d2_graph.py b/deconvolution/d2graph/d2_graph.py
similarity index 72%
rename from deconvolution/d2_graph.py
rename to deconvolution/d2graph/d2_graph.py
index 450e1cdd7a16031a0d9e20014b2b88a7fc439658..abfd9780185f7025787f9f331ca8b1189d944365 100644
--- a/deconvolution/d2_graph.py
+++ b/deconvolution/d2graph/d2_graph.py
@@ -3,7 +3,11 @@ import itertools
 from bidict import bidict
 import sys
 
-from d_graph import Dgraph, compute_all_max_d_graphs, filter_dominated, list_domination_filter
+from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
+from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
+from deconvolution.dgraph.d_graph import Dgraph
+from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
+from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
 
 
 class D2Graph(nx.Graph):
@@ -55,23 +59,22 @@ class D2Graph(nx.Graph):
         return self.subgraph(list(self.nodes()))
 
 
-    def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None):
-        import debug_disct as dd
+    def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None, threads=1):
         # Compute all the d-graphs
         if verbose:
             print("Computing the unit d-graphs..")
-        self.d_graphs_per_node = compute_all_max_d_graphs(self.barcode_graph, debug=debug, clique_mode=clique_mode)
+        dg_factory = None
+        if clique_mode == "louvain":
+            dg_factory = LouvainDGFactory(self.barcode_graph)
+        else:
+            dg_factory = CliqueDGFactory(self.barcode_graph)
+        self.d_graphs_per_node = dg_factory.generate_all_dgraphs(debug=True)
         if verbose:
             counts = sum(len(x) for x in self.d_graphs_per_node.values())
             print(f"\t {counts} computed d-graphs")
-        #self.d_graphs_per_node = filter_dominated(self.d_graphs_per_node)
-        #if verbose:
-        #    counts = sum(len(x) for x in self.d_graphs_per_node.values())
-        #    print(f"\t {counts} remaining d-graphs after first filter")
         for d_graphs in self.d_graphs_per_node.values():
             self.all_d_graphs.extend(d_graphs)
 
-        # Name the d-graphs
         # Number the d_graphs
         for idx, d_graph in enumerate(self.all_d_graphs):
             d_graph.idx = idx
@@ -79,15 +82,26 @@ class D2Graph(nx.Graph):
 
         # Index all the d-graphs
         if verbose:
-            print("Compute the dmer index")
-        self.index = self.create_index_from_tuples(index_size, verbose=verbose)
-        self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
+            print("Compute the dmer dgraph")
+            print("\tIndexing")
+        # self.index = FixedDGIndex(size=index_size)
+        self.index = VariableDGIndex(size=2)
+        for idx, dg in enumerate(self.all_d_graphs):
+            if verbose:
+                print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='')
+            self.index.add_dgraph(dg)
+            # self.var_index.add_dgraph(dg)
+        if verbose:
+            print()
+            print("\tFilter index")
+        self.index.filter_by_entry()
+        # self.index = self.create_index_from_tuples(index_size, verbose=verbose)
+        # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
         # Compute node distances for pair of dgraphs that share at least 1 dmer.
         if verbose:
             print("Compute the graph")
         # Create the graph
         self.bidict_nodes = self.create_graph()
-        #self.compute_distances()
 
 
     def get_covering_variables(self, udg):
@@ -203,55 +217,3 @@ class D2Graph(nx.Graph):
             # Distance computing and adding in the dist dicts
             d = dg1.distance_to(dg2)
             data["distance"] = d
-
-
-    def filter_dominated_in_index(self, tuple_size=3, verbose=True):
-        to_remove = set()
-
-        if verbose:
-            print("\tFilter dominated in index")
-
-        # Find dominated
-        for dmer_idx, item in enumerate(self.index.items()):
-            dmer, dg_list = item
-
-            if verbose:
-                sys.stdout.write(f"\r\t{dmer_idx+1}/{len(self.index)}")
-                sys.stdout.flush()
-
-            undominated = list_domination_filter(dg_list)
-
-            # Register dominated
-            if len(dg_list) != len(undominated):
-                for dg in dg_list:
-                    if dg not in undominated:
-                        to_remove.add(dg)
-
-            self.index[dmer] = undominated
-
-        if verbose:
-            print()
-            print("\tDmer removal")
-
-        removable_dmers = set()
-        for r_idx, r_dg in enumerate(to_remove):
-            if verbose:
-                sys.stdout.write(f"\r\t{r_idx+1}/{len(to_remove)}")
-                sys.stdout.flush()
-
-            self.all_d_graphs.remove(r_dg)
-            self.d_graphs_per_node[r_dg.center].remove(r_dg)
-
-            # Remove dominated in index
-            for dmer in itertools.combinations(r_dg.to_sorted_list(), tuple_size):
-                if r_dg in self.index[dmer]:
-                    self.index[dmer] = list(filter(lambda x: x!=r_dg, self.index[dmer]))
-                    if len(self.index[dmer]) == 0:
-                        removable_dmers.add(dmer)
-
-        # Remove empty dmers
-        for dmer in removable_dmers:
-            del self.index[dmer]
-        if verbose:
-            print()
-
diff --git a/deconvolution/d2_path.py b/deconvolution/d2graph/d2_path.py
similarity index 100%
rename from deconvolution/d2_path.py
rename to deconvolution/d2graph/d2_path.py
diff --git a/deconvolution/path_algorithms.py b/deconvolution/d2graph/path_algorithms.py
similarity index 99%
rename from deconvolution/path_algorithms.py
rename to deconvolution/d2graph/path_algorithms.py
index 251c28b23ce814bd9e1897bdde173af3e1e1aa58..d10a49c73262e966f4984d966d349d626e57701f 100644
--- a/deconvolution/path_algorithms.py
+++ b/deconvolution/d2graph/path_algorithms.py
@@ -1,5 +1,4 @@
-import networkx as nx
-from d2_path import Path
+from deconvolution.d2graph.d2_path import Path
 
 """ Greedy algorithm. Start with th most probable unitig (ie lowest normalized penalty first and largest unitig for
     equalities). Then extends on both side to the nearest interesting unitig.
diff --git a/deconvolution/path_optimization.py b/deconvolution/d2graph/path_optimization.py
similarity index 98%
rename from deconvolution/path_optimization.py
rename to deconvolution/d2graph/path_optimization.py
index 4b8e7681cb879f8a120a0df6c2004275e186f8c1..3dbde86dc5b1954f3260ce0015244f5879b2d876 100644
--- a/deconvolution/path_optimization.py
+++ b/deconvolution/d2graph/path_optimization.py
@@ -1,7 +1,8 @@
 import random
-from d2_path import Path
 import networkx as nx
 
+from deconvolution.d2graph.d2_path import Path
+
 
 class Optimizer:
 
diff --git a/deconvolution/debug_disct.py b/deconvolution/debug_disct.py
deleted file mode 100644
index 67f14303c78ed3e2033de31adbed58b0929d681e..0000000000000000000000000000000000000000
--- a/deconvolution/debug_disct.py
+++ /dev/null
@@ -1,53 +0,0 @@
-
-def save(dict, filename):
-    with open(filename, "w") as fp:
-        for key, array in dict.items():
-            fp.write(str(len(array)) + " " + key + "\n")
-            fp.write('\n'.join([str(sorted(x.nodes)) for x in array]) + "\n")
-    print(filename, "saved")
-
-
-def load(filename):
-    d = {}
-    with open(filename) as fp:
-        value = None
-        nb_vals = 0
-        for line in fp:
-            line = line.strip()
-
-            if value == None:
-                first_space = line.find(' ')
-                nb_vals = int(line[:first_space])
-                value = line[first_space+1:]
-                d[value] = []
-            else:
-                d[value].append(line.strip())
-                nb_vals -= 1
-                if nb_vals == 0:
-                    value = None
-
-    print(filename, "loaded")
-    return d
-
-
-def compare(d1, d2):
-    remaining = set(d2.keys())
-
-    for key in d1:
-        if key not in d2:
-            print(key, "not present in d2")
-
-        remaining.remove(key)
-        l1 = sorted([str(sorted(x.nodes)) for x in d1[key]])
-        l2 = sorted([str(x) for x in d2[key]])
-
-        if l1 != l2:
-            print(f"{key}: disimilar lists:")
-            s1 = set(l1)
-            s2 = set(l2)
-            print(s1 - s2)
-            print(s2 - s1)
-
-    for key in remaining:
-        print(key, "not present in d1")
-
diff --git a/deconvolution/dgraph/AbstractDGFactory.py b/deconvolution/dgraph/AbstractDGFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ebf1059f9d03e4f7a10e296fe6b1c5f98891821
--- /dev/null
+++ b/deconvolution/dgraph/AbstractDGFactory.py
@@ -0,0 +1,115 @@
+import networkx as nx
+from abc import abstractmethod
+
+from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
+
+
+class AbstractDGFactory:
+    def __init__(self, graph):
+        self.graph = graph
+
+
+    def generate_all_dgraphs(self, debug=False):
+        index = FixedDGIndex(size=1)
+
+        nb_nodes = len(self.graph.nodes())
+        for idx, node in enumerate(self.graph.nodes()):
+            if debug: print(f"\r{idx+1}/{nb_nodes} node analysis", end='')
+            neighbors = list(self.graph.neighbors(node))
+            subgraph = nx.Graph(self.graph.subgraph(neighbors))
+
+            # Fill the index by node
+            key = frozenset({node})
+            for dg in self.generate_by_node(node, subgraph):
+                index.add_value(key, dg)
+
+        index.filter_by_entry()
+        return index
+
+
+    @abstractmethod
+    def generate_by_node(self, node, subgraph):
+        pass
+
+
+# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
+#     d_graphs = FixedDGIndex(size=1)
+#
+#     nds = list(graph.nodes())
+#     for idx, node in enumerate(nds):
+#         # print(idx+1, '/', len(nds))
+#         #if "MI" not in str(node): continue    # for debugging; only look at deconvolved nodes
+#         #print(f"\r{idx+1}/{len(graph.nodes())}")
+#         neighbors = list(graph.neighbors(node))
+#         neighbors_graph = nx.Graph(graph.subgraph(neighbors))
+#
+#         node_d_graphs = set()
+#
+#         mode_str = " "
+#         if clique_mode is None:
+#             # Find all the cliques (equivalent to compute all the candidate half d-graph)
+#             cliques = []
+#             for clique in nx.find_cliques(neighbors_graph):
+#                 if len(clique) > 3:
+#                     cliques.append(clique)
+#             mode_str += "(max-cliques)"
+#         elif clique_mode == "louvain":
+#             louvain = community.best_partition(neighbors_graph) # louvain
+#             # high resolution seems to work better
+#             communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
+#             mode_str += "(louvain)"
+#             cliques = []
+#             for comm in communities:
+#                 # further decompose! into necessarily 2 communities
+#                 community_as_graph = nx.Graph(graph.subgraph(comm))
+#                 if len(community_as_graph.nodes()) <= 2:
+#                     cliques += [community_as_graph.nodes()]
+#                 else:
+#                     cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
+#
+#         elif clique_mode == "testing":
+#             # k-clique communities
+#             #from networkx.algorithms.community import k_clique_communities
+#             #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
+#             from cdlib import algorithms
+#             cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
+#             #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
+#             #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
+#             from collections import defaultdict
+#             cliques_dict2 = defaultdict(list)
+#             for (node, values) in cliques_dict.to_node_community_map().items():
+#                 for value in values:
+#                     cliques_dict2[value] += [node]
+#             cliques = list(cliques_dict2.values())
+#             mode_str += "(testing)"
+#
+#         if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
+#
+#         cliques_debugging = True
+#         if cliques_debugging:
+#
+#             from collections import Counter
+#             len_cliques = Counter(map(len,cliques))
+#
+#         # Pair halves to create d-graphes
+#         for idx, clq1 in enumerate(cliques):
+#             for clq2_idx in range(idx+1, len(cliques)):
+#                 clq2 = cliques[clq2_idx]
+#
+#                 # Check for d-graph candidates
+#                 d_graph = Dgraph(node)
+#                 d_graph.put_halves(clq1, clq2, neighbors_graph)
+#
+#                 factor = 0.5
+#                 #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
+#                 #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
+#                 if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
+#                     node_d_graphs.add(d_graph)
+#
+#         # Fill the index by node
+#         key = frozenset({node})
+#         for dg in node_d_graphs:
+#             d_graphs.add_value(key, dg)
+#
+#     d_graphs.filter_by_entry()
+#     return d_graphs
diff --git a/deconvolution/dgraph/AbstractDGIndex.py b/deconvolution/dgraph/AbstractDGIndex.py
new file mode 100644
index 0000000000000000000000000000000000000000..06771b4802e67df79a09051c33583a77529b2c1f
--- /dev/null
+++ b/deconvolution/dgraph/AbstractDGIndex.py
@@ -0,0 +1,80 @@
+from abc import abstractmethod
+
+
+class AbstractDGIndex(dict):
+
+    def __init__(self, size=3):
+        """ This class represent a index of dgraphs.
+        Each key in the index is a set of barcodes and each value a list of dgraphs containing these barcodes.
+        :param size: Usage vary regarding the subclass used
+        """
+        super(AbstractDGIndex, self).__init__()
+        self.size = size
+
+
+    @abstractmethod
+    def _verify_key(self, key_set, dg_size=0):
+        pass
+
+    def add_value(self, key_set, dgraph):
+        """ Add the couple key (set of barcodes) and value (dgraph) at the right place in the dict
+        """
+        pass
+        # Test the key size
+        self._verify_key(key_set)
+        key_set = frozenset(key_set)
+
+        # Add the key if not already present
+        if key_set not in self:
+            self[key_set] = set()
+
+        # Associate the value with the key
+        self[key_set].add(dgraph)
+
+
+    @abstractmethod
+    def add_dgraph(self, dg):
+        """ Generate all the set needed for keys in the dgraph and push the d-graph as value.
+        For fixed size of the dgraph all the sets of this size will be generated as key.
+        Otherwise, all the set of size at least len(dg) - size will be generated.
+        """
+        pass
+
+
+    def _filter_entry(self, key_set):
+        """ For one entry in the index, filter out dominated dgraphs
+        :param key_set: The entry to filter
+        """
+        # Verify presence in the index
+        if key_set not in self:
+            raise KeyError("The set is not present in the index")
+
+        # nÂ² filtering
+        dgs = self[key_set]
+        to_remove = set()
+
+        for dg1 in dgs:
+            for dg2 in dgs:
+                if dg1.is_dominated(dg2):
+                    to_remove.add(dg1)
+                    break
+
+        self[key_set] = dgs - to_remove
+        return to_remove
+
+
+    def filter_by_entry(self):
+        for key_set in self:
+            removed = self._filter_entry(key_set)
+            # TODO: remove globaly ?
+
+
+    def __contains__(self, key):
+        key = frozenset(key)
+        return super(AbstractDGIndex, self).__contains__(key)
+
+    def __getitem__(self, key):
+        return super(AbstractDGIndex, self).__getitem__(self.__keytransform__(key))
+
+    def __keytransform__(self, key):
+        return frozenset(key)
diff --git a/deconvolution/dgraph/CliqueDGFactory.py b/deconvolution/dgraph/CliqueDGFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..07959e6f47c91e0f37fc7821c15f5e7ffe1c9e44
--- /dev/null
+++ b/deconvolution/dgraph/CliqueDGFactory.py
@@ -0,0 +1,37 @@
+import networkx as nx
+
+from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory
+from deconvolution.dgraph.d_graph import Dgraph
+
+class CliqueDGFactory(AbstractDGFactory):
+
+    def __init__(self, graph, min_size_clique=4, dg_max_divergence_factor=0.5):
+        super(CliqueDGFactory, self).__init__(graph)
+        self.min_size = min_size_clique
+        self.dg_max_divergence_factor = dg_max_divergence_factor
+
+
+    def generate_by_node(self, node, subgraph):
+        node_d_graphs = set()
+
+        # Clique computation
+        cliques = []
+        for clique in nx.find_cliques(subgraph):
+            if len(clique) >= self.min_size:
+                cliques.append(clique)
+
+        # TODO: Index cliques to pair them faster
+
+        # d-graph computation
+        for idx, clq1 in enumerate(cliques):
+            for clq2_idx in range(idx+1, len(cliques)):
+                clq2 = cliques[clq2_idx]
+
+                # Check for d-graph candidates
+                d_graph = Dgraph(node)
+                d_graph.put_halves(clq1, clq2, subgraph)
+
+                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * self.dg_max_divergence_factor:
+                    node_d_graphs.add(d_graph)
+
+        return node_d_graphs
diff --git a/deconvolution/dgraph/FixedDGIndex.py b/deconvolution/dgraph/FixedDGIndex.py
new file mode 100644
index 0000000000000000000000000000000000000000..8edca6277df8216d13437288d61d11d596edc730
--- /dev/null
+++ b/deconvolution/dgraph/FixedDGIndex.py
@@ -0,0 +1,20 @@
+from itertools import combinations
+
+from deconvolution.dgraph.AbstractDGIndex import AbstractDGIndex
+
+
+class FixedDGIndex(AbstractDGIndex):
+
+    def __init__(self, size=3):
+        super(FixedDGIndex, self).__init__(size=size)
+
+
+    def _verify_key(self, key_set, dg_size=0):
+        if len(key_set) != self.size:
+            raise ValueError("Wrong set size in the dgraph")
+
+
+    def add_dgraph(self, dg):
+        barcodes = dg.node_set
+        for tup in combinations(barcodes, self.size):
+            self.add_value(frozenset(tup), dg)
diff --git a/deconvolution/dgraph/LouvainDGFactory.py b/deconvolution/dgraph/LouvainDGFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4dbe688ec9519dbdb9907262d0a917e1330cc65
--- /dev/null
+++ b/deconvolution/dgraph/LouvainDGFactory.py
@@ -0,0 +1,42 @@
+import networkx as nx
+
+from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory
+from deconvolution.dgraph.d_graph import Dgraph
+import community
+
+class LouvainDGFactory(AbstractDGFactory):
+
+    def __init__(self, graph, dg_max_divergence_factor=0.5):
+        super(LouvainDGFactory, self).__init__(graph)
+        self.dg_max_divergence_factor = dg_max_divergence_factor
+
+
+    def generate_by_node(self, node, subgraph):
+        node_d_graphs = set()
+
+        louvain = community.best_partition(subgraph) # louvain
+        # high resolution seems to work better
+        communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
+        cliques = []
+        for comm in communities:
+            # further decompose! into necessarily 2 communities
+            community_as_graph = nx.Graph(subgraph.subgraph(comm))
+            if len(community_as_graph.nodes()) <= 2:
+                cliques += [community_as_graph.nodes()]
+            else:
+                cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
+
+
+        # d-graph computation
+        for idx, clq1 in enumerate(cliques):
+            for clq2_idx in range(idx+1, len(cliques)):
+                clq2 = cliques[clq2_idx]
+
+                # Check for d-graph candidates
+                d_graph = Dgraph(node)
+                d_graph.put_halves(clq1, clq2, subgraph)
+
+                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * self.dg_max_divergence_factor:
+                    node_d_graphs.add(d_graph)
+
+        return node_d_graphs
diff --git a/deconvolution/dgraph/VariableDGIndex.py b/deconvolution/dgraph/VariableDGIndex.py
new file mode 100644
index 0000000000000000000000000000000000000000..013bb9964c5fe19e100075d4d618acde5b8e8dc3
--- /dev/null
+++ b/deconvolution/dgraph/VariableDGIndex.py
@@ -0,0 +1,22 @@
+from itertools import combinations
+
+from deconvolution.dgraph.AbstractDGIndex import AbstractDGIndex
+
+
+class VariableDGIndex(AbstractDGIndex):
+
+    def __init__(self, size=3):
+        super(VariableDGIndex, self).__init__(size=size)
+
+
+    def _verify_key(self, key_set, dg_size=0):
+        if len(key_set) < dg_size - self.size:
+            raise ValueError("Wrong set size in the dgraph")
+
+
+    def add_dgraph(self, dg):
+        barcodes = dg.node_set
+
+        for size in range(len(barcodes)-self.size, len(barcodes)+1):
+            for tup in combinations(barcodes, size):
+                self.add_value(frozenset(tup), dg)
diff --git a/deconvolution/dgraph/__init__.py b/deconvolution/dgraph/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/deconvolution/dgraph/d_graph.py b/deconvolution/dgraph/d_graph.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2a915c114807f24b0d3840e799d46f523673513
--- /dev/null
+++ b/deconvolution/dgraph/d_graph.py
@@ -0,0 +1,318 @@
+from functools import total_ordering
+
+
+
+@total_ordering
+class Dgraph(object):
+    """docstring for Dgraph"""
+    def __init__(self, center):
+        super(Dgraph, self).__init__()
+        self.idx = -1
+        self.center = center
+        self.score = 0
+        self.halves = [None,None]
+        self.connexity = [None,None]
+        self.nodes = [self.center]
+        self.node_set = set(self.nodes)
+        self.edges = []
+        self.ordered_list = None
+        self.sorted_list = None
+
+        self.marked = False
+
+
+    """ Static method to load a dgraph from a text
+        @param text the saved d-graph
+        @param barcode_graph Barcode graph from which the d-graph is extracted
+        @return a new d-graph object corresponding to the test 
+    """
+    def load(text, barcode_graph):
+        # basic split
+        text = text.replace(']', '')
+        head, h1, h2 = text.split('[')
+
+        # Head parsing
+        center = head.replace(' ', '')
+        if center not in barcode_graph:
+            center = int(center)
+        dg = Dgraph(center)
+
+        # Reload halves
+        h1 = [x.split(' ')[-2] for x in h1.split(',')]
+        h1 = [int(x) if x not in barcode_graph else x for x in h1]
+        h2 = [x.split(' ')[-2] for x in h2.split(',')]
+        h2 = [int(x) if x not in barcode_graph else x for x in h2]
+        dg.put_halves(h1, h2, barcode_graph)
+
+        return dg
+
+
+    """ Compute the d-graph quality (score) according to the connectivity between the two halves.
+        @param h1 First half of the d-graph
+        @param h2 Second half of the d-graph
+        @param graph The connectivity graph
+    """
+    def put_halves(self, h1, h2, graph):
+        self.score = 0
+        self.halves[0] = h1
+        for node in h1:
+            self.node_set.add(node)
+            self.nodes.append(node)
+        self.halves[1] = h2
+        for node in h2:
+            self.node_set.add(node)
+            self.nodes.append(node)
+
+        # self.nodes = sorted([self.center] + self.halves[0] + self.halves[1])
+        self.connexity[0] = {key: 0 for key in self.halves[0]}
+        self.connexity[1] = {key: 0 for key in self.halves[1]}
+        self.edges = []
+
+        # Compute link arities
+        for node1 in self.halves[0]:
+            neighbors = set(graph.neighbors(node1))
+
+            for node2 in self.halves[1]:
+                if node1 == node2 or node2 in neighbors:
+                    self.score += 1
+                    self.connexity[0][node1] += 1
+                    self.connexity[1][node2] += 1
+
+        # Compute links from the center to the other nodes
+        for idx, node1 in enumerate(self.nodes):
+            for node2 in self.nodes[idx+1:]:
+                if graph.has_edge(node1, node2):
+                    if node1 < node2:
+                        self.edges.append((node1, node2))
+                    else:
+                        self.edges.append((node2, node1))
+
+        # Sort the halves by descending connexity
+        connex = self.connexity
+        self.halves[0].sort(reverse=True, key=lambda v: connex[0][v])
+        self.halves[1].sort(reverse=True, key=lambda v: connex[1][v])
+
+
+    def get_link_divergence(self):
+        return int(abs(self.score - self.get_optimal_score()))
+
+
+    def get_optimal_score(self):
+        max_len = max(len(self.halves[0]), len(self.halves[1]))
+        return int(max_len * (max_len - 1) / 2)
+
+
+    def to_sorted_list(self):
+        if self.sorted_list is None:
+            self.sorted_list = sorted(self.nodes)
+        return self.sorted_list
+
+
+    def to_ordered_lists(self):
+        if self.ordered_list is None:
+            hands = [[],[]]
+            for idx in range(2):
+                prev_connectivity = -1
+                for node in self.halves[idx]:
+                    # group nodes by similar connectivity
+                    value = self.connexity[idx][node]
+                    if value != prev_connectivity:
+                        hands[idx].append([])
+                        prev_connectivity = value
+                    hands[idx][-1].append(node)
+
+            self.ordered_list = hands[0][::-1] + [[self.center]] + hands[1]
+        return self.ordered_list
+
+
+    def to_node_set(self):
+        return frozenset(self.to_sorted_list())
+
+
+    def distance_to(self, dgraph):
+        nodes_1 = self.to_sorted_list()
+        nodes_2 = other_nodes = dgraph.to_sorted_list()
+
+        dist = 0
+        idx1, idx2 = 0, 0
+        while idx1 != len(nodes_1) and idx2 != len(nodes_2):
+            if nodes_1[idx1] == nodes_2[idx2]:
+                idx1 += 1
+                idx2 += 1
+            else:
+                dist += 1
+                if nodes_1[idx1] < nodes_2[idx2]:
+                    idx1 += 1
+                else:
+                    idx2 += 1
+        dist += len(nodes_1) - idx1 + len(nodes_2) - idx2
+
+        return dist
+
+
+    """ Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes
+    of dg1 are part of dg2 and the divergeance of dg1 is greater than dg2.
+    @param dg1 (resp dg2) A d_graph object.
+    @return True if dg1 is dominated by dg2.
+    """
+    def is_dominated(self, dg):
+        dg1_nodes = self.to_node_set()
+        dg2_nodes = dg.to_node_set()
+
+        # domination first condition: inclusion of all the nodes
+        if not dg1_nodes.issubset(dg2_nodes):
+            return False
+
+        # domination second condition
+        if len(dg1_nodes) == len(dg2_nodes):
+            if self.get_link_divergence() > dg.get_link_divergence():
+                return True
+        elif self.get_link_divergence() >= dg.get_link_divergence():
+            return True
+
+        return False
+
+
+    def __eq__(self, other):
+        if other is None:
+            return False
+
+        if self.idx != -1 and self.idx == other.idx:
+            return True
+
+        if self.node_set != other.node_set:
+            return False
+
+        return self.to_ordered_lists() == other.to_ordered_lists()
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __lt__(self, other):
+        my_tuple = (self.get_link_divergence(), self.get_optimal_score())
+        other_tuple = (other.get_link_divergence(), other.get_optimal_score())
+        return my_tuple < other_tuple
+
+
+    def __hash__(self):
+        nodelist = self.to_sorted_list()
+        nodelist = [str(x) for x in nodelist]
+        return ",".join(nodelist).__hash__()
+
+
+    def __ordered_hash__(self):
+        lst = self.to_ordered_lists()
+
+        fwd_uniq_lst = [sorted(l) for l in lst]
+        fwd_str = ",".join([f"[{'-'.join(l)}]" for l in fwd_uniq_lst])
+        fwd_hash = fwd_str.__hash__()
+
+        rev_uniq_lst = [sorted(l) for l in lst[::-1]]
+        rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst])
+        rev_hash = rev_str.__hash__()
+
+        return int(min(fwd_hash, rev_hash))
+
+
+    def __repr__(self):
+        # print(self.halves)
+        representation = str(self.center) + " "
+        representation += "[" + ", ".join([f"{node} {self.connexity[0][node]}" for node in self.halves[0]]) + "]"
+        representation += "[" + ", ".join([f"{node} {self.connexity[1][node]}" for node in self.halves[1]]) + "]"
+        return representation
+
+    def _to_str_nodes(self):
+        str_nodes = [str(x) for x in self.nodes]
+        str_nodes.sort()
+        return str(str_nodes)
+
+#
+# from multiprocessing import Pool
+#
+# """ From a barcode graph, compute all the possible max d-graphs by node.
+#     @param graph A barcode graph
+#     @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
+# """
+# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
+#     d_graphs = FixedDGIndex(size=1)
+#     pool = Pool(processes=threads)
+#
+#     nds = list(graph.nodes())
+#     for idx, node in enumerate(nds):
+#         print(idx+1, '/', len(nds))
+#         #if "MI" not in str(node): continue    # for debugging; only look at deconvolved nodes
+#         #print(f"\r{idx+1}/{len(graph.nodes())}")
+#         neighbors = list(graph.neighbors(node))
+#         neighbors_graph = nx.Graph(graph.subgraph(neighbors))
+#
+#         node_d_graphs = set()
+#
+#         mode_str = " "
+#         if clique_mode is None:
+#             # Find all the cliques (equivalent to compute all the candidate half d-graph)
+#             cliques = []
+#             for clique in nx.find_cliques(neighbors_graph):
+#                 if len(clique) > 3:
+#                     cliques.append(clique)
+#             mode_str += "(max-cliques)"
+#         elif clique_mode == "louvain":
+#             louvain = community.best_partition(neighbors_graph) # louvain
+#             # high resolution seems to work better
+#             communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
+#             mode_str += "(louvain)"
+#             cliques = []
+#             for comm in communities:
+#                 # further decompose! into necessarily 2 communities
+#                 community_as_graph = nx.Graph(graph.subgraph(comm))
+#                 if len(community_as_graph.nodes()) <= 2:
+#                     cliques += [community_as_graph.nodes()]
+#                 else:
+#                     cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
+#
+#         elif clique_mode == "testing":
+#             # k-clique communities
+#             #from networkx.algorithms.community import k_clique_communities
+#             #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
+#             from cdlib import algorithms
+#             cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
+#             #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
+#             #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
+#             from collections import defaultdict
+#             cliques_dict2 = defaultdict(list)
+#             for (node, values) in cliques_dict.to_node_community_map().items():
+#                 for value in values:
+#                     cliques_dict2[value] += [node]
+#             cliques = list(cliques_dict2.values())
+#             mode_str += "(testing)"
+#
+#         if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
+#
+#         cliques_debugging = True
+#         if cliques_debugging:
+#
+#             from collections import Counter
+#             len_cliques = Counter(map(len,cliques))
+#
+#         # Pair halves to create d-graphes
+#         for idx, clq1 in enumerate(cliques):
+#             for clq2_idx in range(idx+1, len(cliques)):
+#                 clq2 = cliques[clq2_idx]
+#
+#                 # Check for d-graph candidates
+#                 d_graph = Dgraph(node)
+#                 d_graph.put_halves(clq1, clq2, neighbors_graph)
+#
+#                 factor = 0.5
+#                 #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
+#                 #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
+#                 if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
+#                     node_d_graphs.add(d_graph)
+#
+#         # Fill the index by node
+#         key = frozenset({node})
+#         for dg in node_d_graphs:
+#             d_graphs.add_value(key, dg)
+#
+#     d_graphs.filter_by_entry()
+#     return d_graphs
+#
diff --git a/deconvolution/graph_manipulator.py b/deconvolution/dgraph/graph_manipulator.py
similarity index 100%
rename from deconvolution/graph_manipulator.py
rename to deconvolution/dgraph/graph_manipulator.py
diff --git a/deconvolution/main/__init__.py b/deconvolution/main/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/deconvolution/analyse_d2_tsv.py b/deconvolution/main/analyse_d2_tsv.py
similarity index 100%
rename from deconvolution/analyse_d2_tsv.py
rename to deconvolution/main/analyse_d2_tsv.py
diff --git a/deconvolution/d2_reduction.py b/deconvolution/main/d2_reduction.py
similarity index 93%
rename from deconvolution/d2_reduction.py
rename to deconvolution/main/d2_reduction.py
index 9eb85ff1bdb8f47f2bb42be8e72aecfeb818fded..3e0250600c8a53db3fa99b2f33a1e6614258f9b4 100755
--- a/deconvolution/d2_reduction.py
+++ b/deconvolution/main/d2_reduction.py
@@ -4,8 +4,8 @@ import networkx as nx
 import argparse
 import sys
 
-import d2_graph as d2
-import d2_algorithms as d2a
+from deconvolution.d2graph import d2_graph as d2
+from deconvolution.d2graph import d2_algorithms as d2a
 
 
 def parse_arguments():
diff --git a/deconvolution/d2_to_path.py b/deconvolution/main/d2_to_path.py
similarity index 95%
rename from deconvolution/d2_to_path.py
rename to deconvolution/main/d2_to_path.py
index e14ec936e89a9717674472166ca9bf70909b4c58..667f0c11ba6e5184f0c9f9927bd1e6014c1e0a14 100755
--- a/deconvolution/d2_to_path.py
+++ b/deconvolution/main/d2_to_path.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python3
 
 import networkx as nx
-import path_optimization as po
 import argparse
 import sys
 
-import d2_graph as d2
+from deconvolution.d2graph import d2_graph as d2, path_optimization as po
 
 
 def parse_arguments():
diff --git a/deconvolution/evaluate.py b/deconvolution/main/evaluate.py
similarity index 99%
rename from deconvolution/evaluate.py
rename to deconvolution/main/evaluate.py
index d05d75219b79bedf35a29fadad18ad27a40fc0d6..7b18f389e0b670341dfe5e921c79423c2f127167 100755
--- a/deconvolution/evaluate.py
+++ b/deconvolution/main/evaluate.py
@@ -6,8 +6,6 @@ import argparse
 from termcolor import colored
 import networkx as nx
 
-from d2_graph import D2Graph
-
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Process some integers.')
diff --git a/deconvolution/generate_fake_barcode_graph.py b/deconvolution/main/generate_fake_barcode_graph.py
similarity index 96%
rename from deconvolution/generate_fake_barcode_graph.py
rename to deconvolution/main/generate_fake_barcode_graph.py
index 15ddba102df2637e69086cbc1fc8a70f038a08f3..9d218afe55c75f75dbd05522dc2887941364b0dd 100755
--- a/deconvolution/generate_fake_barcode_graph.py
+++ b/deconvolution/main/generate_fake_barcode_graph.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python3
 
-import networkx  as nx
+import networkx as nx
 import random
 import argparse
 
-import graph_manipulator as gm
+import deconvolution.dgraph.graph_manipulator as gm
 
 
 def parse_arguments():
diff --git a/deconvolution/generate_fake_molecule_graph.py b/deconvolution/main/generate_fake_molecule_graph.py
similarity index 96%
rename from deconvolution/generate_fake_molecule_graph.py
rename to deconvolution/main/generate_fake_molecule_graph.py
index a8455e96fab075c7adf41a1ffdb9fc82f9017510..be096630174dae464042e4b321d7d6d4dbf26dea 100755
--- a/deconvolution/generate_fake_molecule_graph.py
+++ b/deconvolution/main/generate_fake_molecule_graph.py
@@ -2,7 +2,7 @@
 
 import argparse
 
-import graph_manipulator as gm
+from deconvolution.dgraph import graph_manipulator as gm
 
 
 def parse_arguments():
diff --git a/deconvolution/gexf_converter.py b/deconvolution/main/gexf_converter.py
similarity index 100%
rename from deconvolution/gexf_converter.py
rename to deconvolution/main/gexf_converter.py
diff --git a/deconvolution/main/test_index_cliques.py b/deconvolution/main/test_index_cliques.py
new file mode 100644
index 0000000000000000000000000000000000000000..34af2836800ab406faa0abf97290cd3d8d73761b
--- /dev/null
+++ b/deconvolution/main/test_index_cliques.py
@@ -0,0 +1,43 @@
+import argparse
+import networkx as nx
+from itertools import combinations
+from multiprocessing import Pool
+
+
+def parse_arguments():
+    parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.')
+    parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.')
+    parser.add_argument('--threads',       '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
+
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_arguments()
+    print(args.barcode_graph)
+
+    graph = nx.read_gexf(args.barcode_graph)
+
+    for node in graph.nodes():
+        neighbors = list(graph.neighbors(node))
+        subgraph = nx.Graph(graph.subgraph(neighbors))
+
+        index_cliques(subgraph, node)
+
+
+def index_cliques(graph, node):
+    clique_index = {}
+
+    nb_cliques = 0
+    nb_unfiltered_cliques = 0
+    for clique in nx.find_cliques(graph):
+        nb_cliques += 1
+        if len(clique) > 3:
+            # print(clique)
+            nb_unfiltered_cliques += 1
+    print(nb_unfiltered_cliques, '/', nb_cliques)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/deconvolution/to_d2_graph.py b/deconvolution/main/to_d2_graph.py
similarity index 89%
rename from deconvolution/to_d2_graph.py
rename to deconvolution/main/to_d2_graph.py
index 59ed95162d89a8ab92d7961b37f3e27938307938..e8a14d74d3b5cce359ba3416b1003ac443c36c32 100755
--- a/deconvolution/to_d2_graph.py
+++ b/deconvolution/main/to_d2_graph.py
@@ -4,13 +4,14 @@ import networkx as nx
 import argparse
 import sys
 
-import d2_graph as d2
+from deconvolution.d2graph import d2_graph as d2
 
 
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.')
     parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.')
     parser.add_argument('--output_prefix', '-o', default="d2_graph",  help="Output file prefix.")
+    parser.add_argument('--threads',       '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
     parser.add_argument('--debug',         '-d', action='store_true', help="Debug")
     parser.add_argument('--maxclq',        '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
     parser.add_argument('--louvain',       '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
@@ -53,8 +54,8 @@ def main():
     d2g = d2.D2Graph(G)
     dprint("D2 graph object created")
     dprint("constructing d2 graph from barcode graph")
-    index_size = 12 #if clique_mode is None else 3
-    d2g.construct_from_barcodes(index_size=index_size, debug=debug, clique_mode=clique_mode)
+    index_size = 8 #if clique_mode is None else 3
+    d2g.construct_from_barcodes(index_size=index_size, debug=debug, clique_mode=clique_mode, threads=args.threads)
     dprint("[debug] d2 graph constructed")
     
     d2g.save(f"{args.output_prefix}.tsv")
diff --git a/setup.py b/setup.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..926e3d9018b7a7164f6c683de95408fb6865c555 100644
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,10 @@
+from distutils.core import setup
+
+
+setup(
+    name='10X-deconvolve',
+    version='0.1dev',
+    packages=['deconvolution.d2graph', 'deconvolution.dgraph', 'deconvolution.main'],
+    license='AGPL V3',
+    long_description=open('README.md').read(),
+)
diff --git a/tests/Index_test.py b/tests/Index_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b880886d4cde530835c1d19556c9567e6df348f
--- /dev/null
+++ b/tests/Index_test.py
@@ -0,0 +1,65 @@
+import unittest
+from random import randint
+
+from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
+from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
+from deconvolution.dgraph.d_graph import Dgraph
+from deconvolution.dgraph.graph_manipulator import generate_d_graph_chain
+
+
+class TestIndex(unittest.TestCase):
+    def test_construction(self):
+        for _ in range(10):
+            size = randint(1, 50)
+            index = FixedDGIndex(size=size)
+            self.assertEqual(len(index), 0)
+            self.assertEqual(index.size, size)
+
+
+    def test_wrong_size_filling(self):
+        index = FixedDGIndex(size=3)
+        key = frozenset({'A', 'B'})
+        val = "Test"
+        with self.assertRaises(ValueError):
+            index.add_value(key, val)
+
+
+    def test_fill_static(self):
+        index = FixedDGIndex(size=3)
+        key = frozenset({'A', 'B', 'C'})
+        val = "Test"
+        index.add_value(key, val)
+        self.assertEqual(len(index), 1)
+        self.assertTrue(key in index)
+        self.assertEqual(index[key], {val})
+
+
+    def test_fixed_size(self):
+        dg = _generate_dg(2)
+        index = FixedDGIndex(size=2)
+        index.add_dgraph(dg)
+        self.assertEqual(len(index), 10)
+
+
+    def test_variable_size(self):
+        dg = _generate_dg(2)
+        index = VariableDGIndex(size=2)
+        index.add_dgraph(dg)
+        self.assertEqual(len(index), 16)
+
+
+def _generate_dg(d):
+    # nx graph construction
+    G = generate_d_graph_chain(2*d+1, d)
+    center = d
+    h1 = list(G.subgraph([x for x in range(d)]).nodes())
+    h2 = list(G.subgraph([2*d-x for x in range(d)]).nodes())
+
+    # d-graph construction
+    dg = Dgraph(center)
+    dg.put_halves(h1, h2, G)
+    return dg
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/d2_algorithms_test.py b/tests/d2_algorithms_test.py
index 052abf9a38afeda54571c931fb5bee1609249464..0516a8f21188d0b3d8f027aec02862872220f993 100644
--- a/tests/d2_algorithms_test.py
+++ b/tests/d2_algorithms_test.py
@@ -1,8 +1,5 @@
 import unittest
 
-import graph_manipulator as gm
-from d2_graph import D2Graph
-
 
 class TestD2Algorithms(unittest.TestCase):
 
diff --git a/tests/d2_graph_test.py b/tests/d2_graph_test.py
index e11cd35da3c33a4d15e069a9b161e12d16fc0f7c..5d787431fe32abd67ada66f80b9c615c79e9c422 100644
--- a/tests/d2_graph_test.py
+++ b/tests/d2_graph_test.py
@@ -3,36 +3,33 @@ import tempfile
 import networkx as nx
 from scipy.special import comb
 
-from d2_graph import D2Graph
-from d_graph import Dgraph
-import graph_manipulator as gm
-
-from tests.d_graph_data import complete_graph
+from deconvolution.d2graph.d2_graph import D2Graph
+from deconvolution.dgraph import graph_manipulator as gm
 
 
 class TestD2Graph(unittest.TestCase):
-    def test_construction(self):
-        d2 = D2Graph(complete_graph)
-        d2.construct_from_barcodes(index_size=4, verbose=False)
-
-        # Evaluate the number of candidate unit d_graphs generated
-        for node, candidates in d2.d_graphs_per_node.items():
-            if node == "C" or node == "B2":
-                self.assertEqual(1, len(candidates))
-            else:
-                self.assertEqual(0, len(candidates))
-
-        # Evaluate the index
-        self.assertEqual(13, len(d2.index))
-
-        overlap_key = ('A1', 'A2', 'B0', 'B1', 'B2', 'C')
-        for dmer, dg_lst in d2.index.items():
-            if dmer == overlap_key:
-                values = list(d2.index[dmer])
-                self.assertEqual(2, len(d2.index[dmer]))
-                self.assertNotEqual(values[0], values[1])
-            else:
-                self.assertEqual(1, len(d2.index[dmer]))
+    # def test_construction(self):
+    #     d2 = D2Graph(complete_graph)
+    #     d2.construct_from_barcodes(index_size=4, verbose=False)
+    #
+    #     # Evaluate the number of candidate unit d_graphs generated
+    #     for node, candidates in d2.d_graphs_per_node.items():
+    #         if node == "C" or node == "B2":
+    #             self.assertEqual(1, len(candidates))
+    #         else:
+    #             self.assertEqual(0, len(candidates))
+    #
+    #     # Evaluate the dgraph
+    #     self.assertEqual(13, len(d2.index))
+    #
+    #     overlap_key = ('A1', 'A2', 'B0', 'B1', 'B2', 'C')
+    #     for dmer, dg_lst in d2.index.items():
+    #         if dmer == overlap_key:
+    #             values = list(d2.index[dmer])
+    #             self.assertEqual(2, len(d2.index[dmer]))
+    #             self.assertNotEqual(values[0], values[1])
+    #         else:
+    #             self.assertEqual(1, len(d2.index[dmer]))
 
     def test_linear_d2_construction(self):
         for d in range(1, 10):
@@ -43,11 +40,16 @@ class TestD2Graph(unittest.TestCase):
             d2 = D2Graph(G)
             d2.construct_from_barcodes(index_size=index_k, verbose=False)
 
+
+            # for dg in d2.all_d_graphs:
+            #     print(dg.score, dg.get_link_divergence(), dg)
+            # print()
+
             # Test the number of d-graphs
             awaited_d_num = size - 2 * d
             self.assertEqual(awaited_d_num, len(d2.all_d_graphs))
 
-            # Test index
+            # Test dgraph
             awaited_index_size = comb(2 * d + 1, index_k) + (size - (2 * d + 1)) * comb(2 * d, index_k - 1)
             if len(d2.index) != awaited_index_size:
                 dmers = [list(x) for x in d2.index]
diff --git a/tests/d_graph_test.py b/tests/d_graph_test.py
index 0e714945ee609c5c2818f38bdb2ccf799ce95281..16b746c3c90d8c12f0e4ede5e9dc2e0bb775fc82 100644
--- a/tests/d_graph_test.py
+++ b/tests/d_graph_test.py
@@ -1,9 +1,8 @@
 import unittest
 
-from tests.d_graph_data import unit_d_graph
-from d_graph import Dgraph
-import graph_manipulator as gm
-
+from d_graph_data import unit_d_graph
+from deconvolution.dgraph.d_graph import Dgraph
+from deconvolution.dgraph import graph_manipulator as gm
 
 
 class TestDGraph(unittest.TestCase):
diff --git a/tests/graph_manipulation_test.py b/tests/graph_manipulation_test.py
index 033001aafd27e472a53ca51aec22172ad6a0780f..da597dd9e78b880983ec75c351cbd15deca7b7bc 100644
--- a/tests/graph_manipulation_test.py
+++ b/tests/graph_manipulation_test.py
@@ -1,7 +1,6 @@
 import unittest
 
-import graph_manipulator as gm
-
+from deconvolution.dgraph import graph_manipulator as gm
 
 
 class TestGraphManipulation(unittest.TestCase):