diff --git a/Snakefile_d2 b/Snakefile_d2
index 46f48433950e05678720936c97462b75845538df..ee50352c534f3be22ef8f66ba4451724bc920d2b 100644
--- a/Snakefile_d2
+++ b/Snakefile_d2
@@ -17,15 +17,16 @@ rule compress_data:
     input:
         barcode_graph=f"{WORKDIR}/{{barcode_file}}.gexf",
         d2_raw=f"{WORKDIR}/{{barcode_file}}_d2_raw_maxclq.gexf",
-        simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf",
-        d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
-        simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
+        simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf"
+        # d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
+        # simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
     output:
         zip=f"{WORKDIR}/{{barcode_file}}.tar.gz"
     shell:
         f"cd {WORKDIR}/ ;"
         "if [ ! -d {wildcards.barcode_file} ]; then mkdir {wildcards.barcode_file} ; fi;"
-        "mv *.gexf {wildcards.barcode_file}/ ;"
+        "mv {wildcards.barcode_file}_* {wildcards.barcode_file}/ ;"
+        "cp {wildcards.barcode_file}.gexf {wildcards.barcode_file}/ ;"
         "tar czf {wildcards.barcode_file}.tar.gz {wildcards.barcode_file}/ ;"
         "rm -rf {wildcards.barcode_file}/ ;"
         "cd - ;"
diff --git a/deconvolution/d2graph/d2_graph.py b/deconvolution/d2graph/d2_graph.py
index f7d6a77444148c723cceb6820c0df99331972a3c..abfd9780185f7025787f9f331ca8b1189d944365 100644
--- a/deconvolution/d2graph/d2_graph.py
+++ b/deconvolution/d2graph/d2_graph.py
@@ -4,7 +4,10 @@ from bidict import bidict
 import sys
 
 from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
-from deconvolution.dgraph.d_graph import Dgraph, compute_all_max_d_graphs
+from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
+from deconvolution.dgraph.d_graph import Dgraph
+from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
+from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
 
 
 class D2Graph(nx.Graph):
@@ -56,11 +59,16 @@ class D2Graph(nx.Graph):
         return self.subgraph(list(self.nodes()))
 
 
-    def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None):
+    def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None, threads=1):
         # Compute all the d-graphs
         if verbose:
             print("Computing the unit d-graphs..")
-        self.d_graphs_per_node = compute_all_max_d_graphs(self.barcode_graph, debug=debug, clique_mode=clique_mode)
+        dg_factory = None
+        if clique_mode == "louvain":
+            dg_factory = LouvainDGFactory(self.barcode_graph)
+        else:
+            dg_factory = CliqueDGFactory(self.barcode_graph)
+        self.d_graphs_per_node = dg_factory.generate_all_dgraphs(debug=True)
         if verbose:
             counts = sum(len(x) for x in self.d_graphs_per_node.values())
             print(f"\t {counts} computed d-graphs")
@@ -75,9 +83,17 @@ class D2Graph(nx.Graph):
         # Index all the d-graphs
         if verbose:
             print("Compute the dmer dgraph")
-        self.index = FixedDGIndex(size=index_size)
-        for dg in self.all_d_graphs:
+            print("\tIndexing")
+        # self.index = FixedDGIndex(size=index_size)
+        self.index = VariableDGIndex(size=2)
+        for idx, dg in enumerate(self.all_d_graphs):
+            if verbose:
+                print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='')
             self.index.add_dgraph(dg)
+            # self.var_index.add_dgraph(dg)
+        if verbose:
+            print()
+            print("\tFilter index")
         self.index.filter_by_entry()
         # self.index = self.create_index_from_tuples(index_size, verbose=verbose)
         # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
@@ -201,55 +217,3 @@ class D2Graph(nx.Graph):
             # Distance computing and adding in the dist dicts
             d = dg1.distance_to(dg2)
             data["distance"] = d
-
-
-    def filter_dominated_in_index(self, tuple_size=3, verbose=True):
-        to_remove = set()
-
-        if verbose:
-            print("\tFilter dominated in dgraph")
-
-        # Find dominated
-        for dmer_idx, item in enumerate(self.index.items()):
-            dmer, dg_list = item
-
-            if verbose:
-                sys.stdout.write(f"\r\t{dmer_idx+1}/{len(self.index)}")
-                sys.stdout.flush()
-
-            undominated = list_domination_filter(dg_list)
-
-            # Register dominated
-            if len(dg_list) != len(undominated):
-                for dg in dg_list:
-                    if dg not in undominated:
-                        to_remove.add(dg)
-
-            self.index[dmer] = undominated
-
-        if verbose:
-            print()
-            print("\tDmer removal")
-
-        removable_dmers = set()
-        for r_idx, r_dg in enumerate(to_remove):
-            if verbose:
-                sys.stdout.write(f"\r\t{r_idx+1}/{len(to_remove)}")
-                sys.stdout.flush()
-
-            self.all_d_graphs.remove(r_dg)
-            self.d_graphs_per_node[r_dg.center].remove(r_dg)
-
-            # Remove dominated in dgraph
-            for dmer in itertools.combinations(r_dg.to_sorted_list(), tuple_size):
-                if r_dg in self.index[dmer]:
-                    self.index[dmer] = list(filter(lambda x: x!=r_dg, self.index[dmer]))
-                    if len(self.index[dmer]) == 0:
-                        removable_dmers.add(dmer)
-
-        # Remove empty dmers
-        for dmer in removable_dmers:
-            del self.index[dmer]
-        if verbose:
-            print()
-
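Note: construct_from_barcodes() now delegates unit d-graph generation to a factory object instead of the removed compute_all_max_d_graphs() helper. A minimal sketch of that dispatch in isolation (make_dg_factory is a hypothetical helper, not part of this changeset):

    from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
    from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory

    def make_dg_factory(barcode_graph, clique_mode=None):
        # "louvain" selects the community-based strategy; anything else falls back to max-cliques
        if clique_mode == "louvain":
            return LouvainDGFactory(barcode_graph)
        return CliqueDGFactory(barcode_graph)

    # d_graphs_per_node = make_dg_factory(barcode_graph).generate_all_dgraphs(debug=False)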
diff --git a/deconvolution/dgraph/AbstractDGFactory.py b/deconvolution/dgraph/AbstractDGFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..3ebf1059f9d03e4f7a10e296fe6b1c5f98891821
--- /dev/null
+++ b/deconvolution/dgraph/AbstractDGFactory.py
@@ -0,0 +1,115 @@
+import networkx as nx
+from abc import abstractmethod
+
+from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
+
+
+class AbstractDGFactory:
+    def __init__(self, graph):
+        self.graph = graph
+
+
+    def generate_all_dgraphs(self, debug=False):
+        index = FixedDGIndex(size=1)
+
+        nb_nodes = len(self.graph.nodes())
+        for idx, node in enumerate(self.graph.nodes()):
+            if debug: print(f"\r{idx+1}/{nb_nodes} node analysis", end='')
+            neighbors = list(self.graph.neighbors(node))
+            subgraph = nx.Graph(self.graph.subgraph(neighbors))
+
+            # Fill the index by node
+            key = frozenset({node})
+            for dg in self.generate_by_node(node, subgraph):
+                index.add_value(key, dg)
+
+        index.filter_by_entry()
+        return index
+
+
+    @abstractmethod
+    def generate_by_node(self, node, subgraph):
+        pass
+
+
+# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
+#     d_graphs = FixedDGIndex(size=1)
+#
+#     nds = list(graph.nodes())
+#     for idx, node in enumerate(nds):
+#         # print(idx+1, '/', len(nds))
+#         #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes
+#         #print(f"\r{idx+1}/{len(graph.nodes())}")
+#         neighbors = list(graph.neighbors(node))
+#         neighbors_graph = nx.Graph(graph.subgraph(neighbors))
+#
+#         node_d_graphs = set()
+#
+#         mode_str = " "
+#         if clique_mode is None:
+#             # Find all the cliques (equivalent to compute all the candidate half d-graph)
+#             cliques = []
+#             for clique in nx.find_cliques(neighbors_graph):
+#                 if len(clique) > 3:
+#                     cliques.append(clique)
+#             mode_str += "(max-cliques)"
+#         elif clique_mode == "louvain":
+#             louvain = community.best_partition(neighbors_graph) # louvain
+#             # high resolution seems to work better
+#             communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
+#             mode_str += "(louvain)"
+#             cliques = []
+#             for comm in communities:
+#                 # further decompose! into necessarily 2 communities
+#                 community_as_graph = nx.Graph(graph.subgraph(comm))
+#                 if len(community_as_graph.nodes()) <= 2:
+#                     cliques += [community_as_graph.nodes()]
+#                 else:
+#                     cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
+#
+#         elif clique_mode == "testing":
+#             # k-clique communities
+#             #from networkx.algorithms.community import k_clique_communities
+#             #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
+#             from cdlib import algorithms
+#             cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
+#             #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
+#             #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
+#             from collections import defaultdict
+#             cliques_dict2 = defaultdict(list)
+#             for (node, values) in cliques_dict.to_node_community_map().items():
+#                 for value in values:
+#                     cliques_dict2[value] += [node]
+#             cliques = list(cliques_dict2.values())
+#             mode_str += "(testing)"
+#
+#         if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
+#
+#         cliques_debugging = True
+#         if cliques_debugging:
+#
+#             from collections import Counter
+#             len_cliques = Counter(map(len,cliques))
+#
+#         # Pair halves to create d-graphes
+#         for idx, clq1 in enumerate(cliques):
+#             for clq2_idx in range(idx+1, len(cliques)):
+#                 clq2 = cliques[clq2_idx]
+#
+#                 # Check for d-graph candidates
+#                 d_graph = Dgraph(node)
+#                 d_graph.put_halves(clq1, clq2, neighbors_graph)
+#
+#                 factor = 0.5
+#                 #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
+#                 #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
+#                 if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
+#                     node_d_graphs.add(d_graph)
+#
+#         # Fill the index by node
+#         key = frozenset({node})
+#         for dg in node_d_graphs:
+#             d_graphs.add_value(key, dg)
+#
+#     d_graphs.filter_by_entry()
+#     return d_graphs
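Note: AbstractDGFactory fixes the traversal (one neighbourhood subgraph per node, candidates indexed under frozenset({node}), then filter_by_entry()) and leaves only generate_by_node() to concrete strategies. A sketch of the minimal subclass contract (the class name and its trivial body are illustrative only):

    from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory

    class NullDGFactory(AbstractDGFactory):
        """Hypothetical strategy that proposes no d-graph candidate."""
        def generate_by_node(self, node, subgraph):
            # Must return an iterable of Dgraph objects centred on `node`,
            # built from the neighbourhood `subgraph`.
            return set()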
diff --git a/deconvolution/dgraph/CliqueDGFactory.py b/deconvolution/dgraph/CliqueDGFactory.py
new file mode 100644
index 0000000000000000000000000000000000000000..07959e6f47c91e0f37fc7821c15f5e7ffe1c9e44
--- /dev/null
+++ b/deconvolution/dgraph/CliqueDGFactory.py
@@ -0,0 +1,37 @@
+import networkx as nx
+
+from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory
+from deconvolution.dgraph.d_graph import Dgraph
+
+class CliqueDGFactory(AbstractDGFactory):
+
+    def __init__(self, graph, min_size_clique=4, dg_max_divergence_factor=0.5):
+        super(CliqueDGFactory, self).__init__(graph)
+        self.min_size = min_size_clique
+        self.dg_max_divergence_factor = dg_max_divergence_factor
+
+
+    def generate_by_node(self, node, subgraph):
+        node_d_graphs = set()
+
+        # Clique computation
+        cliques = []
+        for clique in nx.find_cliques(subgraph):
+            if len(clique) >= self.min_size:
+                cliques.append(clique)
+
+        # TODO: Index cliques to pair them faster
+
+        # d-graph computation
+        for idx, clq1 in enumerate(cliques):
+            for clq2_idx in range(idx+1, len(cliques)):
+                clq2 = cliques[clq2_idx]
+
+                # Check for d-graph candidates
+                d_graph = Dgraph(node)
+                d_graph.put_halves(clq1, clq2, subgraph)
+
+                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * self.dg_max_divergence_factor:
+                    node_d_graphs.add(d_graph)
+
+        return node_d_graphs
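Note: CliqueDGFactory keeps maximal cliques with at least min_size_clique nodes and tests every pair of them, so a neighbourhood with k kept cliques costs k*(k-1)/2 candidate checks, which is what the TODO about indexing cliques refers to. A usage sketch, assuming a barcode graph loaded from a gexf file (the file name is a placeholder):

    import networkx as nx
    from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory

    barcode_graph = nx.read_gexf("barcodes.gexf")
    factory = CliqueDGFactory(barcode_graph, min_size_clique=4, dg_max_divergence_factor=0.5)
    d_graphs_per_node = factory.generate_all_dgraphs(debug=False)  # FixedDGIndex keyed by frozenset({node})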
-""" -def compute_all_max_d_graphs(graph, debug=False, clique_mode=None): - d_graphs = FixedDGIndex(size=1) - - for idx, node in enumerate(list(graph.nodes())): - #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes - #print(f"\r{idx+1}/{len(graph.nodes())}") - neighbors = list(graph.neighbors(node)) - neighbors_graph = nx.Graph(graph.subgraph(neighbors)) - - node_d_graphs = set() - - mode_str = " " - if clique_mode is None: - # Find all the cliques (equivalent to compute all the candidate half d-graph) - cliques = list(nx.find_cliques(neighbors_graph)) - mode_str += "(max-cliques)" - elif clique_mode == "louvain": - louvain = community.best_partition(neighbors_graph) # louvain - # high resolution seems to work better - communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())] - mode_str += "(louvain)" - cliques = [] - for comm in communities: - # further decompose! into necessarily 2 communities - community_as_graph = nx.Graph(graph.subgraph(comm)) - if len(community_as_graph.nodes()) <= 2: - cliques += [community_as_graph.nodes()] - else: - cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2)) - - elif clique_mode == "testing": - # k-clique communities - #from networkx.algorithms.community import k_clique_communities - #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter - from cdlib import algorithms - cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1}) - #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1}) - #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of - from collections import defaultdict - cliques_dict2 = defaultdict(list) - for (node, values) in cliques_dict.to_node_community_map().items(): - for value in values: - cliques_dict2[value] += [node] - cliques = list(cliques_dict2.values()) - mode_str += "(testing)" - - if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")") - - cliques_debugging = True - if cliques_debugging: - - from collections import Counter - len_cliques = Counter(map(len,cliques)) - - # Pair halves to create d-graphes - for idx, clq1 in enumerate(cliques): - for clq2_idx in range(idx+1, len(cliques)): - clq2 = cliques[clq2_idx] - - # Check for d-graph candidates - d_graph = Dgraph(node) - d_graph.put_halves(clq1, clq2, neighbors_graph) - - factor = 0.5 - #if clique_mode == "testing": factor = 1 # still allows louvain's big communities - #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor) - if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor: - node_d_graphs.add(d_graph) - - # Fill the index by node - key = frozenset({node}) - for dg in node_d_graphs: - d_graphs.add_value(key, dg) - - d_graphs.filter_by_entry() - return d_graphs - - -# """ Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's -# dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and -# all the dominated dg are removed from the list. -# @param dg A new dg to add/filter. 
diff --git a/deconvolution/dgraph/d_graph.py b/deconvolution/dgraph/d_graph.py
index e07ded4cedcc03b4b2229a49434b57d1c69b2415..b2a915c114807f24b0d3840e799d46f523673513 100644
--- a/deconvolution/dgraph/d_graph.py
+++ b/deconvolution/dgraph/d_graph.py
@@ -1,8 +1,5 @@
-import networkx as nx
 from functools import total_ordering
 
-import community # pip install python-louvain TODO: Ajouter dans les requirements !!!
-from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
 
 
 @total_ordering
@@ -229,175 +226,93 @@ class Dgraph(object):
         str_nodes.sort()
         return str(str_nodes)
-
-""" From a barcode graph, compute all the possible max d-graphs by node.
-    @param graph A barcode graph
-    @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
-"""
-def compute_all_max_d_graphs(graph, debug=False, clique_mode=None):
-    d_graphs = FixedDGIndex(size=1)
-
-    for idx, node in enumerate(list(graph.nodes())):
-        #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes
-        #print(f"\r{idx+1}/{len(graph.nodes())}")
-        neighbors = list(graph.neighbors(node))
-        neighbors_graph = nx.Graph(graph.subgraph(neighbors))
-
-        node_d_graphs = set()
-
-        mode_str = " "
-        if clique_mode is None:
-            # Find all the cliques (equivalent to compute all the candidate half d-graph)
-            cliques = list(nx.find_cliques(neighbors_graph))
-            mode_str += "(max-cliques)"
-        elif clique_mode == "louvain":
-            louvain = community.best_partition(neighbors_graph) # louvain
-            # high resolution seems to work better
-            communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
-            mode_str += "(louvain)"
-            cliques = []
-            for comm in communities:
-                # further decompose! into necessarily 2 communities
-                community_as_graph = nx.Graph(graph.subgraph(comm))
-                if len(community_as_graph.nodes()) <= 2:
-                    cliques += [community_as_graph.nodes()]
-                else:
-                    cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
-
-        elif clique_mode == "testing":
-            # k-clique communities
-            #from networkx.algorithms.community import k_clique_communities
-            #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
-            from cdlib import algorithms
-            cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
-            #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
-            #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
-            from collections import defaultdict
-            cliques_dict2 = defaultdict(list)
-            for (node, values) in cliques_dict.to_node_community_map().items():
-                for value in values:
-                    cliques_dict2[value] += [node]
-            cliques = list(cliques_dict2.values())
-            mode_str += "(testing)"
-
-        if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
-
-        cliques_debugging = True
-        if cliques_debugging:
-
-            from collections import Counter
-            len_cliques = Counter(map(len,cliques))
-
-        # Pair halves to create d-graphes
-        for idx, clq1 in enumerate(cliques):
-            for clq2_idx in range(idx+1, len(cliques)):
-                clq2 = cliques[clq2_idx]
-
-                # Check for d-graph candidates
-                d_graph = Dgraph(node)
-                d_graph.put_halves(clq1, clq2, neighbors_graph)
-
-                factor = 0.5
-                #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
-                #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
-                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
-                    node_d_graphs.add(d_graph)
-
-        # Fill the index by node
-        key = frozenset({node})
-        for dg in node_d_graphs:
-            d_graphs.add_value(key, dg)
-
-    d_graphs.filter_by_entry()
-    return d_graphs
-
-
-# """ Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's
-# dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and
-# all the dominated dg are removed from the list.
-# @param dg A new dg to add/filter.
-# @param undominated_dgs_list A list of dg where any of them is dominated by another one.
-# @return The updated undominated list.
-# """
-# def add_new_dg_regarding_domination(dg, undominated_dgs_list):
-#     to_remove = []
-#     dominated = False
-#
-#     # Search for domination relations
-#     for u_dg in undominated_dgs_list:
-#         if not dominated and dg.is_dominated(u_dg):
-#             dominated = True
-#         if u_dg.is_dominated(dg):
-#             to_remove.append(u_dg)
-#
-#     # Remove dominated values
-#     size = len(undominated_dgs_list)
-#     for dg2 in to_remove:
-#         undominated_dgs_list.remove(dg2)
-#
-#     # Add the new dg
-#     if not dominated:
-#         undominated_dgs_list.append(dg)
-#
-#     return undominated_dgs_list
 #
+# from multiprocessing import Pool
 #
+# """ From a barcode graph, compute all the possible max d-graphs by node.
+#     @param graph A barcode graph
+#     @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
+# """
+# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
+#     d_graphs = FixedDGIndex(size=1)
+#     pool = Pool(processes=threads)
 #
-# def filter_dominated(d_graphs, overall=False, in_place=True):
-#     if not overall:
-#         return local_domination_filter(d_graphs, in_place)
+#     nds = list(graph.nodes())
+#     for idx, node in enumerate(nds):
+#         print(idx+1, '/', len(nds))
+#         #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes
+#         #print(f"\r{idx+1}/{len(graph.nodes())}")
+#         neighbors = list(graph.neighbors(node))
+#         neighbors_graph = nx.Graph(graph.subgraph(neighbors))
 #
-#     all_d_graphs = []
-#     for dgs in d_graphs.values():
-#         all_d_graphs.extend(dgs)
+#         node_d_graphs = set()
 #
-#     all_d_graphs = list_domination_filter(all_d_graphs)
+#         mode_str = " "
+#         if clique_mode is None:
+#             # Find all the cliques (equivalent to compute all the candidate half d-graph)
+#             cliques = []
+#             for clique in nx.find_cliques(neighbors_graph):
+#                 if len(clique) > 3:
+#                     cliques.append(clique)
+#             mode_str += "(max-cliques)"
+#         elif clique_mode == "louvain":
+#             louvain = community.best_partition(neighbors_graph) # louvain
+#             # high resolution seems to work better
+#             communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
+#             mode_str += "(louvain)"
+#             cliques = []
+#             for comm in communities:
+#                 # further decompose! into necessarily 2 communities
+#                 community_as_graph = nx.Graph(graph.subgraph(comm))
+#                 if len(community_as_graph.nodes()) <= 2:
+#                     cliques += [community_as_graph.nodes()]
+#                 else:
+#                     cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
 #
-#     return d_graphs
+#         elif clique_mode == "testing":
+#             # k-clique communities
+#             #from networkx.algorithms.community import k_clique_communities
+#             #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
+#             from cdlib import algorithms
+#             cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
+#             #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
+#             #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
+#             from collections import defaultdict
+#             cliques_dict2 = defaultdict(list)
+#             for (node, values) in cliques_dict.to_node_community_map().items():
+#                 for value in values:
+#                     cliques_dict2[value] += [node]
+#             cliques = list(cliques_dict2.values())
+#             mode_str += "(testing)"
 #
+#         if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
 #
-# """ Filter the d-graphs by node. In a list of d-graph centered on a node n, if a d-graph is
-# completely included in another and have a highest distance score to the optimal, then it is
-# filtered out.
-# @param d_graphs All the d-graphs to filter, sorted by central node.
-# @param in_place If true, modify the content of d_graph with the filtered version. If False,
-# copy all the content in a new dictionary.
-# @return The filtered dictionary of d-graph per node.
-# """
-# def local_domination_filter(d_graphs, in_place=True):
-#     filtered = d_graphs if in_place else {}
+#         cliques_debugging = True
+#         if cliques_debugging:
 #
-#     # Filter node by node
-#     for node, d_graph_list in d_graphs.items():
-#         lst = str(sorted([str(x) for x in d_graph_list]))
-#         filtered[node] = brutal_list_domination_filter(d_graph_list, node_name=str(node))
+#             from collections import Counter
+#             len_cliques = Counter(map(len,cliques))
 #
-#     return filtered
+#         # Pair halves to create d-graphes
+#         for idx, clq1 in enumerate(cliques):
+#             for clq2_idx in range(idx+1, len(cliques)):
+#                 clq2 = cliques[clq2_idx]
 #
+#                 # Check for d-graph candidates
+#                 d_graph = Dgraph(node)
+#                 d_graph.put_halves(clq1, clq2, neighbors_graph)
 #
-# """ Filter the input d-graphs list. In the list of d-graph centered on a node n, if a d-graph is
-# completely included in another and have a highest distance score to the optimal, then it is
-# filtered out.
-# @param d_graphs All the d-graphs to filter.
-# @return The filtered dictionary of d-graph per node.
-# """
-# def list_domination_filter(d_graphs):
-#     filtered = []
+#                 factor = 0.5
+#                 #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
+#                 #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
+#                 if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
+#                     node_d_graphs.add(d_graph)
 #
-#     # Filter d-graph by d-graph
-#     for dg in d_graphs:
-#         add_new_dg_regarding_domination(dg, filtered)
+#         # Fill the index by node
+#         key = frozenset({node})
+#         for dg in node_d_graphs:
+#             d_graphs.add_value(key, dg)
 #
-#     return set(filtered)
-#
-# def brutal_list_domination_filter(d_graphs, node_name=""):
-#     undominated = set(d_graphs)
-#     to_remove = set()
-#
-#     for dg1 in d_graphs:
-#         for dg2 in d_graphs:
-#             if dg1.is_dominated(dg2):
-#                 to_remove.add(dg1)
-#                 break
+#     d_graphs.filter_by_entry()
+#     return d_graphs
 #
-#     return undominated - to_remove
-# """ -# def list_domination_filter(d_graphs): -# filtered = [] +# factor = 0.5 +# #if clique_mode == "testing": factor = 1 # still allows louvain's big communities +# #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor) +# if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor: +# node_d_graphs.add(d_graph) # -# # Filter d-graph by d-graph -# for dg in d_graphs: -# add_new_dg_regarding_domination(dg, filtered) +# # Fill the index by node +# key = frozenset({node}) +# for dg in node_d_graphs: +# d_graphs.add_value(key, dg) # -# return set(filtered) -# -# def brutal_list_domination_filter(d_graphs, node_name=""): -# undominated = set(d_graphs) -# to_remove = set() -# -# for dg1 in d_graphs: -# for dg2 in d_graphs: -# if dg1.is_dominated(dg2): -# to_remove.add(dg1) -# break +# d_graphs.filter_by_entry() +# return d_graphs # -# return undominated - to_remove diff --git a/deconvolution/main/test_index_cliques.py b/deconvolution/main/test_index_cliques.py new file mode 100644 index 0000000000000000000000000000000000000000..34af2836800ab406faa0abf97290cd3d8d73761b --- /dev/null +++ b/deconvolution/main/test_index_cliques.py @@ -0,0 +1,43 @@ +import argparse +import networkx as nx +from itertools import combinations +from multiprocessing import Pool + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.') + parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.') + parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation') + + args = parser.parse_args() + return args + + +def main(): + args = parse_arguments() + print(args.barcode_graph) + + graph = nx.read_gexf(args.barcode_graph) + + for node in graph.nodes(): + neighbors = list(graph.neighbors(node)) + subgraph = nx.Graph(graph.subgraph(neighbors)) + + index_cliques(subgraph, node) + + +def index_cliques(graph, node): + clique_index = {} + + nb_cliques = 0 + nb_unfiltered_cliques = 0 + for clique in nx.find_cliques(graph): + nb_cliques += 1 + if len(clique) > 3: + # print(clique) + nb_unfiltered_cliques += 1 + print(nb_unfiltered_cliques, '/', nb_cliques) + + +if __name__ == "__main__": + main() diff --git a/deconvolution/main/to_d2_graph.py b/deconvolution/main/to_d2_graph.py index dc409acf7fd7ac63e6034e40fcaeb0948a2c8175..e8a14d74d3b5cce359ba3416b1003ac443c36c32 100755 --- a/deconvolution/main/to_d2_graph.py +++ b/deconvolution/main/to_d2_graph.py @@ -11,6 +11,7 @@ def parse_arguments(): parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.') parser.add_argument('barcode_graph', help='The barcode graph file. 
diff --git a/deconvolution/main/to_d2_graph.py b/deconvolution/main/to_d2_graph.py
index dc409acf7fd7ac63e6034e40fcaeb0948a2c8175..e8a14d74d3b5cce359ba3416b1003ac443c36c32 100755
--- a/deconvolution/main/to_d2_graph.py
+++ b/deconvolution/main/to_d2_graph.py
@@ -11,6 +11,7 @@ def parse_arguments():
     parser = argparse.ArgumentParser(description='Transform a 10X barcode graph into a d2 graph. The program dig for the d-graphs and then merge them into a d2-graph.')
     parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.')
     parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.")
+    parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
     parser.add_argument('--debug', '-d', action='store_true', help="Debug")
     parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
     parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
@@ -54,7 +55,7 @@ def main():
     dprint("D2 graph object created")
     dprint("constructing d2 graph from barcode graph")
     index_size = 8 #if clique_mode is None else 3
-    d2g.construct_from_barcodes(index_size=index_size, debug=debug, clique_mode=clique_mode)
+    d2g.construct_from_barcodes(index_size=index_size, debug=debug, clique_mode=clique_mode, threads=args.threads)
    dprint("[debug] d2 graph constructed")
    d2g.save(f"{args.output_prefix}.tsv")
 
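Note: the new --threads option is parsed and forwarded to construct_from_barcodes(); in this changeset the factories do not use it yet, so the value is accepted but has no effect on the computation. An illustrative invocation (paths and file names are placeholders): python deconvolution/main/to_d2_graph.py barcodes.gexf -o sample_d2 -t 8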