### overlapping deconvolution

parent e8142332
 #!/usr/bin/env python3 import sys import math import networkx as nx import itertools def deconvolve(G,node): def deconvolve(G,node, verbose=0): neighbors = list(G.neighbors(node)) print("node",node,len(neighbors),"neighbors") nei_len = len(neighbors) # Extract neighbors from the graph G_neighbors = nx.Graph(G.subgraph(neighbors)) communities = get_communities(G_neighbors, node=="273:597_148") communities = get_communities(G_neighbors, verbose=verbose-1) # Continue only if something need to be splited. if len(communities) == 1: ... ... @@ -30,16 +31,20 @@ def deconvolve(G,node): # Remove old node G.remove_node(node) print("splitted into", len(communities), "parts\n") if verbose > 0: print("node",node,nei_len,"neighbors") print("splitted into", len(communities), "parts\n") def get_communities(G, max_overlap=2, verbose=False): def get_communities(G, max_overlap=1, verbose=0): # Half d-graphs are cliques. So compute max cliques cliques = list(nx.find_cliques(G)) if verbose: if verbose > 0: print("clique list") for clq in cliques: print(clq, "\n") print(clq) print() candidate_d_graphs = [] ... ... @@ -56,16 +61,17 @@ def get_communities(G, max_overlap=2, verbose=False): if val in clq2: overlap += 1 if overlap > max_overlap: # print(overlap, "is too high overlap") continue # Check for d-graph candidates d_graph = compute_d_graph(clq1, clq2, G) d_graph = compute_d_graph(clq1, clq2, G, verbose=verbose-1) if d_graph != None: candidate_d_graphs.append(d_graph) # Extract communites from all the possible d-graphes in the neighborood. # This is a minimal covering d_graph algorithm. minimal_d_graphes = filter_d_graphs(candidate_d_graphs) minimal_d_graphes = filter_d_graphs(candidate_d_graphs, max_overlap=max_overlap) # If no community detected, return one big. if len(minimal_d_graphes) == 0: ... ... @@ -87,16 +93,34 @@ def get_communities(G, max_overlap=2, verbose=False): @param G the graph of the neighbors of the central node (not present). @return A pair of lists that are the 2 halves of the d-graph ordered from the center. """ def compute_d_graph(clq1, clq2, G, verbose=False): def compute_d_graph(clq1, clq2, G, max_diff_size=1, verbose=0): # Compute the arities between the cliques arities1 = {name:0 for name in clq1} arities2 = {name:0 for name in clq2} sum_edges = 0 # TODO : Remove this part and improve the detection if len(clq1) != len(clq2): return None # /TODO # Limit the number of recursions if abs(len(clq1)-len(clq2)) > max_diff_size: return None # Recursion on the biggest clique to reduce complexity. smallest, largest = (clq1, clq2) if len(clq2) > len(clq1) else (clq2, clq1) minimal_weighted_d_graph = None minimal_weight = math.inf for idx in range(len(largest)): recur_d_graph = compute_d_graph(smallest, largest[:idx]+largest[idx+1:], G, verbose=verbose) if recur_d_graph != None and recur_d_graph < minimal_weight: minimal_weighted_d_graph = recur_d_graph minimal_weight = recur_d_graph if verbose > 0: print(f"Recursive calls for:\n{clq1}\n{clq2}\n") print(minimal_weighted_d_graph, "\n") print("/ Recursive\n") return minimal_weighted_d_graph min_clq_size = min(len(clq1), len(clq2)) ... ... @@ -105,15 +129,15 @@ def compute_d_graph(clq1, clq2, G, verbose=False): neighbors = list(G.neighbors(node1)) for node2 in clq2: if node2 in neighbors: if node1 == node2 or node2 in neighbors: # print(node1, "-", node2) arities1[node1] += 1 arities2[node2] += 1 sum_edges += 1 if verbose: print(clq1, clq2) print(arities1, arities2, "\n") # if verbose: # print(clq1, clq2) # print(arities1, arities2, "\n") # Reject if not enought edges if sum_edges < min_clq_size * (min_clq_size-1) / 2: ... ... @@ -127,40 +151,63 @@ def compute_d_graph(clq1, clq2, G, verbose=False): lst1 = [key for key, value in sorted(arities1.items(), key=lambda tup: -tup)] lst2 = [key for key, value in sorted(arities2.items(), key=lambda tup: -tup)] if verbose: if verbose > 0: print(min_clq_size) print(lst1, "\n", lst2, "\n") # Return the 2 halves of the d-graph return lst1, lst2 return lst1, lst2, sum_edges """ Filter the candiates regarding their compatibilities """ def filter_d_graphs(candidates): # Count for each node the number of their apparition counts = {} def filter_d_graphs(candidates, max_overlap=0): # Count for each node the number of their apparition (regarding the half overlap) selected = {} counts_by_size = [{} for _ in range(max_overlap+1)] sorted_d_graphs = [[] for _ in range(max_overlap+1)] for d_graph in candidates: # Compute intersection of the two halves common_length = len(set(d_graph) & set(d_graph)) sorted_d_graphs[common_length].append(d_graph) # Count occurences for node in itertools.chain(d_graph, d_graph): if not node in counts: counts[node] = 0 counts[node] += 1 if not node in counts_by_size[common_length]: counts_by_size[common_length][node] = 0 counts_by_size[common_length][node] += 1 selected[node] = False # take d_graphes with nodes that appears only once filtered = [] selected = {node:False for node in counts.keys()} for d_graph in candidates: for node in itertools.chain(d_graph, d_graph): if counts[node] == 1: # Add the d_graph to the selection filtered.append(d_graph) # register selection of the nodes for node in itertools.chain(d_graph, d_graph): selected[node] = True # Over for this d-graph for overlap_size in range(max_overlap+1): # Look for d_graphs with overlapping halves first, then 1 node, ... for d_graph in sorted_d_graphs[overlap_size]: common_length = len(set(d_graph) & set(d_graph)) for node in itertools.chain(d_graph, d_graph): # Count appearance total_count = 0 for length in range(overlap_size+1): total_count += counts_by_size[common_length][node] if node in counts_by_size[common_length] else 0 # Add d-graph if total_count == 1: # Add the d_graph to the selection filtered.append(d_graph) # register selection of the nodes for node in itertools.chain(d_graph, d_graph): selected[node] = True # Over for this d-graph break # Stop if all nodes are selected over = True for val in selected.values(): if not val: over = False break if over: break # TODO : improve performances when there are no uniq solution for val in selected.values(): ... ... @@ -184,7 +231,7 @@ def main(): # Deconvolve g_nodes = list(G.nodes()) for node in g_nodes: deconvolve(G,node) deconvolve(G,node, verbose=1)# if (node=="273:597_148") else 0) # exit() print(len(g_nodes), "->", len(list(G.nodes()))) ... ...
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!