Commit 66948796 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

overlapping deconvolution

parent e8142332
#!/usr/bin/env python3
import sys
import math
import networkx as nx
import itertools
def deconvolve(G,node):
def deconvolve(G,node, verbose=0):
neighbors = list(G.neighbors(node))
print("node",node,len(neighbors),"neighbors")
nei_len = len(neighbors)
# Extract neighbors from the graph
G_neighbors = nx.Graph(G.subgraph(neighbors))
communities = get_communities(G_neighbors, node=="273:597_148")
communities = get_communities(G_neighbors, verbose=verbose-1)
# Continue only if something needs to be split.
if len(communities) == 1:
......@@ -30,16 +31,20 @@ def deconvolve(G,node):
# Remove old node
G.remove_node(node)
print("splitted into", len(communities), "parts\n")
if verbose > 0:
print("node",node,nei_len,"neighbors")
print("splitted into", len(communities), "parts\n")
def get_communities(G, max_overlap=2, verbose=False):
def get_communities(G, max_overlap=1, verbose=0):
# Half d-graphs are cliques. So compute max cliques
cliques = list(nx.find_cliques(G))
if verbose:
if verbose > 0:
print("clique list")
for clq in cliques:
print(clq, "\n")
print(clq)
print()
candidate_d_graphs = []
......@@ -56,16 +61,17 @@ def get_communities(G, max_overlap=2, verbose=False):
if val in clq2:
overlap += 1
if overlap > max_overlap:
# print(overlap, "is too high overlap")
continue
# Check for d-graph candidates
d_graph = compute_d_graph(clq1, clq2, G)
d_graph = compute_d_graph(clq1, clq2, G, verbose=verbose-1)
if d_graph != None:
candidate_d_graphs.append(d_graph)
# Extract communities from all the possible d-graphs in the neighborhood.
# This is a minimal covering d_graph algorithm.
minimal_d_graphes = filter_d_graphs(candidate_d_graphs)
minimal_d_graphes = filter_d_graphs(candidate_d_graphs, max_overlap=max_overlap)
# If no community detected, return one big.
if len(minimal_d_graphes) == 0:
......@@ -87,16 +93,34 @@ def get_communities(G, max_overlap=2, verbose=False):
@param G the graph of the neighbors of the central node (not present).
@return A pair of lists that are the 2 halves of the d-graph ordered from the center.
"""
def compute_d_graph(clq1, clq2, G, verbose=False):
def compute_d_graph(clq1, clq2, G, max_diff_size=1, verbose=0):
# Compute the arities between the cliques
arities1 = {name:0 for name in clq1}
arities2 = {name:0 for name in clq2}
sum_edges = 0
# TODO : Remove this part and improve the detection
if len(clq1) != len(clq2):
return None
# /TODO
# Limit the number of recursions
if abs(len(clq1)-len(clq2)) > max_diff_size:
return None
# Recursion on the biggest clique to reduce complexity.
smallest, largest = (clq1, clq2) if len(clq2) > len(clq1) else (clq2, clq1)
minimal_weighted_d_graph = None
minimal_weight = math.inf
for idx in range(len(largest)):
recur_d_graph = compute_d_graph(smallest, largest[:idx]+largest[idx+1:], G, verbose=verbose)
if recur_d_graph != None and recur_d_graph[2] < minimal_weight:
minimal_weighted_d_graph = recur_d_graph
minimal_weight = recur_d_graph[2]
if verbose > 0:
print(f"Recursive calls for:\n{clq1}\n{clq2}\n")
print(minimal_weighted_d_graph, "\n")
print("/ Recursive\n")
return minimal_weighted_d_graph
min_clq_size = min(len(clq1), len(clq2))
......@@ -105,15 +129,15 @@ def compute_d_graph(clq1, clq2, G, verbose=False):
neighbors = list(G.neighbors(node1))
for node2 in clq2:
if node2 in neighbors:
if node1 == node2 or node2 in neighbors:
# print(node1, "-", node2)
arities1[node1] += 1
arities2[node2] += 1
sum_edges += 1
if verbose:
print(clq1, clq2)
print(arities1, arities2, "\n")
# if verbose:
# print(clq1, clq2)
# print(arities1, arities2, "\n")
# Reject if there are not enough edges
if sum_edges < min_clq_size * (min_clq_size-1) / 2:
......@@ -127,40 +151,63 @@ def compute_d_graph(clq1, clq2, G, verbose=False):
lst1 = [key for key, value in sorted(arities1.items(), key=lambda tup: -tup[1])]
lst2 = [key for key, value in sorted(arities2.items(), key=lambda tup: -tup[1])]
if verbose:
if verbose > 0:
print(min_clq_size)
print(lst1, "\n", lst2, "\n")
# Return the 2 halves of the d-graph
return lst1, lst2
return lst1, lst2, sum_edges
""" Filter the candidates according to their compatibility
"""
def filter_d_graphs(candidates):
# Count for each node the number of their apparition
counts = {}
def filter_d_graphs(candidates, max_overlap=0):
# Count, for each node, its number of occurrences (grouped by the size of the overlap between the two halves)
selected = {}
counts_by_size = [{} for _ in range(max_overlap+1)]
sorted_d_graphs = [[] for _ in range(max_overlap+1)]
for d_graph in candidates:
# Compute intersection of the two halves
common_length = len(set(d_graph[0]) & set(d_graph[1]))
sorted_d_graphs[common_length].append(d_graph)
# Count occurrences
for node in itertools.chain(d_graph[0], d_graph[1]):
if not node in counts:
counts[node] = 0
counts[node] += 1
if not node in counts_by_size[common_length]:
counts_by_size[common_length][node] = 0
counts_by_size[common_length][node] += 1
selected[node] = False
# Take d-graphs with nodes that appear only once
filtered = []
selected = {node:False for node in counts.keys()}
for d_graph in candidates:
for node in itertools.chain(d_graph[0], d_graph[1]):
if counts[node] == 1:
# Add the d_graph to the selection
filtered.append(d_graph)
# register selection of the nodes
for node in itertools.chain(d_graph[0], d_graph[1]):
selected[node] = True
# Over for this d-graph
for overlap_size in range(max_overlap+1):
# Look first for d-graphs whose halves share no node, then 1 shared node, ...
for d_graph in sorted_d_graphs[overlap_size]:
common_length = len(set(d_graph[0]) & set(d_graph[1]))
for node in itertools.chain(d_graph[0], d_graph[1]):
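# Count appearances. NOTE(review): the loop below iterates `length` but indexes counts_by_size with `common_length`; presumably `length` was intended - confirm.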
total_count = 0
for length in range(overlap_size+1):
total_count += counts_by_size[common_length][node] if node in counts_by_size[common_length] else 0
# Add d-graph
if total_count == 1:
# Add the d_graph to the selection
filtered.append(d_graph)
# register selection of the nodes
for node in itertools.chain(d_graph[0], d_graph[1]):
selected[node] = True
# Over for this d-graph
break
# Stop if all nodes are selected
over = True
for val in selected.values():
if not val:
over = False
break
if over: break
# TODO: improve performance when there is no unique solution
for val in selected.values():
......@@ -184,7 +231,7 @@ def main():
# Deconvolve
g_nodes = list(G.nodes())
for node in g_nodes:
deconvolve(G,node)
deconvolve(G,node, verbose=1)# if (node=="273:597_148") else 0)
# exit()
print(len(g_nodes), "->", len(list(G.nodes())))
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment