Commit 80495d07 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

start of d-graph based deconvolution

parent 398b47ac
# arguments: graphml file
# attempts to deconvolve a barcode graph using community detection
#!/usr/bin/env python3
import sys
import itertools
import operator
from collections import Counter
from networkx import nx
#from networkx.algorithms import community
import community # python-louvain
import networkx as nx
import sys
filename = sys.argv[1]
G = None
if filename.endswith('.graphml'):
G = nx.read_graphml(filename)
elif filename.endswith('.gexf'):
G = nx.read_gexf(filename)
def deconvolve2(G,node):
def deconvolve(G,node):
neighbors = list(G.neighbors(node))
G2 = nx.Graph(G.subgraph(neighbors))
# make sure there is a single connected components
while True:
ccs = list(nx.connected_components(G2))
if len(ccs) == 1:
# else artificially and weakly connect components to run fluid community
n1 = list(ccs[0])[0]
n2 = list(ccs[1])[0]
#cliques = list(community.asyn_fluidc(G2,2))
#cliques = list(community.girvan_newman(G2)) # too slow
#cliques = list(community.label_propagation_communities(G2)) # not very accurate
#cliques = list(community.asyn_lpa_communities(G2)) # seems accurate but hands
cliques = community.best_partition(G2)
print("louvain communities",[len([c for c,i in cliques.items() if i == clique_id]) for clique_id in set(cliques.values())])
if len(cliques) == 1:
return # nothing to deconvolve here
for node2,clique_id in cliques.items():
G.add_node(node+"-MI%d" % clique_id)
G.add_edge(node+"-MI%d" % clique_id, node2, contigs=(G[node][node2]['contigs'] if 'contigs' in G[node][node2] else ""))
# for debugging
G2.nodes[node2]['mi'] = clique_id
debug = True
if debug:
g_nodes = list(G.nodes())
for node in g_nodes:
# Extract neighbors from the graph
G_neighbors = nx.Graph(G.subgraph(neighbors))
def get_communities(G):
# Half d-graphs are cliques. So compute max cliques
cliques = list(nx.find_cliques(G))
print(cliques, "\n")
# d-graphs are composed of 2 cliques related by the current node.
# The overlapp between the 2 cliques determine the node order in the d-graph.
for idx, clq1 in enumerate(cliques):
to_compare = cliques[idx+1:]
for clq2 in to_compare:
# TO REMOVE : Temporary only check for distinct cliques
jump = False
for val in clq1:
if val in clq2:
jump = True
if jump: continue
# Check for d-graph
compute_d_graph(clq1, clq2, G)
# print(clq1, clq2, is_d_graph(clq1, clq2, G))
# Extract communites from all the possible d-graphes in the neighborood.
def compute_d_graph(clq1, clq2, G):
# Compute the arities between the cliques
arities1 = {name:0 for name in clq1}
arities2 = {name:0 for name in clq2}
sum_edges = 0
# Compute link arities
for node1 in clq1:
neighbors = list(G.neighbors(node1))
for node2 in clq2:
if node2 in neighbors:
# print(node1, "-", node2)
arities1[node1] += 1
arities2[node2] += 1
sum_edges += 1
# Reject if not enought edges
if sum_edges < len(clq1) * (len(clq1)-1) / 2:
return None
print(clq1, clq2)
print(arities1, arities2, "\n")
return None
def main():
# Parsing the input file
filename = sys.argv[1]
G = None
if filename.endswith('.graphml'):
G = nx.read_graphml(filename)
elif filename.endswith('.gexf'):
G = nx.read_gexf(filename)
# Deconvolve
g_nodes = list(G.nodes())
for node in g_nodes:
# nx.write_graphml(G,sys.argv[1]+".deconvolved.graphml")
if __name__ == "__main__":
......@@ -19,8 +19,8 @@
n = 100
o = 5
n = 1000
o = 6
molecules_intervals = []
for i in range(n):
......@@ -51,4 +51,4 @@ for i,a in enumerate(molecules_intervals):
nx.write_graphml(G, f"data/simulated_molecules_{n}_{o}.graphml")
nx.write_graphml(G, f"data/simulated_molecules_{n}_{o-1}.graphml")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment