Commit e6072e12 authored by Yoann Dufresne

Add debug graph output for max weight matching (mwm)

parent cdd430e8
......@@ -12,7 +12,7 @@ from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory
class D2Graph(nx.Graph):
"""D2Graph (read it (d-graph)²)"""
def __init__(self, barcode_graph):
def __init__(self, barcode_graph, debug=False, debug_path='.'):
super(D2Graph, self).__init__()
self.all_d_graphs = []
self.d_graphs_per_node = {}
......@@ -31,6 +31,9 @@ class D2Graph(nx.Graph):
self.barcode_edge_idxs[edge] = idx
self.barcode_edge_idxs[(edge[1], edge[0])] = idx
self.debug = debug
self.debug_path = debug_path
""" Redefine subgraph to avoid type instantiation errors.
"""
......@@ -67,7 +70,7 @@ class D2Graph(nx.Graph):
if clique_mode == "louvain":
dg_factory = LouvainDGFactory(self.barcode_graph)
else:
dg_factory = CliqueDGFactory(self.barcode_graph)
dg_factory = CliqueDGFactory(self.barcode_graph, debug=self.debug, debug_path=self.debug_path)
self.d_graphs_per_node = dg_factory.generate_all_dgraphs(threads=threads, verbose=True)
if verbose:
counts = sum(len(x) for x in self.d_graphs_per_node.values())
......@@ -201,7 +204,7 @@ class D2Graph(nx.Graph):
# Add on small distances
d = dg.distance_to(prev_dg)
if d <= 5:
if d <= min(len(dg.node_set)/2, len(prev_dg.node_set)/2):
self.add_edge(nodes[dg], nodes[prev_dg], distance=d)
return bidict(nodes)
......
......@@ -36,7 +36,8 @@ def process_node(factory, node):
return node, dgs
class AbstractDGFactory:
def __init__(self, graph):
def __init__(self, graph, debug=False):
self.debug = debug
self.graph = graph
self.nb_nodes = len(self.graph.nodes())
self.verbose = False
......
......@@ -7,11 +7,21 @@ from deconvolution.dgraph import AbstractDGIndex
class CliqueDGFactory(AbstractDGFactory):
def __init__(self, graph, min_size_clique=4, dg_max_divergence_factor=0.5):
super(CliqueDGFactory, self).__init__(graph)
def __init__(self, graph, min_size_clique=4, dg_max_divergence_factor=0.5, debug=False, debug_path="."):
    """Set up the clique-based d-graph factory.

    :param graph: graph forwarded to the parent factory constructor.
    :param min_size_clique: stored as ``self.min_size``.
    :param dg_max_divergence_factor: stored as ``self.dg_max_divergence_factor``.
    :param debug: forwarded to the parent factory; when true, a fresh
        ``<debug_path>/mwm`` directory is created to receive the debug
        graph dumps.
    :param debug_path: parent directory of the ``mwm`` debug directory.
    :raises OSError: if the debug directory cannot be removed or created.
    """
    super(CliqueDGFactory, self).__init__(graph, debug=debug)
    self.min_size = min_size_clique
    self.dg_max_divergence_factor = dg_max_divergence_factor

    # Always define the debug attributes so they can be read safely even
    # when debugging is disabled.
    self.debug_path = debug_path
    self.mwm_dir = None

    if debug:
        import os
        import shutil

        # Create mwm debug dir
        self.mwm_dir = f"{self.debug_path}/mwm"
        if os.path.isdir(self.mwm_dir):
            # A previous debug run leaves .gexf files in this directory;
            # os.rmdir only removes empty directories and would raise.
            shutil.rmtree(self.mwm_dir)
        # makedirs also creates debug_path itself when it does not exist yet.
        os.makedirs(self.mwm_dir)
def generate_by_node(self, central_node, subgraph):
node_d_graphs = set()
......@@ -71,6 +81,11 @@ class CliqueDGFactory(AbstractDGFactory):
for idx1, idx2 in clq_pairs:
clq_G.edges[idx1, idx2]['weight'] = max_div - clq_G.edges[idx1, idx2]['weight']
if self.debug and len(clq_G.nodes) > 0:
import os
nx.write_gexf(clq_G, f"{self.mwm_dir}/{node}.gexf")
# d-graph computation regarding max weight matching
mwm = nx.algorithms.max_weight_matching(clq_G)
for idx1, idx2 in mwm:
......
......@@ -226,93 +226,3 @@ class Dgraph(object):
str_nodes.sort()
return str(str_nodes)
#
# from multiprocessing import Pool
#
# """ From a barcode graph, compute all the possible max d-graphs by node.
# @param graph A barcode graph
# @return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
# """
# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
# d_graphs = FixedDGIndex(size=1)
# pool = Pool(processes=threads)
#
# nds = list(graph.nodes())
# for idx, node in enumerate(nds):
# print(idx+1, '/', len(nds))
# #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes
# #print(f"\r{idx+1}/{len(graph.nodes())}")
# neighbors = list(graph.neighbors(node))
# neighbors_graph = nx.Graph(graph.subgraph(neighbors))
#
# node_d_graphs = set()
#
# mode_str = " "
# if clique_mode is None:
# # Find all the cliques (equivalent to compute all the candidate half d-graph)
# cliques = []
# for clique in nx.find_cliques(neighbors_graph):
# if len(clique) > 3:
# cliques.append(clique)
# mode_str += "(max-cliques)"
# elif clique_mode == "louvain":
# louvain = community.best_partition(neighbors_graph) # louvain
# # high resolution seems to work better
# communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
# mode_str += "(louvain)"
# cliques = []
# for comm in communities:
# # further decompose! into necessarily 2 communities
# community_as_graph = nx.Graph(graph.subgraph(comm))
# if len(community_as_graph.nodes()) <= 2:
# cliques += [community_as_graph.nodes()]
# else:
# cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
#
# elif clique_mode == "testing":
# # k-clique communities
# #from networkx.algorithms.community import k_clique_communities
# #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
# from cdlib import algorithms
# cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
# #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
# #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
# from collections import defaultdict
# cliques_dict2 = defaultdict(list)
# for (node, values) in cliques_dict.to_node_community_map().items():
# for value in values:
# cliques_dict2[value] += [node]
# cliques = list(cliques_dict2.values())
# mode_str += "(testing)"
#
# if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
#
# cliques_debugging = True
# if cliques_debugging:
#
# from collections import Counter
# len_cliques = Counter(map(len,cliques))
#
# # Pair halves to create d-graphes
# for idx, clq1 in enumerate(cliques):
# for clq2_idx in range(idx+1, len(cliques)):
# clq2 = cliques[clq2_idx]
#
# # Check for d-graph candidates
# d_graph = Dgraph(node)
# d_graph.put_halves(clq1, clq2, neighbors_graph)
#
# factor = 0.5
# #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
# #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
# if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
# node_d_graphs.add(d_graph)
#
# # Fill the index by node
# key = frozenset({node})
# for dg in node_d_graphs:
# d_graphs.add_value(key, dg)
#
# d_graphs.filter_by_entry()
# return d_graphs
#
......@@ -12,7 +12,7 @@ def parse_arguments():
parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formated file.')
parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.")
parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
# parser.add_argument('--debug', '-d', action='store_true', help="Debug")
parser.add_argument('--debug', '-d', action='store_true', help="Debug")
parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
parser.add_argument('--comtest', '-k', action='store_true', help="Enable [placeholder] community detection algorithm instead of max-cliques")
......@@ -30,7 +30,6 @@ def main():
def dprint(s):
from datetime import datetime
t = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
# if debug: print(t,"[debug]",s)
dprint("loading barcode graph")
if filename.endswith('.gexf'):
......@@ -49,9 +48,20 @@ def main():
else:
clique_mode = None
# Debug config
debug = False
debug_path = "/dev/null"
if args.debug:
debug = True
debug_path = f"{args.output_prefix}_debug"
import os
if os.path.isdir(debug_path):
os.rmdir(debug_path)
os.mkdir(debug_path)
# Index size must be changed for general purpose. 8 is good for d=5
dprint("creating D2graph object")
d2g = d2.D2Graph(G)
d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path)
dprint("D2 graph object created")
dprint("constructing d2 graph from barcode graph")
index_size = 4 #if clique_mode is None else 3
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment