Commit 2122230c authored by Yoann Dufresne

simplification of d2 construction

parent e6072e12
......@@ -83,28 +83,28 @@ class D2Graph(nx.Graph):
d_graph.idx = idx
self.node_by_idx[idx] = d_graph
# Index all the d-graphs
if verbose:
print("Compute the dmer dgraph")
print("\tIndexing")
# self.index = FixedDGIndex(size=index_size)
self.index = VariableDGIndex(size=index_size)
for idx, dg in enumerate(self.all_d_graphs):
if verbose:
print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='')
self.index.add_dgraph(dg)
# self.var_index.add_dgraph(dg)
if verbose:
print()
print("\tFilter index")
self.index.filter_by_entry()
# self.index = self.create_index_from_tuples(index_size, verbose=verbose)
# self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
# Compute node distances for pair of dgraphs that share at least 1 dmer.
# # Index all the d-graphs
# if verbose:
# print("Compute the dmer dgraph")
# print("\tIndexing")
# # self.index = FixedDGIndex(size=index_size)
# self.index = VariableDGIndex(size=index_size)
# for idx, dg in enumerate(self.all_d_graphs):
# if verbose:
# print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='')
# self.index.add_dgraph(dg)
# # self.var_index.add_dgraph(dg)
# if verbose:
# print()
# print("\tFilter index")
# self.index.filter_by_entry()
# # self.index = self.create_index_from_tuples(index_size, verbose=verbose)
# # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
# # Compute node distances for pair of dgraphs that share at least 1 dmer.
if verbose:
print("Compute the graph")
# Create the graph
self.bidict_nodes = self.create_graph()
self.bidict_nodes = self.create_graph_from_node_neighborhoods()
def get_covering_variables(self, udg):
......@@ -179,7 +179,43 @@ class D2Graph(nx.Graph):
return index
def create_graph(self):
def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
    """Build the graph by linking d-graphs found through their neighbor nodes.

    Every d-graph of ``self.all_d_graphs`` becomes a node identified by its
    ``idx``. For each non-center node of a d-graph, the d-graphs stored in
    ``self.d_graphs_per_node`` under ``frozenset({node})`` are compared to
    it, and an edge is added when they are close enough. Nodes that end up
    without any neighbor are removed.

    Args:
        neighborhood_threshold: Highest accepted value of
            ``distance / (len(dg.nodes) + len(other.nodes))`` for an edge
            to be created between two d-graphs.

    Returns:
        A ``bidict`` associating each d-graph kept in the graph with its
        node index.
    """
    idx_by_dg = {}

    # One graph node per d-graph, annotated with its descriptive attributes.
    for dg in self.all_d_graphs:
        dg_idx = dg.idx
        idx_by_dg[dg] = dg_idx
        self.add_node(dg_idx)

        edge_labels = [str(self.barcode_edge_idxs[x]) for x in dg.edges]
        attributes = self.nodes[dg_idx]
        attributes["barcode_edges"] = " ".join(edge_labels)
        attributes["score"] = f"{dg.score}/{dg.get_optimal_score()}"
        attributes["udg"] = str(dg)

    # Link each d-graph to the d-graphs registered under its neighbor nodes.
    # NOTE(review): d_graphs_per_node presumably maps frozenset({node}) to
    # the d-graphs centered on that node -- confirm against its construction.
    for dg in self.all_d_graphs:
        for member in dg.to_node_set():
            if member == dg.center:
                continue

            key = frozenset((member,))
            if key not in self.d_graphs_per_node:
                continue

            for other in self.d_graphs_per_node[key]:
                distance = dg.distance_to(other)
                combined_size = len(dg.nodes) + len(other.nodes)
                if distance / combined_size > neighborhood_threshold:
                    continue
                self.add_edge(idx_by_dg[dg], idx_by_dg[other], distance=distance)

    # Drop the nodes left without neighbors; iterate over a snapshot because
    # the mapping is modified while filtering.
    for dg in list(idx_by_dg):
        dg_idx = idx_by_dg[dg]
        if not list(self.neighbors(dg_idx)):
            self.remove_node(dg_idx)
            del idx_by_dg[dg]

    return bidict(idx_by_dg)
def create_graph_from_index(self):
nodes = {}
for dmer in self.index:
......
......@@ -23,13 +23,14 @@ def process_node(factory, node):
if factory.verbose:
print(f"{my_value}: d-graphs generated, starting filtering")
print(f"{my_value}: {len(dgs)} graphs to filter")
print(f"{my_value}: {len(dgs)} udg to filter")
sys.stdout.flush()
# udg domination filtering
dgs = AbstractDGIndex.filter_entry(dgs)
if factory.verbose:
print(f"{my_value}: {len(dgs)} udg remaining after filtering")
print(f"{my_value}({factory.nb_nodes}) terminated")
sys.stdout.flush()
......@@ -68,7 +69,6 @@ class AbstractDGFactory:
for dg in dgs:
index.add_value(key, dg)
# index.filter_by_entry()
return index
......
......@@ -84,7 +84,7 @@ class CliqueDGFactory(AbstractDGFactory):
if self.debug and len(clq_G.nodes) > 0:
import os
nx.write_gexf(clq_G, f"{self.mwm_dir}/{node}.gexf")
nx.write_gexf(clq_G, f"{self.mwm_dir}/{central_node.replace('/', '-')}.gexf")
# d-graph computation regarding max weight matching
mwm = nx.algorithms.max_weight_matching(clq_G)
......
......@@ -54,9 +54,9 @@ def main():
if args.debug:
debug = True
debug_path = f"{args.output_prefix}_debug"
import os
import os, shutil
if os.path.isdir(debug_path):
os.rmdir(debug_path)
shutil.rmtree(debug_path)
os.mkdir(debug_path)
# Index size must be changed for general purpose. 8 is good for d=5
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment