Commit 2122230c by Yoann Dufresne

simplification of d2 construction

parent e6072e12
 ... @@ -83,28 +83,28 @@ class D2Graph(nx.Graph): ... @@ -83,28 +83,28 @@ class D2Graph(nx.Graph): d_graph.idx = idx d_graph.idx = idx self.node_by_idx[idx] = d_graph self.node_by_idx[idx] = d_graph # Index all the d-graphs # # Index all the d-graphs if verbose: # if verbose: print("Compute the dmer dgraph") # print("Compute the dmer dgraph") print("\tIndexing") # print("\tIndexing") # self.index = FixedDGIndex(size=index_size) # # self.index = FixedDGIndex(size=index_size) self.index = VariableDGIndex(size=index_size) # self.index = VariableDGIndex(size=index_size) for idx, dg in enumerate(self.all_d_graphs): # for idx, dg in enumerate(self.all_d_graphs): if verbose: # if verbose: print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='') # print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='') self.index.add_dgraph(dg) # self.index.add_dgraph(dg) # self.var_index.add_dgraph(dg) # # self.var_index.add_dgraph(dg) if verbose: # if verbose: print() # print() print("\tFilter index") # print("\tFilter index") self.index.filter_by_entry() # self.index.filter_by_entry() # self.index = self.create_index_from_tuples(index_size, verbose=verbose) # # self.index = self.create_index_from_tuples(index_size, verbose=verbose) # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose) # # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose) # Compute node distances for pair of dgraphs that share at least 1 dmer. # # Compute node distances for pair of dgraphs that share at least 1 dmer. if verbose: if verbose: print("Compute the graph") print("Compute the graph") # Create the graph # Create the graph self.bidict_nodes = self.create_graph() self.bidict_nodes = self.create_graph_from_node_neighborhoods() def get_covering_variables(self, udg): def get_covering_variables(self, udg): ... @@ -179,7 +179,43 @@ class D2Graph(nx.Graph): ... 
# Hunk @@ -179,7 +179,43 @@ of class D2Graph(nx.Graph): this method is added by
# the commit, and the former `create_graph` appears to be kept under the name
# `create_graph_from_index`.
def create_graph_from_node_neighborhoods(self, neighborhood_threshold=0.25):
    """Build the d2 graph by linking d-graphs that share a non-central node.

    One graph node is created per d-graph of ``self.all_d_graphs``, named
    after ``dg.idx`` and annotated with three attributes:
    ``barcode_edges`` (space-separated indexes read from
    ``self.barcode_edge_idxs`` for every edge of the d-graph), ``score``
    (``"<score>/<optimal score>"``) and ``udg`` (``str(dg)``).

    For every node of a d-graph other than its center, the d-graphs stored
    in ``self.d_graphs_per_node`` under the key ``frozenset({node})`` are
    candidate neighbors. A candidate is linked when ``dg.distance_to`` it,
    divided by the summed node counts of both d-graphs, does not exceed
    ``neighborhood_threshold``. The raw distance is stored on the edge as
    ``distance``.

    Nodes left without any neighbor are removed from the graph.

    :param neighborhood_threshold: maximal distance ratio (inclusive) for
        two d-graphs to be connected.
    :return: a ``bidict`` mapping each remaining d-graph to its node index.
    """
    idx_by_dg = {}

    # One node per d-graph, decorated with its descriptive attributes.
    for udg in self.all_d_graphs:
        idx_by_dg[udg] = udg.idx
        self.add_node(idx_by_dg[udg])

        attributes = self.nodes[idx_by_dg[udg]]
        attributes["barcode_edges"] = " ".join(
            [str(self.barcode_edge_idxs[edge]) for edge in udg.edges]
        )
        attributes["score"] = f"{udg.score}/{udg.get_optimal_score()}"
        attributes["udg"] = str(udg)

    # Edges between d-graphs reachable through a shared non-central node.
    for udg in self.all_d_graphs:
        for member in udg.to_node_set():
            if member == udg.center:
                continue

            key = frozenset((member,))
            if key not in self.d_graphs_per_node:
                continue

            # NOTE(review): if `udg` itself can be listed under `key`, this
            # adds a self-loop which then shields the node from the singleton
            # filtering below -- confirm against how d_graphs_per_node is built.
            for other in self.d_graphs_per_node[key]:
                distance = udg.distance_to(other)
                ratio = distance / (len(udg.nodes) + len(other.nodes))
                if ratio <= neighborhood_threshold:
                    self.add_edge(
                        idx_by_dg[udg], idx_by_dg[other], distance=distance
                    )

    # Drop the nodes that ended up with no neighbor at all.
    # Iterate over a snapshot because entries are deleted on the way.
    for udg in list(idx_by_dg):
        if not list(self.neighbors(idx_by_dg[udg])):
            self.remove_node(idx_by_dg[udg])
            del idx_by_dg[udg]

    return bidict(idx_by_dg)
 ... @@ -23,13 +23,14 @@ def process_node(factory, node): ... @@ -23,13 +23,14 @@ def process_node(factory, node): if factory.verbose: if factory.verbose: print(f"{my_value}: d-graphs generated, starting filtering") print(f"{my_value}: d-graphs generated, starting filtering") print(f"{my_value}: {len(dgs)} graphs to filter") print(f"{my_value}: {len(dgs)} udg to filter") sys.stdout.flush() sys.stdout.flush() # udg domination filtering # udg domination filtering dgs = AbstractDGIndex.filter_entry(dgs) dgs = AbstractDGIndex.filter_entry(dgs) if factory.verbose: if factory.verbose: print(f"{my_value}: {len(dgs)} udg remaining after filtering") print(f"{my_value}({factory.nb_nodes}) terminated") print(f"{my_value}({factory.nb_nodes}) terminated") sys.stdout.flush() sys.stdout.flush() ... @@ -68,7 +69,6 @@ class AbstractDGFactory: ... @@ -68,7 +69,6 @@ class AbstractDGFactory: for dg in dgs: for dg in dgs: index.add_value(key, dg) index.add_value(key, dg) # index.filter_by_entry() return index return index ... ...
 ... @@ -84,7 +84,7 @@ class CliqueDGFactory(AbstractDGFactory): ... @@ -84,7 +84,7 @@ class CliqueDGFactory(AbstractDGFactory): if self.debug and len(clq_G.nodes) > 0: if self.debug and len(clq_G.nodes) > 0: import os import os nx.write_gexf(clq_G, f"{self.mwm_dir}/{node}.gexf") nx.write_gexf(clq_G, f"{self.mwm_dir}/{central_node.replace('/', '-')}.gexf") # d-graph computation regarding max weight matching # d-graph computation regarding max weight matching mwm = nx.algorithms.max_weight_matching(clq_G) mwm = nx.algorithms.max_weight_matching(clq_G) ... ...
 ... @@ -54,9 +54,9 @@ def main(): ... @@ -54,9 +54,9 @@ def main(): if args.debug: if args.debug: debug = True debug = True debug_path = f"{args.output_prefix}_debug" debug_path = f"{args.output_prefix}_debug" import os import os, shutil if os.path.isdir(debug_path): if os.path.isdir(debug_path): os.rmdir(debug_path) shutil.rmtree(debug_path) os.mkdir(debug_path) os.mkdir(debug_path) # Index size must be changed for general purpose. 8 is good for d=5 # Index size must be changed for general purpose. 8 is good for d=5 ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!