Commit 7f3d1f5b by Yoann Dufresne

### destructive solution fail test

parent fafb4927
 ... @@ -14,7 +14,7 @@ class LcpGraph(nx.Graph): ... @@ -14,7 +14,7 @@ class LcpGraph(nx.Graph): """LcpGraph""" """LcpGraph""" def __init__(self, debug=False, debug_path='.'): def __init__(self, debug=False, debug_path='.'): super(LcpGraph, self).__init__() super(LcpGraph, self).__init__() self.all_lcp = [] self.all_lcps = [] self.lcp_per_node = {} self.lcp_per_node = {} self.node_by_idx = {} self.node_by_idx = {} self.barcode_graph = None self.barcode_graph = None ... @@ -85,10 +85,10 @@ class LcpGraph(nx.Graph): ... @@ -85,10 +85,10 @@ class LcpGraph(nx.Graph): counts = sum(len(x) for x in self.lcp_per_node.values()) counts = sum(len(x) for x in self.lcp_per_node.values()) print(f"\t {counts} computed d-graphs") print(f"\t {counts} computed d-graphs") for d_graphs in self.lcp_per_node.values(): for d_graphs in self.lcp_per_node.values(): self.all_lcp.extend(d_graphs) self.all_lcps.extend(d_graphs) # Number the d_graphs # Number the d_graphs for idx, d_graph in enumerate(self.all_lcp): for idx, d_graph in enumerate(self.all_lcps): d_graph.idx = idx d_graph.idx = idx self.node_by_idx[idx] = d_graph self.node_by_idx[idx] = d_graph ... @@ -130,13 +130,13 @@ class LcpGraph(nx.Graph): ... 
@@ -130,13 +130,13 @@ class LcpGraph(nx.Graph): self.bidict_nodes = {} self.bidict_nodes = {} for idx, node in enumerate(self.nodes(data=True)): for idx, node in enumerate(self.nodes(data=True)): node, data = node node, data = node dg = Lcp.load(data["udg"], data["score"], data["barcode_edges"]) lcp = Lcp.load(data["udg"], data["score"], data["barcode_edges"]) self.variables.update(dg.edges) self.variables.update(lcp.edges) self.bidict_nodes[node] = dg self.bidict_nodes[node] = lcp self.all_lcp.append(dg) self.all_lcps.append(lcp) if dg.idx == -1: if lcp.idx == -1: dg.idx = int(node) lcp.idx = int(node) self.node_by_idx[dg.idx] = dg self.node_by_idx[lcp.idx] = lcp # self.node_by_name[node] = lcp # self.node_by_name[node] = lcp self.bidict_nodes = bidict(self.bidict_nodes) self.bidict_nodes = bidict(self.bidict_nodes) ... @@ -145,7 +145,7 @@ class LcpGraph(nx.Graph): ... @@ -145,7 +145,7 @@ class LcpGraph(nx.Graph): nodes = {} nodes = {} # Create the nodes of lcpg from udgs # Create the nodes of lcpg from udgs for lcp in self.all_lcp: for lcp in self.all_lcps: nodes[lcp] = lcp.idx nodes[lcp] = lcp.idx self.add_node(nodes[lcp]) self.add_node(nodes[lcp]) # Add covering barcode edges # Add covering barcode edges ... @@ -156,7 +156,7 @@ class LcpGraph(nx.Graph): ... @@ -156,7 +156,7 @@ class LcpGraph(nx.Graph): self.nodes[nodes[lcp]]["central_node_barcode"] = str(lcp.center) self.nodes[nodes[lcp]]["central_node_barcode"] = str(lcp.center) # Create the edges from neighbor edges # Create the edges from neighbor edges for lcp in self.all_lcp: for lcp in self.all_lcps: for node in lcp.to_node_set(): for node in lcp.to_node_set(): if node == lcp.center: if node == lcp.center: continue continue ... ...
 import networkx as nx import networkx as nx from collections import Counter from collections import Counter import sys import sys from deconvolution.lcpgraph.lcp_graph import LcpGraph """ Represent an udg path into a lcpg graph """ Represent an udg path into a lcpg graph ... @@ -9,6 +10,7 @@ class Path(list): ... @@ -9,6 +10,7 @@ class Path(list): def __init__(self, lcpg): def __init__(self, lcpg): super(Path, self).__init__() super(Path, self).__init__() self.lcpg = lcpg self.lcpg = lcpg self.covering_variables = {x: 0 for x in self.lcpg.variables} self.covering_variables = {x: 0 for x in self.lcpg.variables} self.covering_value = 0 self.covering_value = 0 ... @@ -127,23 +129,23 @@ class Path(list): ... @@ -127,23 +129,23 @@ class Path(list): def covering_score(self): def covering_score(self): return self.covering_value / len(self.covering_variables) return self.covering_value / len(self.covering_variables) def save_gexf(self, filename): def to_lcpg(self): d2p = nx.Graph() lcpg = nx.Graph() # Add the nodes # Add the nodes for udg in self: for lcp in self: d2p.add_node(udg.idx) lcpg.add_node(lcp.idx) d2p.nodes[udg.idx]["center"] = udg.center lcpg.nodes[lcp.idx]["center"] = lcp.center d2p.nodes[udg.idx]["udg"] = str(udg) lcpg.nodes[lcp.idx]["udg"] = str(lcp) d2p.nodes[udg.idx]["score"] = f"{udg.score}/{udg.get_optimal_score()}" lcpg.nodes[lcp.idx]["score"] = f"{lcp.score}/{lcp.get_optimal_score()}" barcode_edges = " ".join([str(x) for x in udg.edges]) barcode_edges = " ".join([str(x) for x in lcp.edges]) d2p.nodes[udg.idx]["barcode_edges"] = barcode_edges lcpg.nodes[lcp.idx]["barcode_edges"] = barcode_edges # add the edges # add the edges for idx in range(len(self)-1): for idx in range(len(self)-1): udg1 = self[idx] lcp1 = self[idx] udg2 = self[idx+1] lcp2 = self[idx+1] d2p.add_edge(udg1.idx, udg2.idx) lcpg.add_edge(lcp1.idx, lcp2.idx) nx.write_gexf(d2p, filename) return lcpg
 # lcp_path_ordering: linear ordering of lcps and hill-climbing optimisation of that order.
from deconvolution.lcpgraph.lcp_graph import LcpGraph
from deconvolution.lcpgraph.lcp_path import Path

from random import randint, shuffle
import networkx as nx


class LcpOrder:
    """Linear arrangement of lcps together with a distance-based score.

    The order is stored as two mirrored dictionaries (lcp -> position and
    position -> lcp). ``score`` accumulates, for every lcp and each of its
    neighbors placed at a larger position, the distance between the two
    positions.
    """

    def __init__(self, lcp_iterable, lcp_neighborhood):
        """Build the order from an iterable of lcps.

        :param lcp_iterable: lcps in their initial order; the position of an
            lcp is its rank in this iterable.
        :param lcp_neighborhood: mapping lcp -> iterable of neighbor lcps.
            Every neighbor must itself be part of ``lcp_iterable``.
        """
        self.lcps_to_idxs = {}
        self.idxs_to_lcps = {}
        self.neighborhood = lcp_neighborhood

        # Init order
        for idx, lcp in enumerate(lcp_iterable):
            self.lcps_to_idxs[lcp] = idx
            self.idxs_to_lcps[idx] = lcp

        # Init score: only neighbors located further right are counted, so a
        # pair listed in both neighborhoods contributes once.
        self.score = 0
        for lcp, idx in self.lcps_to_idxs.items():
            for neighbor in self.neighborhood[lcp]:
                nei_idx = self.lcps_to_idxs[neighbor]
                if nei_idx > idx:
                    self.score += nei_idx - idx

    def neighborhood_score(self, lcp):
        """Return the sum of position distances between ``lcp`` and its neighbors."""
        idx = self.lcps_to_idxs[lcp]
        score = 0
        for neighbor in self.neighborhood[lcp]:
            score += abs(idx - self.lcps_to_idxs[neighbor])
        return score

    def switch_right(self, idx):
        """Swap the lcps at positions ``idx`` and ``idx + 1``.

        ``self.score`` is updated with the variation of the neighborhood
        scores of the two swapped lcps.

        :param idx: position of the left element of the pair to swap.
        :return: the score variation (negative when the score decreased).
        """
        left_lcp = self.idxs_to_lcps[idx]
        right_lcp = self.idxs_to_lcps[idx + 1]
        score_before = self.neighborhood_score(left_lcp) + self.neighborhood_score(right_lcp)

        # Switch
        self.idxs_to_lcps[idx] = right_lcp
        self.lcps_to_idxs[right_lcp] = idx
        self.idxs_to_lcps[idx + 1] = left_lcp
        self.lcps_to_idxs[left_lcp] = idx + 1

        score_after = self.neighborhood_score(left_lcp) + self.neighborhood_score(right_lcp)
        diff_score = score_after - score_before
        self.score += diff_score

        return diff_score

    def switch_left(self, idx):
        """Swap the lcps at positions ``idx - 1`` and ``idx``; see ``switch_right``."""
        return self.switch_right(idx - 1)

    def debug_save(self, filename):
        """Write one text line per position to ``filename``.

        Each line holds the position and the lcp center, followed by one
        ``(distance, neighbor position, neighbor center)`` tuple per neighbor.
        """
        with open(filename, "w") as out:
            for idx in range(len(self.idxs_to_lcps)):
                lcp = self.idxs_to_lcps[idx]
                out.write(f"{idx}, {lcp.center}")
                for nei_lcp in self.neighborhood[lcp]:
                    n_idx = self.lcps_to_idxs[nei_lcp]
                    out.write(f" ({abs(idx - n_idx)}, {n_idx}, {nei_lcp.center})")
                out.write("\n")

    def to_path(self, lcpg):
        """Return a ``Path`` over ``lcpg`` containing the lcps in position order."""
        path = Path(lcpg)
        for idx in range(len(self.idxs_to_lcps)):
            path.append(self.idxs_to_lcps[idx])
        return path


def lcpord_from_connex_lcpg(lcpg, neighbor_size_threshold=0, existing_path=False):
    """Create an ``LcpOrder`` from the nodes of ``lcpg``.

    Nodes whose number of candidate neighbors is not strictly greater than
    ``neighbor_size_threshold`` are removed, and the filtering is repeated
    until a full pass removes nothing.

    :param lcpg: graph exposing ``nodes``, adjacency access ``lcpg[node]`` and
        a ``bidict_nodes`` mapping node -> lcp.
    :param neighbor_size_threshold: a node is kept only when it has strictly
        more candidate neighbors than this value.
    :param existing_path: when True, only nodes carrying a ``path_idx``
        attribute are candidates and they are ordered by that attribute;
        otherwise all nodes are candidates, in graph iteration order.
    :return: the ``LcpOrder`` built on the remaining lcps.
    """
    neighbors = {}
    all_lcps = []

    # Select the initial candidates
    if existing_path:
        candidate_nodes = [n for n, d in lcpg.nodes(data=True) if "path_idx" in d]
        candidate_nodes.sort(key=lambda n: lcpg.nodes[n]["path_idx"])
    else:
        candidate_nodes = list(lcpg.nodes())

    # Filter out small neighborhood lcps
    modified = True
    while modified:
        modified = False
        # Membership is tested against the candidates of the *previous* pass;
        # a set keeps each test O(1) instead of scanning the list.
        candidate_set = set(candidate_nodes)
        updated_candidates = []
        for node in candidate_nodes:
            # Count the number of candidates in the neighborhood
            neighborhood_size = sum(1 for nei in lcpg[node] if nei in candidate_set)
            # Apply filtering
            if neighborhood_size > neighbor_size_threshold:
                updated_candidates.append(node)
            else:
                modified = True
        candidate_nodes = updated_candidates

    # Prepare the lcps and the neighborhood lists
    candidate_set = set(candidate_nodes)
    for node in candidate_nodes:
        lcp = lcpg.bidict_nodes[node]
        all_lcps.append(lcp)
        neighbors[lcp] = [
            lcpg.bidict_nodes[neighbor_node]
            for neighbor_node in lcpg[node]
            if neighbor_node in candidate_set
        ]

    return LcpOrder(all_lcps, neighbors)


def lcpord_to_lcpg(lcpg, lcp_order):
    """Store each lcp position of ``lcp_order`` in the ``path_idx`` node attribute.

    :param lcpg: graph whose ``bidict_nodes.inverse`` maps lcp -> node.
    :param lcp_order: order to write back into the graph.
    :return: the same ``lcpg`` object, modified in place.
    """
    for idx, lcp in lcp_order.idxs_to_lcps.items():
        lcp_node = lcpg.bidict_nodes.inverse[lcp]
        lcpg.nodes[lcp_node]["path_idx"] = idx

    return lcpg


def max_local_optimization(lcp_order, idx, step):
    """ Take the node of index idx and switch it with the index idx+step.
    Repeat until score increase.
    :param lcp_order: lcp order object to modify
    :param idx: The idx of lcp to move
    :param step: The jump size and orientation (must not be 0)
    :return: True if the order was modified.
    """
    assert step != 0

    orientation = -1 if step < 0 else 1
    # switch_right(i) swaps positions i and i+1, so a move to the left starts
    # one position before idx.
    i = idx if orientation == 1 else idx - 1
    start_i = i
    jump_size = abs(step)

    max_score_idx = i
    max_diff_score = 0
    # The bounds guarantee that a full jump stays inside the order.
    while jump_size - 1 <= i <= len(lcp_order.idxs_to_lcps) - jump_size - 1:
        # NOTE(review): diff_score is reset for every jump, so the position
        # kept is the one reached by the single most improving jump, not
        # necessarily the lowest cumulative score -- confirm this is intended.
        diff_score = 0
        for _ in range(jump_size):
            diff_score += lcp_order.switch_right(i)
            i += orientation

        if diff_score < max_diff_score:
            max_diff_score = diff_score
            max_score_idx = i
        if diff_score > 0:
            break

    # Undo the swaps performed after the best recorded position
    while i != max_score_idx:
        i -= orientation
        lcp_order.switch_right(i)

    return start_i != max_score_idx


def queue_optimization(lcp_order, max_steps=10000, max_turn_without_score_modif=-1):
    """Hill-climb ``lcp_order`` by moving queued lcps with decreasing jump sizes.

    Lcps are taken from a queue and moved in both directions (random order)
    with ``max_local_optimization``. Lcps crossed by a move and the neighbors
    of the moved lcp are queued again. The jump size starts at a tenth of the
    number of lcps and is reduced as the search stalls. The final order is
    dumped with ``LcpOrder.debug_save``.

    :param lcp_order: order to optimize in place.
    :param max_steps: maximum number of lcps taken from the queues.
    :param max_turn_without_score_modif: initial number of consecutive steps
        without score change tolerated once the jump size reached 1. A
        negative value (default) means "the number of lcps". The value is
        recomputed from the queue sizes during the search.
    """
    # Queues init
    score_queue = list(lcp_order.lcps_to_idxs.keys())
    if max_turn_without_score_modif < 0:
        # The parameter used to be silently overwritten; the default keeps
        # that historical value.
        max_turn_without_score_modif = len(score_queue)
    identical_queue = []
    queued_lcps = set(score_queue)

    # Direction and jump size init
    jump_size = len(queued_lcps) // 10
    orientation = [-1, 1]

    # Control variables
    step = 0
    step_without_score_increase = 0

    while jump_size > 1:
        while (
            (len(score_queue) > 0 or len(identical_queue) > 0)
            and step < max_steps
            and (jump_size > 1 or step_without_score_increase < max_turn_without_score_modif)
        ):
            # Init
            step += 1
            step_without_score_increase += 1
            current_lcp = None
            # Lcps queued after a score change are served first
            if len(score_queue) > 0:
                current_lcp = score_queue.pop(0)
            elif len(identical_queue) > 0:
                current_lcp = identical_queue.pop(0)
            queued_lcps.remove(current_lcp)
            init_idx = lcp_order.lcps_to_idxs[current_lcp]
            shuffle(orientation)
            prev_score = lcp_order.score

            # Hill climb
            modified = max_local_optimization(lcp_order, init_idx, orientation[0]*jump_size)
            current_idx = lcp_order.lcps_to_idxs[current_lcp]
            modified = max_local_optimization(lcp_order, current_idx, orientation[1]*jump_size) or modified
            end_idx = lcp_order.lcps_to_idxs[current_lcp]

            if modified:
                if lcp_order.score != prev_score:
                    step_without_score_increase = 0
                    max_turn_without_score_modif = len(score_queue) + len(identical_queue)

                # Add neighbors in queue
                if init_idx != end_idx:
                    to_add = set()
                    for idx in range(init_idx, end_idx, -1 if end_idx < init_idx else 1):
                        lcp = lcp_order.idxs_to_lcps[idx]
                        to_add.add(lcp)
                    for lcp in lcp_order.neighborhood[current_lcp]:
                        to_add.add(lcp)

                    for lcp in to_add:
                        if lcp not in queued_lcps:
                            queued_lcps.add(lcp)
                            if lcp_order.score != prev_score:
                                score_queue.append(lcp)
                            else:
                                identical_queue.append(lcp)

            # Progress trace
            print(lcp_order.score, len(score_queue), len(identical_queue), jump_size)

            # The search stalls: refill the queue and reduce the jump size
            if step_without_score_increase > len(queued_lcps):
                score_queue = list(lcp_order.idxs_to_lcps.values())
                identical_queue = []
                queued_lcps = set(score_queue)
                max_turn_without_score_modif = len(queued_lcps)
                jump_size //= 10
                jump_size = max(jump_size+1, 1)

        # Inner loop exhausted: restart from a full queue with a smaller jump
        score_queue = list(lcp_order.idxs_to_lcps.values())
        identical_queue = []
        queued_lcps = set(score_queue)
        max_turn_without_score_modif = len(queued_lcps)
        jump_size //= 10
        jump_size = max(jump_size+1, 1)

    lcp_order.debug_save(f"snake_experiments/debug_pathorder_{lcp_order.score}.txt")


def main():
    """Load a hard-coded lcp graph, optimize its path order and save the result."""
    lcpg = LcpGraph()
    lcpg.load("snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath442073.gexf")
    order = lcpord_from_connex_lcpg(lcpg, neighbor_size_threshold=1, existing_path=True)
    queue_optimization(order, max_steps=float("inf"))
    lcpg = lcpord_to_lcpg(lcpg, order)
    nx.write_gexf(lcpg, f"snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath{order.score}.gexf")


if __name__ == "__main__":
    main()
 ... @@ -59,7 +59,8 @@ def main(): ... @@ -59,7 +59,8 @@ def main(): print(f"covering score: {path.covering_score()}") print(f"covering score: {path.covering_score()}") # solution.save_path_in_graph(f"{args.out_prefix}_d2_path.gexf") # solution.save_path_in_graph(f"{args.out_prefix}_d2_path.gexf") path.save_gexf(f"{args.out_prefix}_path.gexf") lcpg = path.to_lcpg() nx.write_gexf(lcpg, f"{args.out_prefix}_path.gexf") print("Solution saved") print("Solution saved") ... ...
 # Script: shortest edit path length of a stored lcp path order.
from deconvolution.lcpgraph.lcp_graph import LcpGraph
from deconvolution.lcpgraph.lcp_path_ordering import lcpord_from_connex_lcpg
from deconvolution.main.evaluate import compute_shortest_edit_path


def heat_path(order):
    """Return the neighborhood score of every lcp, indexed by its position.

    :param order: object exposing ``idxs_to_lcps`` (position -> lcp) and
        ``neighborhood_score(lcp)``.
    :return: list whose element ``i`` is the neighborhood score of the lcp
        stored at position ``i``.
    """
    positions = order.idxs_to_lcps
    scores = [0 for _ in range(len(positions))]
    for position in positions:
        scores[position] = order.neighborhood_score(positions[position])
    return scores


def main():
    """Load the hard-coded graph, rebuild its path and print the edit path length."""
    graph = LcpGraph()
    graph.load("snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath442073.gexf")

    order = lcpord_from_connex_lcpg(graph, neighbor_size_threshold=1, existing_path=True)
    path_graph = order.to_path(graph).to_lcpg()

    edit_path = compute_shortest_edit_path(path_graph)
    print(len(edit_path))


if __name__ == "__main__":
    main()