...
 
Commits (2)
......@@ -14,7 +14,7 @@ class LcpGraph(nx.Graph):
"""LcpGraph"""
def __init__(self, debug=False, debug_path='.'):
super(LcpGraph, self).__init__()
self.all_lcp = []
self.all_lcps = []
self.lcp_per_node = {}
self.node_by_idx = {}
self.barcode_graph = None
......@@ -85,10 +85,10 @@ class LcpGraph(nx.Graph):
counts = sum(len(x) for x in self.lcp_per_node.values())
print(f"\t {counts} computed d-graphs")
for d_graphs in self.lcp_per_node.values():
self.all_lcp.extend(d_graphs)
self.all_lcps.extend(d_graphs)
# Number the d_graphs
for idx, d_graph in enumerate(self.all_lcp):
for idx, d_graph in enumerate(self.all_lcps):
d_graph.idx = idx
self.node_by_idx[idx] = d_graph
......@@ -130,13 +130,13 @@ class LcpGraph(nx.Graph):
self.bidict_nodes = {}
for idx, node in enumerate(self.nodes(data=True)):
node, data = node
dg = Lcp.load(data["udg"], data["score"], data["barcode_edges"])
self.variables.update(dg.edges)
self.bidict_nodes[node] = dg
self.all_lcp.append(dg)
if dg.idx == -1:
dg.idx = int(node)
self.node_by_idx[dg.idx] = dg
lcp = Lcp.load(data["udg"], data["score"], data["barcode_edges"])
self.variables.update(lcp.edges)
self.bidict_nodes[node] = lcp
self.all_lcps.append(lcp)
if lcp.idx == -1:
lcp.idx = int(node)
self.node_by_idx[lcp.idx] = lcp
# self.node_by_name[node] = lcp
self.bidict_nodes = bidict(self.bidict_nodes)
......@@ -145,7 +145,7 @@ class LcpGraph(nx.Graph):
nodes = {}
# Create the nodes of lcpg from udgs
for lcp in self.all_lcp:
for lcp in self.all_lcps:
nodes[lcp] = lcp.idx
self.add_node(nodes[lcp])
# Add covering barcode edges
......@@ -156,7 +156,7 @@ class LcpGraph(nx.Graph):
self.nodes[nodes[lcp]]["central_node_barcode"] = str(lcp.center)
# Create the edges from neighbor edges
for lcp in self.all_lcp:
for lcp in self.all_lcps:
for node in lcp.to_node_set():
if node == lcp.center:
continue
......
import networkx as nx
from collections import Counter
import sys
from deconvolution.lcpgraph.lcp_graph import LcpGraph
""" Represent an udg path into a lcpg graph
......@@ -9,6 +10,7 @@ class Path(list):
def __init__(self, lcpg):
super(Path, self).__init__()
self.lcpg = lcpg
self.covering_variables = {x: 0 for x in self.lcpg.variables}
self.covering_value = 0
......@@ -127,23 +129,23 @@ class Path(list):
def covering_score(self):
return self.covering_value / len(self.covering_variables)
def save_gexf(self, filename):
d2p = nx.Graph()
def to_lcpg(self):
lcpg = nx.Graph()
# Add the nodes
for udg in self:
d2p.add_node(udg.idx)
d2p.nodes[udg.idx]["center"] = udg.center
d2p.nodes[udg.idx]["udg"] = str(udg)
d2p.nodes[udg.idx]["score"] = f"{udg.score}/{udg.get_optimal_score()}"
barcode_edges = " ".join([str(x) for x in udg.edges])
d2p.nodes[udg.idx]["barcode_edges"] = barcode_edges
for lcp in self:
lcpg.add_node(lcp.idx)
lcpg.nodes[lcp.idx]["center"] = lcp.center
lcpg.nodes[lcp.idx]["udg"] = str(lcp)
lcpg.nodes[lcp.idx]["score"] = f"{lcp.score}/{lcp.get_optimal_score()}"
barcode_edges = " ".join([str(x) for x in lcp.edges])
lcpg.nodes[lcp.idx]["barcode_edges"] = barcode_edges
# add the edges
for idx in range(len(self)-1):
udg1 = self[idx]
udg2 = self[idx+1]
lcp1 = self[idx]
lcp2 = self[idx+1]
d2p.add_edge(udg1.idx, udg2.idx)
lcpg.add_edge(lcp1.idx, lcp2.idx)
nx.write_gexf(d2p, filename)
return lcpg
from deconvolution.lcpgraph.lcp_graph import LcpGraph
from deconvolution.lcpgraph.lcp_path import Path
from random import randint, shuffle
import networkx as nx
class LcpOrder:
    """Linear arrangement of lcps together with its neighborhood-distance score.

    ``idxs_to_lcps`` maps a position to the lcp stored there and ``lcps_to_idxs`` is the
    reverse mapping. ``score`` is the sum, over every (lcp, neighbor) pair where the neighbor
    is placed after the lcp, of the distance between their positions.
    """

    def __init__(self, lcp_iterable, lcp_neighborhood):
        """Number the lcps in iteration order and compute the initial score.

        :param lcp_iterable: lcps (hashable), in their initial order
        :param lcp_neighborhood: mapping lcp -> iterable of neighbor lcps
        """
        self.neighborhood = lcp_neighborhood
        # Positions follow the iteration order of the input
        self.idxs_to_lcps = dict(enumerate(lcp_iterable))
        self.lcps_to_idxs = {lcp: position for position, lcp in self.idxs_to_lcps.items()}
        # Only neighbors placed further right contribute to the score
        self.score = sum(
            self.lcps_to_idxs[neighbor] - position
            for lcp, position in self.lcps_to_idxs.items()
            for neighbor in self.neighborhood[lcp]
            if self.lcps_to_idxs[neighbor] > position
        )

    def neighborhood_score(self, lcp):
        """Return the sum of position distances between ``lcp`` and each of its neighbors."""
        position = self.lcps_to_idxs[lcp]
        return sum(abs(position - self.lcps_to_idxs[neighbor])
                   for neighbor in self.neighborhood[lcp])

    def switch_right(self, idx):
        """Swap the lcps at positions ``idx`` and ``idx + 1`` and update the score.

        :param idx: position of the left lcp of the pair to swap
        :return: the score variation caused by the swap (also added to ``self.score``)
        """
        left_lcp, right_lcp = self.idxs_to_lcps[idx], self.idxs_to_lcps[idx + 1]
        before = self.neighborhood_score(left_lcp) + self.neighborhood_score(right_lcp)

        # Exchange the two positions in both mappings
        self.idxs_to_lcps[idx], self.idxs_to_lcps[idx + 1] = right_lcp, left_lcp
        self.lcps_to_idxs[right_lcp], self.lcps_to_idxs[left_lcp] = idx, idx + 1

        after = self.neighborhood_score(left_lcp) + self.neighborhood_score(right_lcp)
        delta = after - before
        self.score += delta
        return delta

    def switch_left(self, idx):
        """Swap the lcps at positions ``idx - 1`` and ``idx``; return the score variation."""
        return self.switch_right(idx - 1)

    def debug_save(self, filename):
        """Write one text line per position: the position, the lcp center and, for each
        neighbor, a ``(distance, neighbor position, neighbor center)`` triple.

        :param filename: path of the text file to (over)write
        """
        with open(filename, "w") as out:
            for idx in range(len(self.idxs_to_lcps)):
                lcp = self.idxs_to_lcps[idx]
                pieces = [f"{idx}, {lcp.center}"]
                for nei_lcp in self.neighborhood[lcp]:
                    n_idx = self.lcps_to_idxs[nei_lcp]
                    pieces.append(f" ({abs(idx - n_idx)}, {n_idx}, {nei_lcp.center})")
                pieces.append("\n")
                out.write("".join(pieces))

    def to_path(self, lcpg):
        """Return a ``Path`` on ``lcpg`` holding the lcps in their current order."""
        path = Path(lcpg)
        position = 0
        total = len(self.idxs_to_lcps)
        while position < total:
            # Appended one by one so that Path.append is used for every lcp
            path.append(self.idxs_to_lcps[position])
            position += 1
        return path
def lcpord_from_connex_lcpg(lcpg, neighbor_size_threshold=0, existing_path=False):
    """Build an LcpOrder from the nodes of a lcp graph.

    Candidate nodes are filtered repeatedly: on each pass a node is kept only if strictly more
    than ``neighbor_size_threshold`` of its graph neighbors are candidates of that pass. Passes
    are repeated until no node is removed. The remaining nodes are translated to lcps through
    ``lcpg.bidict_nodes`` and their neighborhoods are restricted to the remaining nodes.

    :param lcpg: graph to read; must expose ``nodes``, adjacency by ``lcpg[node]`` and
        ``bidict_nodes`` (node -> lcp)
    :param neighbor_size_threshold: a node needs strictly more candidate neighbors than this
        value to be kept
    :param existing_path: when True, only the nodes carrying a "path_idx" attribute are used,
        sorted by that attribute; otherwise all nodes are used in graph order
    :return: an LcpOrder over the kept lcps, in candidate order
    """
    if existing_path:
        candidate_nodes = [n for n, d in lcpg.nodes(data=True) if "path_idx" in d]
        candidate_nodes.sort(key=lambda n: lcpg.nodes[n]["path_idx"])
    else:
        candidate_nodes = list(lcpg.nodes())

    # Filter out small neighborhood lcps
    modified = True
    while modified:
        # Set snapshot of the current pass: O(1) membership tests instead of list scans.
        # The list is kept alongside because the candidate order must be preserved.
        candidate_set = set(candidate_nodes)
        updated_candidates = [
            node for node in candidate_nodes
            if sum(1 for nei in lcpg[node] if nei in candidate_set) > neighbor_size_threshold
        ]
        # A shorter list means at least one node was filtered out: run another pass
        modified = len(updated_candidates) != len(candidate_nodes)
        candidate_nodes = updated_candidates

    # prepare the lcps and the neighborhood matrix
    candidate_set = set(candidate_nodes)
    all_lcps = []
    neighbors = {}
    for node in candidate_nodes:
        lcp = lcpg.bidict_nodes[node]
        all_lcps.append(lcp)
        neighbors[lcp] = [
            lcpg.bidict_nodes[neighbor_node]
            for neighbor_node in lcpg[node]
            if neighbor_node in candidate_set
        ]

    return LcpOrder(all_lcps, neighbors)
def lcpord_to_lcpg(lcpg, lcp_order):
    """Store the position of every ordered lcp in its graph node as "path_idx".

    :param lcpg: graph to annotate; ``lcpg.bidict_nodes.inverse`` gives the node of a lcp
    :param lcp_order: order whose ``idxs_to_lcps`` mapping is written to the graph
    :return: the same ``lcpg`` object, modified in place
    """
    for position in lcp_order.idxs_to_lcps:
        ordered_lcp = lcp_order.idxs_to_lcps[position]
        graph_node = lcpg.bidict_nodes.inverse[ordered_lcp]
        node_attributes = lcpg.nodes[graph_node]
        node_attributes["path_idx"] = position
    return lcpg
def max_local_optimization(lcp_order, idx, step):
    """ Move the lcp at position idx by jumps of abs(step) positions in the direction given by
    the sign of step, then walk back to the recorded best position.

    Jumps are repeated while they stay inside the order and stop after the first jump that
    increases the score.
    NOTE(review): the best position is chosen by comparing the score variation of each single
    jump, not the variation accumulated since the start - confirm this is intended.
    :param lcp_order: lcp order object to modify
    :param idx: The idx of lcp to move
    :param step: The jump size and orientation (must not be 0)
    :return: True if the order was modified.
    """
    direction = 1 if step >= 0 else -1
    # switch_right works on the left position of a pair: when going left, start one before idx
    cursor = idx - 1 if direction == -1 else idx
    origin = cursor
    hop = abs(step)

    assert step != 0

    best_cursor = cursor
    best_delta = 0
    # Jump only while a whole hop of adjacent swaps fits inside the order
    while hop - 1 <= cursor <= len(lcp_order.idxs_to_lcps) - hop - 1:
        delta = 0
        for _ in range(hop):
            delta += lcp_order.switch_right(cursor)
            cursor += direction

        if delta < best_delta:
            best_delta, best_cursor = delta, cursor
        if delta > 0:
            break

    # Undo, one adjacent swap at a time, everything done past the best position
    while cursor != best_cursor:
        cursor -= direction
        lcp_order.switch_right(cursor)

    return origin != best_cursor
def queue_optimization(lcp_order, max_steps=10000, max_turn_without_score_modif=-1):
"""Queue-driven local search that reorders ``lcp_order`` in place.

Lcps are taken from work queues and moved with ``max_local_optimization`` in both
directions (tried in a shuffled order) using the current ``jump_size``. After a move, the
lcps met between the start and end positions and the neighbors of the moved lcp are queued
again. A progress line is printed and the final order is written with
``lcp_order.debug_save``.

NOTE(review): indentation is missing in this copy of the file, so the exact nesting of the
loops and of the queue-reset statements below cannot be confirmed from here.

:param lcp_order: order object to optimize; its ``lcps_to_idxs``, ``idxs_to_lcps``,
    ``neighborhood``, ``score`` and ``debug_save`` members are used
:param max_steps: bound compared with the step counter in the inner loop condition
:param max_turn_without_score_modif: overwritten with the queue length before it is read, so
    the value given by the caller currently has no effect
:return: None
"""
# Queues init
score_queue = list(lcp_order.lcps_to_idxs.keys())
# NOTE(review): this assignment replaces the parameter value passed by the caller
max_turn_without_score_modif = len(score_queue)
# Receives the lcps queued again after a move that left the score unchanged
identical_queue = list()
# Set of the lcps currently waiting in one of the two queues
queued_lcps = set(score_queue)
# direction and jump size init
# The first jump size is a tenth of the number of lcps (integer division)
jump_size = len(queued_lcps) // 10
orientation = [-1, 1]
# Control variables
step = 0
step_without_score_increase = 0
while jump_size > 1:
while (len(score_queue) > 0 or len(identical_queue) > 0)\
and step < max_steps and (jump_size > 1 or step_without_score_increase < max_turn_without_score_modif):
# Init
step += 1
step_without_score_increase += 1
current_lcp = None
# score_queue is served first; identical_queue is used only when it is empty
if len(score_queue) > 0:
current_lcp = score_queue.pop(0)
elif len(identical_queue) > 0:
current_lcp = identical_queue.pop(0)
queued_lcps.remove(current_lcp)
init_idx = lcp_order.lcps_to_idxs[current_lcp]
# Randomize which direction is tried first
shuffle(orientation)
prev_score = lcp_order.score
# Hill climb
modified = max_local_optimization(lcp_order, init_idx, orientation[0]*jump_size)
# Second attempt starts from wherever the first one left the lcp
current_idx = lcp_order.lcps_to_idxs[current_lcp]
modified = max_local_optimization(lcp_order, current_idx, orientation[1]*jump_size) or modified
end_idx = lcp_order.lcps_to_idxs[current_lcp]
if modified:
if lcp_order.score != prev_score:
step_without_score_increase = 0
max_turn_without_score_modif = len(score_queue) + len(identical_queue)
# Add neighbors in queue
if init_idx != end_idx:
to_add = set()
# Positions from init_idx towards end_idx; end_idx itself is excluded by range()
for idx in range(init_idx, end_idx, -1 if end_idx < init_idx else 1):
lcp = lcp_order.idxs_to_lcps[idx]
to_add.add(lcp)
for lcp in lcp_order.neighborhood[current_lcp]:
to_add.add(lcp)
for lcp in to_add:
if lcp not in queued_lcps:
queued_lcps.add(lcp)
if lcp_order.score != prev_score:
score_queue.append(lcp)
else:
identical_queue.append(lcp)
# Progress trace: score, both queue sizes and the current jump size
print(lcp_order.score, len(score_queue), len(identical_queue), jump_size)
if step_without_score_increase > len(queued_lcps):
# Refill the queue with every lcp and reduce the jump size
score_queue = list(lcp_order.idxs_to_lcps.values())
identical_queue = []
queued_lcps = set(score_queue)
max_turn_without_score_modif = len(queued_lcps)
jump_size //= 10
# jump_size // 10 + 1 is already >= 1 for a non-negative jump_size
jump_size = max(jump_size+1, 1)
# Same refill and jump size reduction as above
score_queue = list(lcp_order.idxs_to_lcps.values())
identical_queue = []
queued_lcps = set(score_queue)
max_turn_without_score_modif = len(queued_lcps)
jump_size //= 10
jump_size = max(jump_size+1, 1)
# Output goes to a hard-coded relative directory - presumably it must already exist
lcp_order.debug_save(f"snake_experiments/debug_pathorder_{lcp_order.score}.txt")
def main():
    """Load a saved lcp graph, optimize the order stored in it and save the graph again.

    The input file is hard-coded; the output file name embeds the final order score.
    """
    lcpg = LcpGraph()
    lcpg.load("snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath442073.gexf")

    # Start from the order already stored in the graph ("path_idx" node attributes)
    order = lcpord_from_connex_lcpg(lcpg, existing_path=True, neighbor_size_threshold=1)
    queue_optimization(order, max_steps=float("inf"))

    # Write the optimized positions back into the graph before saving it
    lcpg = lcpord_to_lcpg(lcpg, order)
    output_file = f"snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath{order.score}.gexf"
    nx.write_gexf(lcpg, output_file)


if __name__ == "__main__":
    main()
......@@ -59,7 +59,8 @@ def main():
print(f"covering score: {path.covering_score()}")
# solution.save_path_in_graph(f"{args.out_prefix}_d2_path.gexf")
path.save_gexf(f"{args.out_prefix}_path.gexf")
lcpg = path.to_lcpg()
nx.write_gexf(lcpg, f"{args.out_prefix}_path.gexf")
print("Solution saved")
......
from deconvolution.lcpgraph.lcp_graph import LcpGraph
from deconvolution.lcpgraph.lcp_path_ordering import lcpord_from_connex_lcpg
from deconvolution.main.evaluate import compute_shortest_edit_path
def heat_path(order):
    """Return the neighborhood score of every lcp of ``order``, indexed by position.

    :param order: object exposing ``idxs_to_lcps`` (position -> lcp) and
        ``neighborhood_score(lcp)``
    :return: list with one score per position
    """
    positions = order.idxs_to_lcps
    scores = [0 for _ in range(len(positions))]
    for position, lcp in positions.items():
        scores[position] = order.neighborhood_score(lcp)
    return scores
def main():
    """Load a saved lcp graph, rebuild its stored path and print the length of the result
    returned by ``compute_shortest_edit_path`` on the path graph.
    """
    lcpg = LcpGraph()
    lcpg.load("snake_experiments/simu_0_bar_n500_d10_m2-dev0_lcpg_reduced_orderpath442073.gexf")

    # Use the order already stored in the graph ("path_idx" node attributes)
    order = lcpord_from_connex_lcpg(lcpg, existing_path=True, neighbor_size_threshold=1)
    path_graph = order.to_path(lcpg).to_lcpg()
    print(len(compute_shortest_edit_path(path_graph)))


if __name__ == "__main__":
    main()
......@@ -19,13 +19,13 @@ class TestD2Graph(unittest.TestCase):
# d2.construct_from_barcodes(neighbor_threshold=0, min_size_clique=d, verbose=False)
# print("after", d)
#
# # for lcp in d2.all_lcp:
# # for lcp in d2.all_lcps:
# # print(lcp.score, lcp.get_link_divergence(), lcp)
# # print()
#
# # Test the number of d-graphs
# awaited_d_num = size - 2 * d
# self.assertEqual(awaited_d_num, len(d2.all_lcp))
# self.assertEqual(awaited_d_num, len(d2.all_lcps))
#
# # Test connectivity
# # Center node names
......@@ -50,12 +50,12 @@ class TestD2Graph(unittest.TestCase):
barcode_graph = nx.read_gexf("test_data/bar_1000_5_2.gexf")
d2 = LcpGraph(barcode_graph)
d2.construct_from_barcodes()
udgs = d2.all_lcp
udgs = d2.all_lcps
for _ in range(5):
d2 = LcpGraph(barcode_graph)
d2.construct_from_barcodes()
self.assertEqual(len(udgs), len(d2.all_lcp))
self.assertEqual(len(udgs), len(d2.all_lcps))
def test_reloading(self):
......@@ -84,11 +84,11 @@ class TestD2Graph(unittest.TestCase):
# TODO: Verify distances
# Test all_lcp
self.assertEqual(len(d2_reloaded.all_lcp), len(d2.all_lcp))
# Test all_lcps
self.assertEqual(len(d2_reloaded.all_lcps), len(d2.all_lcps))
# Verify lcp idxs
reloaded_idxs = [dg.idx for dg in d2_reloaded.all_lcp]
for dg in d2.all_lcp:
reloaded_idxs = [dg.idx for dg in d2_reloaded.all_lcps]
for dg in d2.all_lcps:
self.assertTrue(dg.idx in reloaded_idxs)
......