Commit d1bb245c authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

Finishing touches for d2_to_path

parent e51992a0
......@@ -62,7 +62,21 @@ class Path(list):
return num_covered
def save_path(self, filename):
    """Write this path to ``filename`` as a standalone GEXF graph.

    The exported graph holds one node per element of the path, keyed by the
    element's ``idx`` and annotated with three attributes:

    * ``center``: the element's ``center`` attribute,
    * ``udg``: the string form of the element,
    * ``score``: ``"<score>/<optimal score>"``.

    Consecutive elements of the path are linked by an edge.

    Args:
        filename: Destination handed to ``nx.write_gexf``.
    """
    # Start from an empty graph. A subgraph of ``self.d2g`` used to be
    # computed here but was overwritten right away and never used.
    d2p = nx.Graph()

    # Add the nodes together with their attributes.
    for udg in self:
        d2p.add_node(
            udg.idx,
            center=udg.center,
            udg=str(udg),
            score=f"{udg.score}/{udg.get_optimal_score()}",
        )

    # Add one edge per pair of consecutive elements.
    for udg1, udg2 in zip(self, self[1:]):
        d2p.add_edge(udg1.idx, udg2.idx)

    nx.write_gexf(d2p, filename)
def save_path_in_graph(self, filename):
......@@ -81,4 +95,16 @@ class Unitig(Path):
self.add_path([udg])
def d2_path_to_barcode_path(path):
    """Print the barcode differences between consecutive elements of ``path``.

    Every element of ``path`` must expose ``to_list()``; its result is turned
    into a set of barcodes. One line is printed per element, made of two sets:

    * first set: for element ``i`` (except the last), the barcodes of ``i``
      that are absent from ``i + 1``; for the last element, all its barcodes.
    * second set: for the first element, all its barcodes; for element ``i``
      (``i >= 1``), the barcodes of ``i`` that are absent from ``i - 1``.

    An empty path prints nothing and a single-element path prints its
    barcode set twice (both cases used to raise ``IndexError``).

    Args:
        path: Iterable of objects providing ``to_list()``.
    """
    barcode_per_idx = [set(udg.to_list()) for udg in path]
    if not barcode_per_idx:
        return

    diff_barcode_per_idx = []
    rev_diff_barcode_per_idx = []
    for current, following in zip(barcode_per_idx, barcode_per_idx[1:]):
        diff_barcode_per_idx.append(current - following)
        rev_diff_barcode_per_idx.append(following - current)

    # Boundary entries. The previous code subtracted the neighbouring
    # difference set here, but that set is disjoint from the boundary set by
    # construction, so the result was always the boundary set itself.
    diff_barcode_per_idx.append(set(barcode_per_idx[-1]))
    rev_diff_barcode_per_idx.insert(0, set(barcode_per_idx[0]))

    for diff, rev_diff in zip(diff_barcode_per_idx, rev_diff_barcode_per_idx):
        print(diff, rev_diff)
#!/usr/bin/env python3
import networkx as nx
import path_optimization as po
import argparse
import sys
import d2_graph as d2
import path_algorithms as pa
from d2_algorithms import compute_unitigs
def parse_arguments(argv=None):
    """Parse the command line of the d2 path construction script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``barcode_graph``, ``d2_graph``,
        ``output_path`` and ``out_prefix``.
    """
    parser = argparse.ArgumentParser(description='Greedy construction of a path through the d2 graph.')
    parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gexf formatted file.')
    parser.add_argument('d2_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
    # ``-o`` can only be registered once: argparse raises a conflicting
    # option error otherwise. The short flag is kept on ``--out_prefix``;
    # ``--output_path`` stays available through its long name only.
    parser.add_argument('--output_path', default="d2_path.gexf", help="Output file prefix.")
    parser.add_argument('--out_prefix', '-o', default="", help="Output file prefix.")

    return parser.parse_args(argv)
......@@ -38,34 +37,21 @@ def main():
largest_component_nodes = max(nx.connected_components(d2g), key=len)
largest_component = d2g.subgraph(largest_component_nodes)
import path_optimization as po
# Start optimization
optimizer = po.Optimizer(largest_component)
optimizer.init_random_solutions(1)
solution = optimizer.solutions[0]
print(solution)
print(solution.covering_score())
print("Solution creation...")
optimizer.extends_until_end(solution)
print(solution.covering_score())
print(f"covering score: {solution.covering_score()}")
solution.save_path_in_graph("data/test_d2_path.gexf")
solution.save_path("data/test_path.gexf")
solution.save_path_in_graph(f"{args.out_prefix}_d2_path.gexf")
solution.save_path(f"{args.out_prefix}_path.gexf")
print("Solution saved")
# unitigs = compute_unitigs(largest_component)
# path = pa.construct_path_from_unitigs(unitigs, largest_component)
# print("\n".join([str(x) for x in path]))
# print(path.covering_score())
# diameter = nx.diameter(largest_component)
# print(diameter)
# Write the simplified graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name)
# from d2_path import d2_path_to_barcode_path
# d2_path_to_barcode_path(solution)
if __name__ == "__main__":
......
......@@ -11,16 +11,15 @@ class Optimizer:
def init_random_solutions(self, nb_solutions):
    """Create ``nb_solutions`` new solutions and append them to ``self.solutions``.

    Each solution is built on ``self.d2g`` and initialised with
    ``random_init_best_quality()``.

    Args:
        nb_solutions: Number of solutions to create.
    """
    for _ in range(nb_solutions):
        rnd_sol = Solution(self.d2g)
        # Only the best-quality initialisation is kept: a preceding
        # ``random_init()`` call would be discarded immediately, since
        # ``random_init_best_quality()`` clears the solution first.
        rnd_sol.random_init_best_quality()
        self.solutions.append(rnd_sol)
def extends_until_end(self, solution):
    """Extend ``solution`` as far as possible, then reverse it and extend again.

    ``self.extends(solution)`` is called repeatedly until it returns a falsy
    value; the length of the solution is printed after every successful
    extension. The solution is then reversed in place and the same
    extension loop is run a second time.

    Args:
        solution: The solution to extend; modified in place.
    """
    for second_pass in (False, True):
        if second_pass:
            # First direction exhausted: continue from the other end.
            solution.reverse()
            print("reverse the solution")

        while self.extends(solution):
            print(len(solution))
def extends(self, solution):
# Get all the neighbors
......@@ -28,24 +27,15 @@ class Optimizer:
neighbors = [self.d2g.node_by_idx[int(x)] for x in self.d2g.neighbors(cur_id) if
self.d2g.node_by_idx[int(x)] not in solution]
# filter the neighbors if they are not contributing to variable coverage
# filtered = []
current_vars = frozenset([x for x, y in solution.covering_variables.items() if y > 0])
# for nei_id in neighbors:
# nei = self.d2g.node_by_idx[int(nei_id)]
# variables = self.d2g.get_covering_variables(nei)
# new_vars = variables - current_vars
# if len(new_vars) > 0:
# filtered.append(nei)
if len(neighbors) == 0:
return False
# Choose using the multiple optimization directions
next_udg = min(neighbors,
key=lambda x: (1 if len(self.d2g.get_covering_variables(x) - current_vars) == 0 else 0,
self.d2g[str(x.idx)][cur_id]["distance"],
x.get_link_divergence()))
x.get_link_divergence(),
self.d2g[str(x.idx)][cur_id]["distance"]))
solution.add_path([next_udg])
return True
......@@ -55,9 +45,12 @@ class Solution(Path):
def __init__(self, d2g):
    """Initialise the solution by forwarding ``d2g`` to the parent constructor."""
    super().__init__(d2g)
def random_init(self):
    """Reset the solution to a single node drawn uniformly from ``self.d2g``."""
    random_node_idx = random.choice(list(self.d2g.nodes()))
    random_node = self.d2g.node_by_idx[int(random_node_idx)]

    self.clear()
    self.add_path([random_node])


def random_init_best_quality(self):
    """Reset the solution to one random node among the lowest-divergence ones.

    All nodes of ``self.d2g`` are looked up through ``node_by_idx``; those
    whose ``get_link_divergence()`` equals the minimum over the graph are
    the candidates, and one of them is picked at random.

    Raises:
        ValueError: If the graph has no node (``min`` of an empty sequence).
    """
    nodes = [self.d2g.node_by_idx[int(x)] for x in self.d2g.nodes()]
    min_div = min(x.get_link_divergence() for x in nodes)
    candidates = [x for x in nodes if x.get_link_divergence() == min_div]
    random_udg = random.choice(candidates)

    # Drop any previous content before seeding the path with the chosen node.
    self.clear()
    self.add_path([random_udg])
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment