Commit e268c918 authored by Yoann Dufresne

Remove the last dependencies on the barcode graph from the lcp graphs

parent 2cb22aca
......@@ -20,6 +20,7 @@ class D2Graph(nx.Graph):
self.barcode_graph = None
self.index = None
self.variables = set()
self.variables_per_lcp = {}
self.barcode_edge_idxs = {}
......@@ -50,6 +51,8 @@ class D2Graph(nx.Graph):
# Node by idx
G.node_by_idx = self.node_by_idx
G.variables = self.variables.copy()
return G
def clone(self):
......@@ -106,27 +109,13 @@ class D2Graph(nx.Graph):
lcp = self.get_lcp(obj)
if lcp not in self.variables_per_lcp:
variables = []
for e in lcp.edges:
variables.append(self.barcode_edge_idxs[e])
for e_idx in lcp.edges:
variables.append(e_idx)
self.variables_per_lcp[lcp] = variables
return self.variables_per_lcp[lcp]
def save(self, filename):
with open(filename, "w") as fp:
# First line nb_nodes nb_cov_var
fp.write(f"{len(self.all_d_graphs)} {int((len(self.barcode_edge_idxs)+self.nb_uniq_edge)/2)}\n")
# Write the edges per d_graph
for d_graph in self.all_d_graphs:
fp.write(f"{d_graph.idx} {' '.join([str(self.barcode_edge_idxs[e]) for e in d_graph.edges])}\n")
# Write the distances
for x, y, data in self.edges(data=True):
fp.write(f"{x} {y} {data['distance']}\n")
def load(self, filename):
# Reload the graph
G = nx.read_gexf(filename)
......@@ -142,6 +131,7 @@ class D2Graph(nx.Graph):
for idx, node in enumerate(self.nodes(data=True)):
node, data = node
dg = Dgraph.load(data["udg"], data["score"], data["barcode_edges"])
self.variables.update(dg.edges)
self.bidict_nodes[node] = dg
self.all_d_graphs.append(dg)
if dg.idx == -1:
......
......@@ -10,7 +10,7 @@ class Path(list):
def __init__(self, d2g):
super(Path, self).__init__()
self.d2g = d2g
self.covering_variables = {x: 0 for x in self.d2g.barcode_edge_idxs.values()}
self.covering_variables = {x: 0 for x in self.d2g.variables}
self.covering_value = 0
# a succession of Counter (multiset)
self.barcode_order = []
......@@ -22,8 +22,7 @@ class Path(list):
lcp = self.d2g.get_lcp(obj)
# Update the covering variables
for barcode_edge in lcp.edges:
edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
for edge_idx in lcp.edges:
if self.covering_variables[edge_idx] == 0:
self.covering_value += 1
self.covering_variables[edge_idx] += 1
......@@ -75,10 +74,9 @@ class Path(list):
self._pop_barcodes(lcp)
# Update the covering variables
for barcode_edge in lcp.edges:
edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
self.covering_variables[edge_idx] -= 1
if self.covering_variables[edge_idx] == 0:
for e_idx in lcp.edges:
self.covering_variables[e_idx] -= 1
if self.covering_variables[e_idx] == 0:
self.covering_value -= 1
return lcp
......@@ -137,7 +135,7 @@ class Path(list):
d2p.nodes[udg.idx]["center"] = udg.center
d2p.nodes[udg.idx]["udg"] = str(udg)
d2p.nodes[udg.idx]["score"] = f"{udg.score}/{udg.get_optimal_score()}"
barcode_edges = " ".join([str(self.d2g.barcode_edge_idxs[x]) for x in udg.edges])
barcode_edges = " ".join([str(x) for x in udg.edges])
d2p.nodes[udg.idx]["barcode_edges"] = barcode_edges
# add the edges
......
......@@ -9,15 +9,14 @@ from deconvolution.d2graph import d2_graph as d2, path_optimization as po
def parse_arguments():
parser = argparse.ArgumentParser(description='Greedy construction of a path through the d2 graph.')
parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formatted file.')
parser.add_argument('d2_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
parser.add_argument('--out_prefix', '-o', default="", help="Output file prefix.")
parser.add_argument('lcp_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
parser.add_argument('--outfile', '-o', default="", help="Output file prefix.")
parser.add_argument('--verbose', '-v', action="store_true", help="Verbose")
args = parser.parse_args()
if args.out_prefix == "":
args.out_prefix = '.'.join(args.d2_graph.split('.')[:-1])
if args.outfile == "":
args.outfile = '.'.join(args.lcp_graph.split('.')[:-1]) + "_path.gexf"
return args
......@@ -26,20 +25,18 @@ def main():
# Parsing the arguments and validate them
args = parse_arguments()
barcode_file = args.barcode_graph
d2_file = args.d2_graph
if (not barcode_file.endswith('.gexf')) or (not d2_file.endswith(".gexf")):
lcpg_name = args.lcp_graph
if not lcpg_name.endswith(".gexf"):
print("Inputs file must be gexf formatted", file=sys.stderr)
exit(1)
# Loading
G = nx.read_gexf(barcode_file)
d2g = d2.D2Graph(G)
d2g.load(d2_file)
lcpg = d2.D2Graph()
lcpg.load(lcpg_name)
# Take the principal component
largest_component_nodes = max(nx.connected_components(d2g), key=len)
largest_component = d2g.subgraph(largest_component_nodes)
largest_component_nodes = max(nx.connected_components(lcpg), key=len)
largest_component = lcpg.subgraph(largest_component_nodes)
# Start optimization
optimizer = po.Optimizer(largest_component)
......@@ -50,7 +47,7 @@ def main():
print()
print()
print(f"covering score: {path.covering_score()}")
path.save_gexf(f"{args.out_prefix}_path.gexf")
path.save_gexf(args.outfile)
print("Solution saved")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment