diff --git a/deconvolution/d2graph/d2_graph.py b/deconvolution/d2graph/d2_graph.py
index ce76aa5c6a1698d53be527646029ce1498b1e7a6..47d537507e49164dac9692c93d14fd0525c5babc 100644
--- a/deconvolution/d2graph/d2_graph.py
+++ b/deconvolution/d2graph/d2_graph.py
@@ -20,6 +20,7 @@ class D2Graph(nx.Graph):
self.barcode_graph = None
self.index = None
+ self.variables = set()
self.variables_per_lcp = {}
self.barcode_edge_idxs = {}
@@ -50,6 +51,8 @@ class D2Graph(nx.Graph):
# Node by idx
G.node_by_idx = self.node_by_idx
+ G.variables = self.variables.copy()
+
return G
def clone(self):
@@ -106,27 +109,13 @@ class D2Graph(nx.Graph):
lcp = self.get_lcp(obj)
if lcp not in self.variables_per_lcp:
variables = []
- for e in lcp.edges:
- variables.append(self.barcode_edge_idxs[e])
+ for e_idx in lcp.edges:
+ variables.append(e_idx)
self.variables_per_lcp[lcp] = variables
return self.variables_per_lcp[lcp]
- def save(self, filename):
- with open(filename, "w") as fp:
- # First line nb_nodes nb_cov_var
- fp.write(f"{len(self.all_d_graphs)} {int((len(self.barcode_edge_idxs)+self.nb_uniq_edge)/2)}\n")
-
- # Write the edges per d_graph
- for d_graph in self.all_d_graphs:
- fp.write(f"{d_graph.idx} {' '.join([str(self.barcode_edge_idxs[e]) for e in d_graph.edges])}\n")
-
- # Write the distances
- for x, y, data in self.edges(data=True):
- fp.write(f"{x} {y} {data['distance']}\n")
-
-
def load(self, filename):
# Reload the graph
G = nx.read_gexf(filename)
@@ -142,6 +131,7 @@ class D2Graph(nx.Graph):
for idx, node in enumerate(self.nodes(data=True)):
node, data = node
dg = Dgraph.load(data["udg"], data["score"], data["barcode_edges"])
+ self.variables.update(dg.edges)
self.bidict_nodes[node] = dg
self.all_d_graphs.append(dg)
if dg.idx == -1:
diff --git a/deconvolution/d2graph/d2_path.py b/deconvolution/d2graph/d2_path.py
index e37c21c3b543cf7d82c4b0ba7074b6af83bb5c0f..5c5080f50c92f176b4d175628534ed50de3e7619 100644
--- a/deconvolution/d2graph/d2_path.py
+++ b/deconvolution/d2graph/d2_path.py
@@ -10,7 +10,7 @@ class Path(list):
def __init__(self, d2g):
super(Path, self).__init__()
self.d2g = d2g
- self.covering_variables = {x: 0 for x in self.d2g.barcode_edge_idxs.values()}
+ self.covering_variables = {x: 0 for x in self.d2g.variables}
self.covering_value = 0
# a succession of Counter (multiset)
self.barcode_order = []
@@ -22,8 +22,7 @@ class Path(list):
lcp = self.d2g.get_lcp(obj)
# Update the covering variables
- for barcode_edge in lcp.edges:
- edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
+ for edge_idx in lcp.edges:
if self.covering_variables[edge_idx] == 0:
self.covering_value += 1
self.covering_variables[edge_idx] += 1
@@ -75,10 +74,9 @@ class Path(list):
self._pop_barcodes(lcp)
# Update the covering variables
- for barcode_edge in lcp.edges:
- edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
- self.covering_variables[edge_idx] -= 1
- if self.covering_variables[edge_idx] == 0:
+ for e_idx in lcp.edges:
+ self.covering_variables[e_idx] -= 1
+ if self.covering_variables[e_idx] == 0:
self.covering_value -= 1
return lcp
@@ -137,7 +135,7 @@ class Path(list):
d2p.nodes[udg.idx]["center"] = udg.center
d2p.nodes[udg.idx]["udg"] = str(udg)
d2p.nodes[udg.idx]["score"] = f"{udg.score}/{udg.get_optimal_score()}"
- barcode_edges = " ".join([str(self.d2g.barcode_edge_idxs[x]) for x in udg.edges])
+ barcode_edges = " ".join([str(x) for x in udg.edges])
d2p.nodes[udg.idx]["barcode_edges"] = barcode_edges
# add the edges
diff --git a/deconvolution/main/d2_to_path.py b/deconvolution/main/d2_to_path.py
index 5ff51922f3deccbff9adca2d92b95f6f160a963f..516aa764c9b5d3f4debee311a86cc75bf2110fca 100755
--- a/deconvolution/main/d2_to_path.py
+++ b/deconvolution/main/d2_to_path.py
@@ -9,15 +9,14 @@ from deconvolution.d2graph import d2_graph as d2, path_optimization as po
def parse_arguments():
parser = argparse.ArgumentParser(description='Greedy construction of a path through the d2 graph.')
- parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formatted file.')
- parser.add_argument('d2_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
- parser.add_argument('--out_prefix', '-o', default="", help="Output file prefix.")
+ parser.add_argument('lcp_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
+ parser.add_argument('--outfile', '-o', default="", help="Output gexf file name. Defaults to the lcp_graph name with its extension replaced by '_path.gexf'.")
parser.add_argument('--verbose', '-v', action="store_true", help="Verbose")
args = parser.parse_args()
- if args.out_prefix == "":
- args.out_prefix = '.'.join(args.d2_graph.split('.')[:-1])
+ if args.outfile == "":
+ args.outfile = '.'.join(args.lcp_graph.split('.')[:-1]) + "_path.gexf"
return args
@@ -26,20 +25,18 @@ def main():
# Parsing the arguments and validate them
args = parse_arguments()
- barcode_file = args.barcode_graph
- d2_file = args.d2_graph
- if (not barcode_file.endswith('.gexf')) or (not d2_file.endswith(".gexf")):
+ lcpg_name = args.lcp_graph
+ if not lcpg_name.endswith(".gexf"):
print("Inputs file must be gexf formatted", file=sys.stderr)
exit(1)
# Loading
- G = nx.read_gexf(barcode_file)
- d2g = d2.D2Graph(G)
- d2g.load(d2_file)
+ lcpg = d2.D2Graph()
+ lcpg.load(lcpg_name)
# Take the principal component
- largest_component_nodes = max(nx.connected_components(d2g), key=len)
- largest_component = d2g.subgraph(largest_component_nodes)
+ largest_component_nodes = max(nx.connected_components(lcpg), key=len)
+ largest_component = lcpg.subgraph(largest_component_nodes)
# Start optimization
optimizer = po.Optimizer(largest_component)
@@ -50,7 +47,7 @@ def main():
print()
print()
print(f"covering score: {path.covering_score()}")
- path.save_gexf(f"{args.out_prefix}_path.gexf")
+ path.save_gexf(args.outfile)
print("Solution saved")