Commit 7d2ef666 authored by Yoann Dufresne's avatar Yoann Dufresne

partial order bad greedy ok

parent 83e8d871
......@@ -45,17 +45,10 @@ class PartialOrder:
return -1, Counter(), remaining_barcodes
def add_right(self, udg):
self.udg_order.append(udg)
self.len_udgs += 1
# Empty case
if len(self) == 0:
self.barcode_order.append(Counter(udg.nodes))
self.udg_per_set.append({udg})
return
# Step 1 - Determine overlapping multisets from right to left
leftmost_idx, left_non_overlap, new_multiset = self._get_right_overlaps(udg)
# Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2)
......@@ -70,6 +63,7 @@ class PartialOrder:
# Copy the previous overlapping udg set for the new multiset
self.udg_per_set.insert(leftmost_idx, self.udg_per_set[leftmost_idx-1].copy())
self.len_sets += 1
self.score += len(self.udg_per_set[leftmost_idx])
# Step 3 - Add a new multiset on the right for the remaining barcodes
if len(new_multiset) > 0:
......@@ -81,9 +75,7 @@ class PartialOrder:
# Step 4 - Add the udg as covering the right multisets
for idx in range(max(0, leftmost_idx), self.len_sets):
self.udg_per_set[idx].add(udg)
# TODO: Step 5 - Modify score
self.score += 1
def get_add_score(self, udg):
score = 0
......@@ -105,22 +97,73 @@ class PartialOrder:
remaining_size = sum(remaining_right.values()) - sum(left_non_overlap.values())
leftmost_idx -= 1
# Search for non overlapped common barcodes
while remaining_size > 0 and leftmost_idx >= 0:
ms = self.barcode_order[leftmost_idx]
# Compute intersection
common = ms & remaining_right
candidate_negative = sum(common.values())
score -= min(candidate_negative, remaining_size)
# Update structures
remaining_right -= common
remaining_size -= sum(ms.values())
leftmost_idx -= 1
return score
def reverse_order(self):
self.barcode_order = self.barcode_order[::-1]
self.udg_order = self.udg_order[::-1]
self.udg_per_set = self.udg_per_set[::-1]
def __len__(self):
return self.len_barcodes
_saved_neighbors = {}
def _next_node(d2g, partial_order, node, used):
node = str(node)
# create neighborhood on the first call
if node not in _saved_neighbors:
_saved_neighbors[node] = {str(x) for x in d2g[node] if not used[str(x)]}
neighbors = _saved_neighbors[node]
# Return None is no usable neighbor
if len(neighbors) == 0:
return None
max_score = 0
max_neighbor_name = None
for neighbor_name in neighbors:
neighbor_udg = d2g.node_by_idx[int(neighbor_name)]
neighbor_score = partial_order.get_add_score(neighbor_udg)
if neighbor_score > max_score:
max_score = neighbor_score
max_neighbor_name = neighbor_name
if max_neighbor_name is None:
return None
else:
neighbors.discard(max_neighbor_name)
return max_neighbor_name
def greedy_partial_order(d2g, node):
used_nodes = {str(n): False for n in d2g.nodes()}
used_nodes[str(node)] = True
current_node = node
current_udg = d2g.node_by_idx[int(node)]
po = PartialOrder()
po.add_right(current_udg)
forward = True
reverse = True
while forward or reverse:
next_node_name = _next_node(d2g, po, str(current_node), used_nodes)
if next_node_name is not None:
next_udg = d2g.node_by_idx[int(next_node_name)]
po.add_right(next_udg)
used_nodes[next_node_name] = True
current_node = next_node_name
else:
if forward:
forward = False
po.reverse_order()
current_node = str(po.udg_order[-1].idx)
print(po.score, "reverse")
else:
reverse = False
return po
#!/usr/bin/env python3
import networkx as nx
import argparse
import sys
import random
from deconvolution.d2graph import d2_graph as d2
from barcodes.partialorder import greedy_partial_order
def parse_arguments():
parser = argparse.ArgumentParser(description='Greedy construction of a path through the d2 graph.')
parser.add_argument('barcode_graph', help='The barcode graph file. Must be a gefx formatted file.')
parser.add_argument('d2_graph', help='d2 graph to reduce. Must be a gexf formatted file.')
parser.add_argument('--out_prefix', '-o', default="", help="Output file prefix.")
args = parser.parse_args()
if args.out_prefix == "":
args.out_prefix = '.'.join(args.d2_graph.split('.')[:-1])
return args
def main():
# Parsing the arguments and validate them
args = parse_arguments()
barcode_file = args.barcode_graph
d2_file = args.d2_graph
if (not barcode_file.endswith('.gexf')) or (not d2_file.endswith(".gexf")):
print("Inputs file must be gexf formatted", file=sys.stderr)
exit(1)
# Loading
G = nx.read_gexf(barcode_file)
d2g = d2.D2Graph(G)
d2g.load(d2_file)
# Take the principal component
largest_component_nodes = max(nx.connected_components(d2g), key=len)
largest_component = d2g.subgraph(largest_component_nodes)
all_nodes = list(largest_component.nodes())
rnd_node = all_nodes[random.randint(0, len(all_nodes)-1)]
po = greedy_partial_order(largest_component, rnd_node)
print("barcodes", len(po))
print("sets", po.len_sets)
print("udgs", po.len_udgs)
print("score", po.score)
if __name__ == "__main__":
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment