Commit 97b187fc authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

Buggy version of the barcode orderer

parent 7d2ef666
...@@ -25,6 +25,24 @@ class PartialOrder: ...@@ -25,6 +25,24 @@ class PartialOrder:
# This score must be updated when the order is modified # This score must be updated when the order is modified
self.score = 0 self.score = 0
self.debug_stack = []
def copy(self):
    """Return an independent copy of this partial order.

    Each barcode multiset and each covering udg set is duplicated with its
    own ``copy()``, so changing the containers of the copy does not change
    this object. The udg objects themselves are shared, not duplicated.

    The debug stack is copied as well: ``remove_right`` pops one debug entry
    per removed udg, so a copy without it could not be unwound.

    :return: A new partial order holding the same state as this one
    """
    # Build through the runtime type so that subclasses copy to themselves
    clone = type(self)()

    clone.barcode_order.extend(ms.copy() for ms in self.barcode_order)
    clone.udg_order.extend(self.udg_order)
    clone.udg_per_set.extend(udg_set.copy() for udg_set in self.udg_per_set)

    clone.len_barcodes = self.len_barcodes
    clone.len_sets = self.len_sets
    clone.len_udgs = self.len_udgs
    clone.score = self.score

    # Entries are (udg, scores) tuples that are only appended or popped,
    # so a shallow list copy is enough
    clone.debug_stack = list(self.debug_stack)

    return clone
def _get_right_overlaps(self, udg): def _get_right_overlaps(self, udg):
"""" Get the overlap of an udg with the right part of the multiset partial order. """" Get the overlap of an udg with the right part of the multiset partial order.
:param udg: The udg to overlap :param udg: The udg to overlap
...@@ -36,19 +54,25 @@ class PartialOrder: ...@@ -36,19 +54,25 @@ class PartialOrder:
# Will look for full overlaps from right to left # Will look for full overlaps from right to left
while current_set_idx >= 0: while current_set_idx >= 0:
ms = self.barcode_order[current_set_idx] ms = self.barcode_order[current_set_idx]
if len(ms - remaining_barcodes) == 0 and ms != remaining_barcodes: if len(ms - remaining_barcodes) == 0 and ms != remaining_barcodes:
current_set_idx -= 1 current_set_idx -= 1
remaining_barcodes -= ms remaining_barcodes = remaining_barcodes - ms
elif len(ms & remaining_barcodes) == 0:
return current_set_idx+1, Counter(), remaining_barcodes
else: else:
# leftmost multiset , leftmost non overlapping, non overlapping barcodes # leftmost multiset , leftmost non overlapping, non overlapping barcodes
return current_set_idx, ms - remaining_barcodes, remaining_barcodes - ms return current_set_idx, ms - remaining_barcodes, remaining_barcodes - ms
return -1, Counter(), remaining_barcodes return 0, Counter(), remaining_barcodes
def add_right(self, udg): def add_right(self, udg):
save = self.copy()
self.udg_order.append(udg) self.udg_order.append(udg)
self.len_udgs += 1 self.len_udgs += 1
scores = [0, 0, 0]
# Step 1 - Determine overlapping multisets from right to left # Step 1 - Determine overlapping multisets from right to left
leftmost_idx, left_non_overlap, new_multiset = self._get_right_overlaps(udg) leftmost_idx, left_non_overlap, new_multiset = self._get_right_overlaps(udg)
# Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2) # Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2)
...@@ -64,18 +88,63 @@ class PartialOrder: ...@@ -64,18 +88,63 @@ class PartialOrder:
self.udg_per_set.insert(leftmost_idx, self.udg_per_set[leftmost_idx-1].copy()) self.udg_per_set.insert(leftmost_idx, self.udg_per_set[leftmost_idx-1].copy())
self.len_sets += 1 self.len_sets += 1
self.score += len(self.udg_per_set[leftmost_idx]) self.score += len(self.udg_per_set[leftmost_idx])
scores[0] += len(self.udg_per_set[leftmost_idx])
# Step 3 - Add the udg as covering the right multisets
for idx in range(max(0, leftmost_idx), self.len_sets):
self.udg_per_set[idx].add(udg)
self.score += 1
scores[1] += 1
# Step 3 - Add a new multiset on the right for the remaining barcodes # Step 4 - Add a new multiset on the right for the remaining barcodes
if len(new_multiset) > 0: if len(new_multiset) > 0:
self.barcode_order.append(new_multiset) self.barcode_order.append(new_multiset)
self.udg_per_set.append(set())
self.len_sets += 1
self.len_barcodes += sum(new_multiset.values()) self.len_barcodes += sum(new_multiset.values())
self.len_sets += 1
# Step 4 - Add the udg as covering the right multisets
for idx in range(max(0, leftmost_idx), self.len_sets):
self.udg_per_set[idx].add(udg)
self.score += 1 self.score += 1
scores[2] += 1
self.udg_per_set.append({udg})
self.debug_stack.append((udg, scores))
def add_right2(self, udg):
    """Unfinished alternative to add_right.

    For now it only asks ``self._get_right_overlaps2`` for the overlaps of
    the udg and unpacks the three values it returns. Nothing is stored and
    nothing is returned.

    :param udg: The udg to overlap with the right part of the order
    """
    overlaps = self._get_right_overlaps2(udg)
    # The unpacking is kept so that a result that is not a triple fails here
    left_idx, leftmost_overlap, rightmost_overlap = overlaps
def remove_right(self):
    """Remove the rightmost udg from the order and undo its bookkeeping.

    The udg is popped from ``udg_order`` together with its entry in
    ``debug_stack``. Its trace is then erased from the multisets: a last
    multiset covered by a single udg is dropped, the udg is taken out of
    every covering set it belongs to (walking from right to left), and the
    two multisets around the left end of that walk are merged back when
    their covering sets have become equal.

    ``score``, ``len_udgs``, ``len_sets`` and ``len_barcodes`` are updated
    along the way.

    :return: The udg that was removed
    :raises IndexError: If there is no udg, debug entry or multiset to pop
    """
    # Step 1 - Remove the udg (and its debug entry, so both stacks stay aligned)
    last_udg = self.udg_order.pop()
    self.debug_stack.pop()
    self.len_udgs -= 1

    # Step 2 - Remove the last multiset if it is covered by a single udg
    if len(self.udg_per_set[-1]) == 1:
        self.udg_per_set.pop()
        dropped_ms = self.barcode_order.pop()
        self.len_barcodes -= sum(dropped_ms.values())
        self.len_sets -= 1
        self.score -= 1

    # Step 3 - Remove last_udg from the coverings, from right to left
    idx = len(self.barcode_order) - 1
    while idx >= 0 and last_udg in self.udg_per_set[idx]:
        self.udg_per_set[idx].remove(last_udg)
        self.score -= 1
        idx -= 1

    # Step 4 - idx is now the first set (from the right) that last_udg did
    # not cover. Merge it with its right neighbour if both are now covered
    # by exactly the same udgs.
    if 0 <= idx < len(self.udg_per_set) - 1:
        if self.udg_per_set[idx] == self.udg_per_set[idx + 1]:
            merged_cover = self.udg_per_set.pop(idx)
            self.score -= len(merged_cover)
            merged_ms = self.barcode_order.pop(idx)
            # After the pop, position idx holds the former right neighbour
            self.barcode_order[idx] = self.barcode_order[idx] + merged_ms
            self.len_sets -= 1

    return last_udg
def get_add_score(self, udg): def get_add_score(self, udg):
score = 0 score = 0
...@@ -92,11 +161,6 @@ class PartialOrder: ...@@ -92,11 +161,6 @@ class PartialOrder:
# covering number points for the new udg # covering number points for the new udg
score += self.len_sets - leftmost_idx score += self.len_sets - leftmost_idx
# Negative points for redundant elements
# shift one left
remaining_size = sum(remaining_right.values()) - sum(left_non_overlap.values())
leftmost_idx -= 1
return score return score
def reverse_order(self): def reverse_order(self):
...@@ -108,6 +172,7 @@ class PartialOrder: ...@@ -108,6 +172,7 @@ class PartialOrder:
return self.len_barcodes return self.len_barcodes
_predicted_score = 0
_saved_neighbors = {} _saved_neighbors = {}
def _next_node(d2g, partial_order, node, used): def _next_node(d2g, partial_order, node, used):
node = str(node) node = str(node)
...@@ -134,6 +199,8 @@ def _next_node(d2g, partial_order, node, used): ...@@ -134,6 +199,8 @@ def _next_node(d2g, partial_order, node, used):
return None return None
else: else:
neighbors.discard(max_neighbor_name) neighbors.discard(max_neighbor_name)
global _predicted_score
_predicted_score = max_score
return max_neighbor_name return max_neighbor_name
...@@ -161,9 +228,42 @@ def greedy_partial_order(d2g, node): ...@@ -161,9 +228,42 @@ def greedy_partial_order(d2g, node):
forward = False forward = False
po.reverse_order() po.reverse_order()
current_node = str(po.udg_order[-1].idx) current_node = str(po.udg_order[-1].idx)
print(po.score, "reverse")
else: else:
reverse = False reverse = False
return po return po
def bb_partial_order(d2g, node):
    """Enumerate partial orders by a depth-first walk with backtracking.

    Starting from ``node``, the order is extended on the right with the node
    returned by ``_next_node`` for as long as one is returned. When none is
    returned (dead end), a copy of the current order is yielded, the
    rightmost udg is removed and the walk resumes from the udg that is now
    the rightmost one. The generator stops when the order is empty.

    :param d2g: The graph to walk; must provide ``nodes()`` and ``node_by_idx``
    :param node: The node to start from (anything accepted by ``str`` and ``int``)
    :return: A generator of PartialOrder copies, one per dead end reached
    """
    used_nodes = {str(n): False for n in d2g.nodes()}
    current_node_name = str(node)
    used_nodes[current_node_name] = True

    po = PartialOrder()
    po.add_right(d2g.node_by_idx[int(node)])

    while True:
        next_node_name = _next_node(d2g, po, current_node_name, used_nodes)

        # We found a new deeper solution
        if next_node_name is not None:
            po.add_right(d2g.node_by_idx[int(next_node_name)])
            used_nodes[next_node_name] = True
            current_node_name = next_node_name
        # All the possible solutions have been explored
        elif len(po) == 0:
            return
        # We are in a dead end, must go back one step
        else:
            yield po.copy()
            po.remove_right()
            used_nodes[current_node_name] = False
            # Forget the cached neighbors of the abandoned node so they are
            # recomputed if it is reached again through another path
            _saved_neighbors.pop(current_node_name, None)

            # Nothing left to backtrack to: the exploration is over
            if not po.udg_order:
                return
            # Resume from the new rightmost udg. remove_right() returns the
            # udg that was just removed, so using its idx here (as before)
            # presumably re-selected the abandoned node - node names are
            # assumed to be str(udg.idx), as everywhere else in this module
            current_node_name = str(po.udg_order[-1].idx)
...@@ -6,7 +6,7 @@ import sys ...@@ -6,7 +6,7 @@ import sys
import random import random
from deconvolution.d2graph import d2_graph as d2 from deconvolution.d2graph import d2_graph as d2
from barcodes.partialorder import greedy_partial_order from barcodes.partialorder import greedy_partial_order, bb_partial_order
def parse_arguments(): def parse_arguments():
...@@ -44,11 +44,9 @@ def main(): ...@@ -44,11 +44,9 @@ def main():
all_nodes = list(largest_component.nodes()) all_nodes = list(largest_component.nodes())
rnd_node = all_nodes[random.randint(0, len(all_nodes)-1)] rnd_node = all_nodes[random.randint(0, len(all_nodes)-1)]
po = greedy_partial_order(largest_component, rnd_node) # po = greedy_partial_order(largest_component, rnd_node)
print("barcodes", len(po)) for po in bb_partial_order(largest_component, rnd_node):
print("sets", po.len_sets) print("barcodes", len(po), "sets", po.len_sets, "udgs", po.len_udgs, "score", po.score)
print("udgs", po.len_udgs)
print("score", po.score)
if __name__ == "__main__": if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment