Commit 97b187fc authored by Yoann Dufresne

Buggy version of the barcode orderer

parent 7d2ef666
......@@ -25,6 +25,24 @@ class PartialOrder:
# This score must be updated when the order is modified
self.score = 0
self.debug_stack = []
def copy(self):
    """Return an independent copy of this partial order.

    The multisets in ``barcode_order`` and the sets in ``udg_per_set`` are
    copied one by one, so mutating the copy never alters this instance.
    The udgs themselves (``udg_order``) are shared by reference.

    ``debug_stack`` is copied as well: ``remove_right`` pops one entry from
    it per removed udg, so a copy with an empty stack would raise
    ``IndexError`` on its first ``remove_right`` call.

    :return: A new object of the same type holding the same state.
    """
    # type(self)() keeps the copy's type in sync with the instance type.
    duplicate = type(self)()

    duplicate.barcode_order.extend(ms.copy() for ms in self.barcode_order)
    duplicate.udg_order.extend(self.udg_order)
    duplicate.udg_per_set.extend(udg_set.copy() for udg_set in self.udg_per_set)
    # Shallow copy: one entry per add_right call, only ever pushed or popped.
    duplicate.debug_stack.extend(self.debug_stack)

    duplicate.len_barcodes = self.len_barcodes
    duplicate.len_sets = self.len_sets
    duplicate.len_udgs = self.len_udgs
    duplicate.score = self.score

    return duplicate
def _get_right_overlaps(self, udg):
"""" Get the overlap of an udg with the right part of the multiset partial order.
:param udg: The udg to overlap
......@@ -36,19 +54,25 @@ class PartialOrder:
# Will look for full overlaps from right to left
while current_set_idx >= 0:
ms = self.barcode_order[current_set_idx]
if len(ms - remaining_barcodes) == 0 and ms != remaining_barcodes:
current_set_idx -= 1
remaining_barcodes -= ms
remaining_barcodes = remaining_barcodes - ms
elif len(ms & remaining_barcodes) == 0:
return current_set_idx+1, Counter(), remaining_barcodes
else:
# leftmost multiset , leftmost non overlapping, non overlapping barcodes
return current_set_idx, ms - remaining_barcodes, remaining_barcodes - ms
return -1, Counter(), remaining_barcodes
return 0, Counter(), remaining_barcodes
def add_right(self, udg):
save = self.copy()
self.udg_order.append(udg)
self.len_udgs += 1
scores = [0, 0, 0]
# Step 1 - Determine overlapping multisets from right to left
leftmost_idx, left_non_overlap, new_multiset = self._get_right_overlaps(udg)
# Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2)
......@@ -64,18 +88,63 @@ class PartialOrder:
self.udg_per_set.insert(leftmost_idx, self.udg_per_set[leftmost_idx-1].copy())
self.len_sets += 1
self.score += len(self.udg_per_set[leftmost_idx])
scores[0] += len(self.udg_per_set[leftmost_idx])
# Step 3 - Add the udg as covering the right multisets
for idx in range(max(0, leftmost_idx), self.len_sets):
self.udg_per_set[idx].add(udg)
self.score += 1
scores[1] += 1
# Step 3 - Add a new multiset on the right for the remaining barcodes
# Step 4 - Add a new multiset on the right for the remaining barcodes
if len(new_multiset) > 0:
self.barcode_order.append(new_multiset)
self.udg_per_set.append(set())
self.len_sets += 1
self.len_barcodes += sum(new_multiset.values())
# Step 4 - Add the udg as covering the right multisets
for idx in range(max(0, leftmost_idx), self.len_sets):
self.udg_per_set[idx].add(udg)
self.len_sets += 1
self.score += 1
scores[2] += 1
self.udg_per_set.append({udg})
self.debug_stack.append((udg, scores))
def add_right2(self, udg):
    """Unfinished variant of ``add_right``.

    Queries ``_get_right_overlaps2`` for ``udg`` and unpacks its three
    results; nothing is done with them yet and the order is not modified
    by this method itself.

    :param udg: The udg to overlap
    :return: None
    """
    overlap_info = self._get_right_overlaps2(udg)
    # The helper must yield exactly three values, otherwise unpacking fails.
    left_idx, leftmost_overlap, rightmost_overlap = overlap_info
def remove_right(self):
    """Remove the rightmost udg from the order and return it.

    The bookkeeping (``len_udgs``, ``len_sets``, ``len_barcodes``,
    ``score``) is updated along the way, and one entry is popped from
    ``debug_stack``.

    No snapshot of the order is taken here: the previous ``self.copy()``
    result was never read and only cost a full copy on every call.

    :return: The udg that was removed.
    :raises IndexError: If ``udg_order``, ``debug_stack`` or
        ``udg_per_set`` is empty.
    """
    # Step 1 - Remove the udg and its debug record
    last_udg = self.udg_order.pop()
    self.debug_stack.pop()
    self.len_udgs -= 1

    # Step 2 - Remove the last multiset if it is covered by a single udg
    # (only the size of the covering set is checked, not its content)
    if len(self.udg_per_set[-1]) == 1:
        self.udg_per_set.pop()
        removed_ms = self.barcode_order.pop()
        self.len_barcodes -= sum(removed_ms.values())
        self.len_sets -= 1
        self.score -= 1

    # Step 3 - Remove last_udg from the coverings, from right to left,
    # stopping at the first multiset it does not cover
    idx = len(self.barcode_order) - 1
    while idx >= 0 and last_udg in self.udg_per_set[idx]:
        self.udg_per_set[idx].remove(last_udg)
        self.score -= 1
        idx -= 1

    # Step 4 - idx is now the first multiset left of the ones last_udg
    # covered. Merge it with its right neighbour if both are covered by
    # exactly the same udgs.
    has_right_neighbour = 0 <= idx < len(self.udg_per_set) - 1
    if has_right_neighbour and self.udg_per_set[idx] == self.udg_per_set[idx + 1]:
        covering = self.udg_per_set.pop(idx)
        self.score -= len(covering)
        left_ms = self.barcode_order.pop(idx)
        # After the pop, position idx holds the former right neighbour.
        self.barcode_order[idx] = self.barcode_order[idx] + left_ms
        self.len_sets -= 1

    return last_udg
def get_add_score(self, udg):
score = 0
......@@ -92,11 +161,6 @@ class PartialOrder:
# covering number points for the new udg
score += self.len_sets - leftmost_idx
# Negative points for redundant elements
# shift one left
remaining_size = sum(remaining_right.values()) - sum(left_non_overlap.values())
leftmost_idx -= 1
return score
def reverse_order(self):
......@@ -108,6 +172,7 @@ class PartialOrder:
return self.len_barcodes
_predicted_score = 0
_saved_neighbors = {}
def _next_node(d2g, partial_order, node, used):
node = str(node)
......@@ -134,6 +199,8 @@ def _next_node(d2g, partial_order, node, used):
return None
else:
neighbors.discard(max_neighbor_name)
global _predicted_score
_predicted_score = max_score
return max_neighbor_name
......@@ -161,9 +228,42 @@ def greedy_partial_order(d2g, node):
forward = False
po.reverse_order()
current_node = str(po.udg_order[-1].idx)
print(po.score, "reverse")
else:
reverse = False
return po
def bb_partial_order(d2g, node):
    """Explore partial orders depth-first from ``node``, yielding at each dead end.

    Starting from the udg of ``node``, the order is extended to the right
    with whatever ``_next_node`` proposes. When ``_next_node`` returns
    ``None``, a copy of the current order is yielded, the rightmost udg is
    removed and the search resumes from the udg that is now rightmost.

    NOTE(review): ``_next_node`` and ``_saved_neighbors`` are defined
    elsewhere in the file; presumably ``_saved_neighbors`` holds the
    not-yet-tried neighbours of each node. Confirm before relying on this.

    :param d2g: Graph exposing ``nodes()`` and ``node_by_idx``.
    :param node: Index of the node the exploration starts from.
    :return: A generator of ``PartialOrder`` copies, one per dead end.
    """
    used_nodes = {str(n): False for n in d2g.nodes()}
    current_node_name = str(node)
    used_nodes[current_node_name] = True

    po = PartialOrder()
    po.add_right(d2g.node_by_idx[int(node)])

    while True:
        next_node_name = _next_node(d2g, po, current_node_name, used_nodes)

        # We found a new deeper solution
        if next_node_name is not None:
            po.add_right(d2g.node_by_idx[int(next_node_name)])
            used_nodes[next_node_name] = True
            current_node_name = next_node_name
        # All the possible solutions have been explored
        elif len(po) == 0:
            return
        # We are in a dead end, must go back one step
        else:
            yield po.copy()
            po.remove_right()
            used_nodes[current_node_name] = False
            # pop() rather than del: the node may have no saved entry.
            _saved_neighbors.pop(current_node_name, None)
            # The starting udg itself was removed: nothing left to explore.
            if not po.udg_order:
                return
            # remove_right() returns the udg it removed, so resume from the
            # udg that is rightmost now, not from the removed one.
            current_node_name = str(po.udg_order[-1].idx)
......@@ -6,7 +6,7 @@ import sys
import random
from deconvolution.d2graph import d2_graph as d2
from barcodes.partialorder import greedy_partial_order
from barcodes.partialorder import greedy_partial_order, bb_partial_order
def parse_arguments():
......@@ -44,11 +44,9 @@ def main():
all_nodes = list(largest_component.nodes())
rnd_node = all_nodes[random.randint(0, len(all_nodes)-1)]
po = greedy_partial_order(largest_component, rnd_node)
print("barcodes", len(po))
print("sets", po.len_sets)
print("udgs", po.len_udgs)
print("score", po.score)
# po = greedy_partial_order(largest_component, rnd_node)
for po in bb_partial_order(largest_component, rnd_node):
print("barcodes", len(po), "sets", po.len_sets, "udgs", po.len_udgs, "score", po.score)
if __name__ == "__main__":
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment