Commit 01d8b380 by Yoann Dufresne

### partial order datastructure + add right

parent a2baf0b0
 from collections import Counter class PartialOrder: def __init__(self): # --- Structural components --- # The barcode order is a succession of multiset of barcodes self.barcode_order = [] # The udg order is a succession of udgs following some rules: # 1 - Two successive udgs define 3 successive multiset of barcodes: A-B ; A⋂B ; B-A # 2 - 3 successive udgs A, B, C can't define successive sets with "holes". # ie C can't overlap A-B without completely covering A⋂B self.udg_order = [] # udg_per_set have the same size than barcode_order. # All the udg at index i completely cover the barcodes in barcode_order[i] self.udg_per_set = [] # --- Counting components --- # Number of barcode ordered. Same as the sum of all the values in all the multisets in barcode_order self.len_barcodes = 0 # The number of successive multiset. Same as len(barcode_order). self.len_sets = 0 # The number of successive udg. Same as len(udg_order) self.len_udgs = 0 # TODO: score def _get_right_overlaps(self, udg): """" Get the overlap of an udg with the right part of the multiset partial order. :param udg: The udg to overlap :returns: The index of the leftmost overlapped multiset, the non overlapped part of the leftmost covered multiset, the new multiset (non overlapping barcodes) """ remaining_barcodes = Counter(udg.nodes) current_set_idx = self.len_sets - 1 # Will look for full overlaps from right to left while current_set_idx >= 0: ms = self.barcode_order[current_set_idx] if len(ms - remaining_barcodes) == 0 and ms != remaining_barcodes: current_set_idx -= 1 remaining_barcodes -= ms else: # leftmost multiset , leftmost non overlapping, non overlapping barcodes return current_set_idx, ms - remaining_barcodes, remaining_barcodes - ms return -1, Counter(), remaining_barcodes def add_right(self, udg): self.udg_order.append(udg) self.len_udgs += 1 # Empty case if len(self) == 0: self.barcode_order.append(Counter(udg.nodes)) self.udg_per_set.append({udg}) return # Step 1 - Determine overlapping multisets from right to left leftmost_idx, left_non_overlap, new_multiset = self._get_right_overlaps(udg) # Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2) if len(left_non_overlap) > 0: ms = self.barcode_order[leftmost_idx] left_ms = left_non_overlap right_ms = ms - left_ms self.barcode_order[leftmost_idx] = left_ms leftmost_idx += 1 self.barcode_order.insert(leftmost_idx, right_ms) # Copy the previous overlapping udg set for the new multiset self.udg_per_set.insert(leftmost_idx, self.udg_per_set[leftmost_idx-1].copy()) self.len_sets += 1 # Step 3 - Add a new multiset on the right for the remaining barcodes if len(new_multiset) > 0: self.barcode_order.append(new_multiset) self.udg_per_set.append(set()) self.len_sets += 1 self.len_barcodes += sum(new_multiset.values()) # Step 4 - Add the udg as covering the right multisets for idx in range(max(0, leftmost_idx), len(self.udg_per_set)): self.udg_per_set[idx].add(udg) # TODO: Step 5 - Modify score def __len__(self): return self.len_barcodes
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!