Commit 1bc64e01 by Yoann Dufresne

### scored unitig extraction ok

parent e7f0a973
 ... @@ -91,6 +91,7 @@ def filter_singeltons(graph): ... @@ -91,6 +91,7 @@ def filter_singeltons(graph): """ Compute the unambiguous paths in a d2g. The d2g must not contain singletons. """ Compute the unambiguous paths in a d2g. The d2g must not contain singletons. The unitigs are sorted by increasing penalty first and decreasing size second. @param d2g a d2g graph @param d2g a d2g graph @return a list of unitigs @return a list of unitigs """ """ ... @@ -117,6 +118,9 @@ def compute_unitigs(d2g): ... @@ -117,6 +118,9 @@ def compute_unitigs(d2g): for node in unitig: for node in unitig: used_nodes[node.idx] = True used_nodes[node.idx] = True # Sort the unitigs unitigs.sort(key=lambda x: (x.normalized_penalty(), -len(x))) return unitigs return unitigs ... @@ -127,7 +131,8 @@ def compute_unitigs(d2g): ... @@ -127,7 +131,8 @@ def compute_unitigs(d2g): @return The constructed unitig @return The constructed unitig """ """ def compute_unitig_from(d2g, node): def compute_unitig_from(d2g, node): unitig = Unitig(udgs=[node]) unitig = Unitig() unitig.add_udgs([node]) if d2g.degree(str(node.idx)) == 2: if d2g.degree(str(node.idx)) == 2: left, right = d2g.neighbors(str(node.idx)) left, right = d2g.neighbors(str(node.idx)) else: else: ... @@ -142,7 +147,7 @@ def compute_unitig_from(d2g, node): ... @@ -142,7 +147,7 @@ def compute_unitig_from(d2g, node): # Extends second side # Extends second side prev_node = node prev_node = node current_node = d2g.node_by_idx[int(right)] if right != None else None current_node = d2g.node_by_idx[int(right)] if right is not None else None unitig = extend_unitig(unitig, d2g, prev_node, current_node) unitig = extend_unitig(unitig, d2g, prev_node, current_node) return unitig return unitig ... @@ -155,10 +160,8 @@ def compute_unitig_from(d2g, node): ... @@ -155,10 +160,8 @@ def compute_unitig_from(d2g, node): @param current_node Node to add into the unitig and used to select the next node to add. If not set, stop the extension. @param current_node Node to add into the unitig and used to select the next node to add. If not set, stop the extension. @return Return the modified unitig. @return Return the modified unitig. """ """ def extend_unitig(unitig, d2g, prev_node, current_node): def extend_unitig(unitig, d2g, prev_node, current_node): if current_node == None: if current_node is None: return unitig return unitig # Add the node # Add the node ... ...
 ... @@ -5,49 +5,48 @@ import networkx as nx ... @@ -5,49 +5,48 @@ import networkx as nx """ """ class Path(list): class Path(list): def __init__(self, udgs=[]): def __init__(self): super(Path, self).__init__() super(Path, self).__init__() self.udgs = [x for x in udgs] self.penalty = 0 def add_udgs(self, udgs): def add_udgs(self, udgs): self.udgs.extend(udgs) if len(udgs) == 0: return # Special case for previously empty path if len(self) == 0: # 4 because it's the ideal case (1 node of difference with same length on 1 shift. self.penalty = 4 self.append(udgs[0]) udgs = udgs[1:] # Add udg one by one for udg in udgs: # Compute distance dist = udg.distance_to(self[-1]) # Update penalty regarding distance self.penalty += pow(dist, 2) # Add the node self.append(udg) def add_path(self, path): def add_path(self, path): self.add_udgs(path.udgs) self.add_udgs(path.udgs) def revert(self): def revert(self): self.udgs = [x for x in self.udgs[::-1]] self.reverse() def get_penalty(self, d2g): penalty = 0 for idx, node in enumerate(self.udgs[1:]): prev_node = self.udgs[idx-1] penalty += pow() return penalty def normalized_penalty(self): return self.penalty / len(self) def __repr__(self): return f"[{','.join([str(x) for x in self.udgs])}]" class Unitig(Path): class Unitig(Path): def __init__(self, udgs=[]): def __init__(self): super(Unitig, self).__init__(udgs) super(Unitig, self).__init__() def add_left(self, node): self.udgs.insert(0,node) def add_right(self, node): def add_right(self, udg): self.udgs.append(node) self.add_udgs([udg])
 ... @@ -38,6 +38,9 @@ def main(): ... @@ -38,6 +38,9 @@ def main(): largest_component = d2g.subgraph(largest_component_nodes) largest_component = d2g.subgraph(largest_component_nodes) unitigs = compute_unitigs(largest_component) unitigs = compute_unitigs(largest_component) for ut in unitigs: print(ut.normalized_penalty(), len(ut)) # Write the simplified graph # Write the simplified graph # nx.write_gexf(d2g.nx_graph, args.output_d2_name) # nx.write_gexf(d2g.nx_graph, args.output_d2_name) ... ...
 ... @@ -169,7 +169,7 @@ class Dgraph(object): ... @@ -169,7 +169,7 @@ class Dgraph(object): def __lt__(self, other): def __lt__(self, other): my_tuple = (self.get_link_divergence(), self.get_optimal_score()) my_tuple = (self.get_link_divergence(), self.get_optimal_score()) other_tuple = (other.get_link_divergence(), other.get_optimal_score()) other_tuple = (other.get_link_divergence(), other.get_optimal_score()) return (my_tuple < other_tuple) return my_tuple < other_tuple def __hash__(self): def __hash__(self): ... ...
 import networkx as nx
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!