Commit 1bc64e01 authored by Yoann Dufresne
Browse files

scored unitig extraction ok

parent e7f0a973
...@@ -91,6 +91,7 @@ def filter_singeltons(graph): ...@@ -91,6 +91,7 @@ def filter_singeltons(graph):
""" Compute the unambiguous paths in a d2g. The d2g must not contain singletons. """ Compute the unambiguous paths in a d2g. The d2g must not contain singletons.
The unitigs are sorted by increasing penalty first and decreasing size second.
@param d2g a d2g graph @param d2g a d2g graph
@return a list of unitigs @return a list of unitigs
""" """
...@@ -117,6 +118,9 @@ def compute_unitigs(d2g): ...@@ -117,6 +118,9 @@ def compute_unitigs(d2g):
for node in unitig: for node in unitig:
used_nodes[node.idx] = True used_nodes[node.idx] = True
# Sort the unitigs
unitigs.sort(key=lambda x: (x.normalized_penalty(), -len(x)))
return unitigs return unitigs
...@@ -127,7 +131,8 @@ def compute_unitigs(d2g): ...@@ -127,7 +131,8 @@ def compute_unitigs(d2g):
@return The constructed unitig @return The constructed unitig
""" """
def compute_unitig_from(d2g, node): def compute_unitig_from(d2g, node):
unitig = Unitig(udgs=[node]) unitig = Unitig()
unitig.add_udgs([node])
if d2g.degree(str(node.idx)) == 2: if d2g.degree(str(node.idx)) == 2:
left, right = d2g.neighbors(str(node.idx)) left, right = d2g.neighbors(str(node.idx))
else: else:
...@@ -142,7 +147,7 @@ def compute_unitig_from(d2g, node): ...@@ -142,7 +147,7 @@ def compute_unitig_from(d2g, node):
# Extends second side # Extends second side
prev_node = node prev_node = node
current_node = d2g.node_by_idx[int(right)] if right != None else None current_node = d2g.node_by_idx[int(right)] if right is not None else None
unitig = extend_unitig(unitig, d2g, prev_node, current_node) unitig = extend_unitig(unitig, d2g, prev_node, current_node)
return unitig return unitig
...@@ -155,10 +160,8 @@ def compute_unitig_from(d2g, node): ...@@ -155,10 +160,8 @@ def compute_unitig_from(d2g, node):
@param current_node Node to add into the unitig and used to select the next node to add. If not set, stop the extension. @param current_node Node to add into the unitig and used to select the next node to add. If not set, stop the extension.
@return Return the modified unitig. @return Return the modified unitig.
""" """
def extend_unitig(unitig, d2g, prev_node, current_node): def extend_unitig(unitig, d2g, prev_node, current_node):
if current_node == None: if current_node is None:
return unitig return unitig
# Add the node # Add the node
......
...@@ -5,49 +5,48 @@ import networkx as nx ...@@ -5,49 +5,48 @@ import networkx as nx
""" """
class Path(list): class Path(list):
def __init__(self, udgs=[]): def __init__(self):
super(Path, self).__init__() super(Path, self).__init__()
self.udgs = [x for x in udgs] self.penalty = 0
def add_udgs(self, udgs): def add_udgs(self, udgs):
self.udgs.extend(udgs) if len(udgs) == 0:
return
# Special case for previously empty path
if len(self) == 0:
# 4 because it's the ideal case (1 node of difference with same length on 1 shift).
self.penalty = 4
self.append(udgs[0])
udgs = udgs[1:]
# Add udg one by one
for udg in udgs:
# Compute distance
dist = udg.distance_to(self[-1])
# Update penalty regarding distance
self.penalty += pow(dist, 2)
# Add the node
self.append(udg)
def add_path(self, path): def add_path(self, path):
self.add_udgs(path.udgs) self.add_udgs(path.udgs)
def revert(self): def revert(self):
self.udgs = [x for x in self.udgs[::-1]] self.reverse()
def get_penalty(self, d2g):
penalty = 0
for idx, node in enumerate(self.udgs[1:]):
prev_node = self.udgs[idx-1]
penalty += pow()
return penalty def normalized_penalty(self):
return self.penalty / len(self)
def __repr__(self):
return f"[{','.join([str(x) for x in self.udgs])}]"
class Unitig(Path): class Unitig(Path):
def __init__(self, udgs=[]): def __init__(self):
super(Unitig, self).__init__(udgs) super(Unitig, self).__init__()
def add_left(self, node):
self.udgs.insert(0,node)
def add_right(self, node): def add_right(self, udg):
self.udgs.append(node) self.add_udgs([udg])
...@@ -38,6 +38,9 @@ def main(): ...@@ -38,6 +38,9 @@ def main():
largest_component = d2g.subgraph(largest_component_nodes) largest_component = d2g.subgraph(largest_component_nodes)
unitigs = compute_unitigs(largest_component) unitigs = compute_unitigs(largest_component)
for ut in unitigs:
print(ut.normalized_penalty(), len(ut))
# Write the simplified graph # Write the simplified graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name) # nx.write_gexf(d2g.nx_graph, args.output_d2_name)
......
...@@ -169,7 +169,7 @@ class Dgraph(object): ...@@ -169,7 +169,7 @@ class Dgraph(object):
def __lt__(self, other): def __lt__(self, other):
my_tuple = (self.get_link_divergence(), self.get_optimal_score()) my_tuple = (self.get_link_divergence(), self.get_optimal_score())
other_tuple = (other.get_link_divergence(), other.get_optimal_score()) other_tuple = (other.get_link_divergence(), other.get_optimal_score())
return (my_tuple < other_tuple) return my_tuple < other_tuple
def __hash__(self): def __hash__(self):
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment