diff --git a/deconvolution/d2_algorithms.py b/deconvolution/d2_algorithms.py index aba93f4fb36d53dfb58784a85937e9a42c4cc38a..77a5f771eb354552ed35cc0e80039e7987472442 100644 --- a/deconvolution/d2_algorithms.py +++ b/deconvolution/d2_algorithms.py @@ -35,10 +35,74 @@ def filter_singeltons(graph): return graph -def unitigs(graph): +def compute_unitigs(graph): """ Compute the unambiguious paths """ unitigs = [] + nodelist = list(graph.nodes()) + already_used = {node:False for node in nodelist} + + for node in nodelist: + # skip the node if already in use in some unitig + if already_used[node]: + continue + + # Start the unitig + unitig = [node] + already_used[node] = True + # Save the unitig + unitigs.append(unitig) + + # If 1 or 2 neighbors, part of the unitig + # Change unitig if not + if len(graph[node]) > 2 or len(graph[node]) == 0: + continue + + # Randomly determine the right and a left neighbors + neighbors = list(graph[node]) + if len(neighbors) == 1: + right = neighbors[0] + left = None + else: + right = neighbors[0] + left = neighbors[1] + + # --- Explore the RIGHT side of the unitig --- + previous, current = node, right + while len(graph[current]) == 2 and not already_used[current]: + unitig.append(current) + already_used[current] = True + + # Go to next neighbor + neighbors = list(graph[current]) + new_node = neighbors[0] if neighbors[0] != previous else neighbors[1] + previous = current + current = new_node + + # Add the right extremity of the unitig + if len(graph[current]) == 1: + unitig.append(current) + already_used[current] = True + + # --- Explore the LEFT side of the unitig --- + if left == None: + continue + + previous, current = node, left + while len(graph[current]) == 2 and not already_used[current]: + unitig.insert(0, current) + already_used[current] = True + + # Go to next neighbor + neighbors = list(graph[current]) + new_node = neighbors[0] if neighbors[0] != previous else neighbors[1] + previous = current + current = new_node + + # Add the right extremity of the unitig + if len(graph[current]) == 1: + unitig.insert(0,current) + already_used[current] = True return unitigs diff --git a/deconvolution/d_graph.py b/deconvolution/d_graph.py index e47d3ee49c06a62fd513f46a61173d5e995efffe..fc02ab862c84680ae78da56ba6dbc9c815180733 100644 --- a/deconvolution/d_graph.py +++ b/deconvolution/d_graph.py @@ -125,10 +125,6 @@ class Dgraph(object): rev_str = ",".join([f"[{'-'.join(l)}]" for l in rev_uniq_lst]) rev_hash = rev_str.__hash__() - # print() - # print(fwd_str) - # print(rev_str) - return int(min(fwd_hash, rev_hash)) diff --git a/deconvolution/deconvolve.py b/deconvolution/deconvolve.py index 5a6df4cb1606b39f0a7b44924e327658754910de..9e7069124064ec798244a7c13ccf81c0db9750ed 100755 --- a/deconvolution/deconvolve.py +++ b/deconvolution/deconvolve.py @@ -9,7 +9,7 @@ import itertools import d_graph as dg import d2_graph as d2 -from d2_algorithms import greedy_reduct, filter_singeltons +from d2_algorithms import greedy_reduct, filter_singeltons, compute_unitigs def main(): # Parsing the input file @@ -24,8 +24,16 @@ def main(): G, names = d2g.to_nx_graph() nx.write_gexf(G, "data/d2_graph.gexf") + + print("Greedy reduction of the graph") greedy = filter_singeltons(greedy_reduct(d2g)) nx.write_gexf(greedy, "data/d2_graph_greedy.gexf") + print("Compute unitigs from greedy reducted graph") + unitigs = compute_unitigs(greedy) + print("\n".join([str(x) for x in unitigs])) + print(len(unitigs)) + + if __name__ == "__main__": main()