Commit 383385f7 by Yoann Dufresne

### compute greedy simplification of d2 graph

parent fcdd78b1
 import networkx as nx


 def greedy_reduct(d2):
     """Build a graph linking every node to its closest neighbors only.

     Parameters:
         d2: object exposing two mappings:
             ``d2.nodes``     -- d-graph -> node identifier,
             ``d2.distances`` -- d-graph -> {neighbor d-graph: distance}.

     Returns:
         A new ``nx.Graph`` containing every node identifier of ``d2.nodes``
         and, for each d-graph that has recorded distances, one edge towards
         each neighbor located at the minimal distance (ties are all kept).
     """
     gG = nx.Graph()

     # Register the node identifiers (the dict values). Iterating d2.nodes
     # directly would add the d-graph keys instead, which are never used as
     # edge endpoints below and would pollute the graph with foreign nodes.
     gG.add_nodes_from(d2.nodes.values())

     for dgraph, node in d2.nodes.items():
         distances = d2.distances.get(dgraph)
         # No entry, or an empty one: nothing to link (min() would raise).
         if not distances:
             continue

         min_dist = min(distances.values())
         for n_dgraph, dist in distances.items():
             if dist == min_dist:
                 gG.add_edge(node, d2.nodes[n_dgraph])

     return gG


 def filter_singeltons(graph):
     """Remove the isolated nodes from graph.

     The graph is modified in place and also returned, so the call can be
     chained.
     """
     # Collect first: nodes cannot be removed while iterating the live view.
     isolated = [node for node in graph.nodes() if len(graph[node]) == 0]
     graph.remove_nodes_from(isolated)
     return graph


 def unitigs(graph):
     """Compute the unambiguous paths.

     Not implemented yet: the graph is ignored and an empty list is returned.
     """
     # Local renamed so that it no longer shadows the function itself.
     paths = []
     return paths
 ... ... @@ -6,15 +6,26 @@ from d_graph import compute_all_max_d_graphs class D2Graph(object): """D2Graph (read it (d-graph)²)""" def __init__(self, graph, index_size=10): def __init__(self, graph, index_size=8, verbose=True): super(D2Graph, self).__init__() self.graph = graph # Compute all the d-graphs if verbose: print("Compute the unit d-graphs") self.d_graphs = compute_all_max_d_graphs(self.graph) # Index all the d-graphes if verbose: print("Compute the dmer index") self.index = self.create_index_from_tuples(index_size) # Compute node distances for pair of dgraphs that share at least 1 dmer. if verbose: print("Compute a subset of distances") self.distances = self.compute_distances() # Create the graph self.graph, self.nodes = self.to_nx_graph() ... ... @@ -39,6 +50,38 @@ class D2Graph(object): return index def compute_distances(self): distances = {} for dmer, dgraphs in self.index.items(): if len(dgraphs) == 1: continue for idx1, dg1 in enumerate(dgraphs): # Add dist dict for dg1 if not dg1 in distances: distances[dg1] = {} for idx2 in range(idx1+1, len(dgraphs)): dg2 = dgraphs[idx2] if dg1 == dg2: continue # Add dist dict for dg2 if not dg2 in distances: distances[dg2] = {} # Distance computing and adding in the dist dicts d = dg1.distance_to(dg2) distances[dg1][dg2] = d distances[dg2][dg1] = d if len(distances[dg1]) == 0: del distances[dg1] return distances def create_index_ordered(self): index = {} ... ... @@ -92,4 +135,5 @@ class D2Graph(object): return G, nodes \ No newline at end of file
 class Path(object):
     """A path, stored as a list of nodes.

     Attributes are documented inline in ``__init__``.
     """

     def __init__(self):
         """Create a path that contains no node."""
         super(Path, self).__init__()
         # Nodes of the path; starts empty.
         self.nodes = list()

     def get_score(self):
         """Return the score of the path (currently the constant 0)."""
         score = 0
         return score


 # class D2_Path_finder(object):
 ... ... @@ -12,6 +12,7 @@ class Dgraph(object): self.score = 0 self.halves = [None,None] self.connexity = [None,None] self.nodes = [center] """ Compute the d-graph quality (score) according to the connectivity between the two halves. ... ... @@ -23,6 +24,7 @@ class Dgraph(object): self.score = 0 self.halves[0] = h1 self.halves[1] = h2 self.nodes = sorted([self.center] + h1 + h2) self.connexity[0] = {key:0 for key in self.halves[0]} self.connexity[1] = {key:0 for key in self.halves[1]} ... ... @@ -74,6 +76,26 @@ class Dgraph(object): return frozenset(self.to_list()) def distance_to(self, dgraph): other_nodes = dgraph.nodes dist = 0 idx1, idx2 = 0, 0 while(idx1 != len(self.nodes) and idx2 != len(other_nodes)): if self.nodes[idx1] == other_nodes[idx2]: idx1 += 1 idx2 += 1 else: dist += 1 if self.nodes[idx1] < other_nodes[idx2]: idx1 += 1 else: idx2 += 1 dist += len(self.nodes) - idx1 + len(other_nodes) - idx2 return dist def __eq__(self, other): return self.to_ordered_lists() == other.to_ordered_lists() ... ...
 ... ... @@ -9,6 +9,7 @@ import itertools import d_graph as dg import d2_graph as d2 from d2_algorithms import greedy_reduct, filter_singeltons def main(): # Parsing the input file ... ... @@ -23,5 +24,8 @@ def main(): G, names = d2g.to_nx_graph() nx.write_gexf(G, "data/d2_graph.gexf") greedy = filter_singeltons(greedy_reduct(d2g)) nx.write_gexf(greedy, "data/d2_graph_greedy.gexf") if __name__ == "__main__": main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!