Commit 31633d6a authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

Change the method used to generate all the possible d-graphs and the way they are filtered

parent 71d73af0
......@@ -2,7 +2,7 @@ import networkx as nx
import itertools
from bidict import bidict
from d_graph import compute_all_max_d_graphs
from d_graph import compute_all_max_d_graphs, filter_dominated
class D2Graph(object):
......@@ -15,6 +15,7 @@ class D2Graph(object):
if verbose:
print("Compute the unit d-graphs")
self.d_graphs_per_node = compute_all_max_d_graphs(self.graph, debug=debug)
self.d_graphs_per_node = filter_dominated(self.d_graphs_per_node)
self.all_d_graphs = []
for d_graphs in self.d_graphs_per_node.values():
self.all_d_graphs.extend(d_graphs)
......
......@@ -67,7 +67,7 @@ class Dgraph(object):
def get_link_divergence(self):
return abs((self.score / self.get_optimal_score()) - 1)
return abs(self.score - self.get_optimal_score())
def get_optimal_score(self):
......@@ -118,6 +118,29 @@ class Dgraph(object):
return dist
""" Verify if dg1 is dominated by dg2. The domination is determined by two points: All the nodes
of dg1 are part of dg2 and the divergence of dg1 is greater than that of dg2.
@param dg1 (resp dg2) A d_graph object.
@return True if dg1 is dominated by dg2.
"""
def is_dominated(self, dg):
    """Tell whether this d-graph is dominated by ``dg``.

    This d-graph is dominated when both conditions hold:

    1. every node of this d-graph is also a node of ``dg``;
    2. this d-graph's link divergence is worse (greater) than that of
       ``dg``. When both d-graphs contain exactly the same nodes the
       divergence must be strictly greater; when this d-graph is a strict
       subset of ``dg``, an equal divergence is enough.

    @param dg The d-graph to compare against. It must provide ``to_list()``
        and ``get_link_divergence()``.
    @return True if this d-graph is dominated by ``dg``, False otherwise.
    """
    own_nodes = frozenset(self.to_list())
    other_nodes = frozenset(dg.to_list())

    # First condition: all of our nodes must be included in the other d-graph.
    if not own_nodes.issubset(other_nodes):
        return False

    own_divergence = self.get_link_divergence()
    other_divergence = dg.get_link_divergence()

    # Second condition: our divergence must not be better than the other's.
    # Since own_nodes is a subset of other_nodes, equal sizes mean equal sets;
    # in that case a tie must not count as domination, otherwise two
    # equivalent d-graphs would eliminate each other.
    if len(own_nodes) == len(other_nodes):
        return own_divergence > other_divergence
    return own_divergence >= other_divergence
def __eq__(self, other):
return self.to_ordered_lists() == other.to_ordered_lists()
......@@ -164,13 +187,13 @@ class Dgraph(object):
@return A dictionary associating each node to its list of all possible d-graphs. The d-graphs are sorted by decreasing ratio.
"""
def compute_all_max_d_graphs(graph, n_best=100, debug=False):
d_graphes = {}
d_graphs = {}
for node in list(graph.nodes()):
neighbors = list(graph.neighbors(node))
neighbors_graph = nx.Graph(graph.subgraph(neighbors))
node_d_graphes = []
node_d_graphs = []
# Find all the cliques (equivalent to compute all the candidate half d-graph)
cliques = list(nx.find_cliques(neighbors_graph))
......@@ -183,16 +206,66 @@ def compute_all_max_d_graphs(graph, n_best=100, debug=False):
d_graph = Dgraph(node)
d_graph.put_halves(clq1, clq2, neighbors_graph)
optimal_score = d_graph.get_optimal_score()
# For a minimal connection, to avoid too many shared nodes
if d_graph.score < optimal_score / 4 or d_graph.score >= 1.6 * optimal_score:
if d_graph.get_link_divergence() > d_graph.get_optimal_score() / 2:
continue
node_d_graphes.append(d_graph)
node_d_graphs.append(d_graph)
# Cut the distribution queue
d_graphes[node] = sorted(node_d_graphes)
# print(node_d_graphes)
d_graphs[node] = sorted(node_d_graphs)
# print(node_d_graphs)
return d_graphs
""" Add the new dg in the dgs list. If dg is dominated by another dg in the list, then it's
dropped. If any dg in the list is dominated by the dg to add, then, the new dg is added and
all the dominated dg are removed from the list.
@param dg A new dg to add/filter.
@param undominated_dgs_list A list of dgs in which none is dominated by another one.
@return The updated undominated list.
"""
def add_new_dg_regarding_domination(dg, undominated_dgs_list):
    """Insert ``dg`` into a list of mutually undominated d-graphs.

    If ``dg`` is dominated by a d-graph already in the list, the list is
    returned untouched. Otherwise every d-graph of the list that is dominated
    by ``dg`` is removed and ``dg`` is appended at the end.

    @param dg The d-graph to add. It must provide ``is_dominated(other)``.
    @param undominated_dgs_list The list to update. It is modified in place.
    @return The same list object as ``undominated_dgs_list``, updated.
    """
    survivors = []
    removed_any = False

    for current in undominated_dgs_list:
        # Once dg has been found to dominate an element, the original logic
        # no longer checks whether dg itself is dominated.
        if not removed_any and dg.is_dominated(current):
            return undominated_dgs_list
        if current.is_dominated(dg):
            removed_any = True
        else:
            survivors.append(current)

    survivors.append(dg)
    # Slice assignment keeps the caller's list object while dropping exactly
    # the dominated elements (no equality-based list.remove scans).
    undominated_dgs_list[:] = survivors
    return undominated_dgs_list
""" Filter the d-graphs by node. In a list of d-graph centered on a node n, if a d-graph is
completely included in another one and has a higher distance score to the optimal, then it is
filtered out.
@param d_graphs All the d-graphs to filter, sorted by central node.
@param in_place If true, modify the content of d_graph with the filtered version. If False,
copy all the content into a new dictionary.
@return The filtered dictionary of d-graphs per node.
"""
def filter_dominated(d_graphs, in_place=True):
    """Remove the dominated d-graphs from each per-node list.

    For every node, the d-graphs of its list are inserted one by one through
    ``add_new_dg_regarding_domination`` into a fresh list; that fresh list
    becomes the value stored for the node in the result.

    @param d_graphs A dictionary associating each node to its list of d-graphs.
    @param in_place If True, the values of ``d_graphs`` are replaced by the
        filtered lists and ``d_graphs`` itself is returned. If False, a new
        dictionary is filled and ``d_graphs`` is left unchanged.
    @return The dictionary of filtered d-graph lists per node.
    """
    filtered = d_graphs if in_place else {}

    # Rebinding the value of an existing key does not resize the dictionary,
    # so writing into d_graphs while iterating over it is safe.
    for node, d_graph_list in d_graphs.items():
        kept = []
        for dg in d_graph_list:
            add_new_dg_regarding_domination(dg, kept)
        filtered[node] = kept

    return filtered
......@@ -25,16 +25,16 @@ def main():
G, names = d2g.to_nx_graph()
nx.write_gexf(G, "data/d2_graph.gexf")
print("Greedy reduction of the graph")
greedy = filter_singeltons(greedy_reduct(d2g))
nx.write_gexf(greedy, "data/d2_graph_greedy.gexf")
# print("Greedy reduction of the graph")
# greedy = filter_singeltons(greedy_reduct(d2g))
# nx.write_gexf(greedy, "data/d2_graph_greedy.gexf")
print("Compute unitigs from greedy reducted graph")
unitigs = compute_unitigs(greedy, d2g)
# print("Compute unitigs from greedy reducted graph")
# unitigs = compute_unitigs(greedy, d2g)
# Compute greedy complete path from unitigs regarding most efficient path between them
path = compute_path_from_unitigs(d2g, unitigs)
path.save_d2(d2g, "data/d2_greedy_path.gexf")
# # Compute greedy complete path from unitigs regarding most efficient path between them
# path = compute_path_from_unitigs(d2g, unitigs)
# path.save_d2(d2g, "data/d2_greedy_path.gexf")
if __name__ == "__main__":
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment