Commit e8599455 authored by Yoann Dufresne
Browse files

cleaning workspace

parent ade48191
import sys

# Echo the first line received on standard input.
# The `sys` module has no `readline` attribute; the line reader lives on
# the `sys.stdin` file object.
print(sys.stdin.readline())
......@@ -49,6 +49,9 @@ class D2Graph(nx.Graph):
return G
def clone(self):
    """Return the subgraph of this graph induced by all of its nodes."""
    every_node = list(self.nodes())
    return self.subgraph(every_node)
def construct_from_barcodes(self, index_size=3, verbose=True, debug=False):
# Compute all the d-graphs
......@@ -79,6 +82,14 @@ class D2Graph(nx.Graph):
self.compute_distances()
def get_covering_variables(self, udg):
    """Return the frozenset of barcode-edge indices covered by ``udg``.

    Each edge in ``udg.edges`` is translated to its index through
    ``self.barcode_edge_idxs``; an edge missing from that mapping raises
    ``KeyError``.
    """
    return frozenset(self.barcode_edge_idxs[edge] for edge in udg.edges)
def save(self, filename):
with open(filename, "w") as fp:
# First line nb_nodes nb_cov_var
......@@ -245,4 +256,3 @@ class D2Graph(nx.Graph):
for dmer in removable_dmers:
del self.index[dmer]
......@@ -15,6 +15,12 @@ class Path(list):
if len(udgs) == 0:
return
for udg in udgs:
# Register edges
for barcode_edge in udg.edges:
edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
self.covering_variables[edge_idx] += 1
# Special case for previously empty path
if len(self) == 0:
# 4 because it's the ideal case (1 node of difference with same length on 1 shift).
......@@ -31,14 +37,6 @@ class Path(list):
# Add the node
self.append(udg)
# Register edges
for barcode_edge in udg.edges:
edge_idx = self.d2g.barcode_edge_idxs[barcode_edge]
self.covering_variables[edge_idx] += 1
def revert(self):
    """Reverse the order of the elements of this path in place."""
    self.reverse()
def normalized_penalty(self):
    """Return the path penalty divided by the number of elements in the path.

    Raises ``ZeroDivisionError`` when the path is empty.
    """
    path_length = len(self)
    return self.penalty / path_length
......@@ -63,6 +61,16 @@ class Path(list):
return num_covered
def save_path(self, filename):
    """Write the subgraph induced by the nodes of this path to a GEXF file.

    Nodes are addressed in ``self.d2g`` by the string form of each
    element's ``idx``.
    """
    node_ids = [str(udg.idx) for udg in self]
    path_graph = self.d2g.subgraph(node_ids)
    nx.write_gexf(path_graph, filename)
def save_path_in_graph(self, filename):
    """Write a clone of the whole graph, annotated with this path, as GEXF.

    Every node visited by the path receives a ``"path"`` attribute holding
    its position in the path; the annotations are made on a clone obtained
    from ``self.d2g.clone()``.
    """
    annotated = self.d2g.clone()
    for position, node in enumerate(self):
        node_key = str(node.idx)
        annotated.nodes[node_key]["path"] = position
    nx.write_gexf(annotated, filename)
class Unitig(Path):
......
......@@ -37,10 +37,32 @@ def main():
# Take the principal component
largest_component_nodes = max(nx.connected_components(d2g), key=len)
largest_component = d2g.subgraph(largest_component_nodes)
unitigs = compute_unitigs(largest_component)
path = pa.construct_path_from_unitigs(unitigs, largest_component)
print(path.covering_score())
import path_optimization as po
# Start optimization
optimizer = po.Optimizer(largest_component)
optimizer.init_random_solutions(1)
solution = optimizer.solutions[0]
print(solution)
print(solution.covering_score())
optimizer.extends_until_end(solution)
print(solution.covering_score())
solution.save_path_in_graph("data/test_d2_path.gexf")
solution.save_path("data/test_path.gexf")
# unitigs = compute_unitigs(largest_component)
# path = pa.construct_path_from_unitigs(unitigs, largest_component)
# print("\n".join([str(x) for x in path]))
# print(path.covering_score())
# diameter = nx.diameter(largest_component)
# print(diameter)
# Write the simplified graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name)
......
#!/usr/bin/env python3
import sys
import math
import networkx as nx
import itertools
import d_graph as dg
import d2_graph as d2
from d2_algorithms import greedy_reduct, filter_singeltons, compute_unitigs, compute_path_from_unitigs
def main():
    """Build the d2-graph of an input graph and write its greedy reduction.

    The input path is read from ``sys.argv[1]`` and must end with
    ``.graphml`` or ``.gexf``. The function writes
    ``data/optimization.tsv``, ``data/d2_graph.gexf`` and
    ``data/d2_graph_greedy.gexf``.

    Exits through ``sys.exit`` with an explanatory message when the
    argument is missing or has an unsupported extension, instead of
    failing later on a missing graph.
    """
    # Parsing the input file
    if len(sys.argv) < 2:
        sys.exit("Usage: {} <graph.graphml|graph.gexf>".format(sys.argv[0]))

    filename = sys.argv[1]
    if filename.endswith('.graphml'):
        G = nx.read_graphml(filename)
    elif filename.endswith('.gexf'):
        G = nx.read_gexf(filename)
    else:
        # Without this branch G would stay None and be handed to D2Graph.
        sys.exit(
            "Unsupported input format: {!r} (expected .graphml or .gexf)".format(filename)
        )

    d2g = d2.D2Graph(G, index_size=8)
    d2g.save("data/optimization.tsv")

    # Only the graph part of the returned pair is used here.
    G, _ = d2g.to_nx_graph()
    nx.write_gexf(G, "data/d2_graph.gexf")

    print("Greedy reduction of the graph")
    greedy = greedy_reduct(d2g)
    nx.write_gexf(greedy, "data/d2_graph_greedy.gexf")

    # print("Compute unitigs from greedy reduced graph")
    # unitigs = compute_unitigs(greedy, d2g)
    # # Compute greedy complete path from unitigs regarding most efficient path between them
    # path = compute_path_from_unitigs(d2g, unitigs)
    # path.save_d2(d2g, "data/d2_greedy_path.gexf")
if __name__ == "__main__":
main()
import networkx as nx
# Build a small test graph: 30 labelled nodes laid out in a sequence where
# label 7 appears twice, each position being linked to the 3 following ones.
G = nx.Graph()
labels = list(range(30))

# create nodes
G.add_nodes_from(labels)
names = {lab: lab for lab in labels}
nx.set_node_attributes(G, names, "test")

# insert duplications
labels.insert(23, 7)
print(labels)

# create links
for i, lab in enumerate(labels):
    # Slicing stops at the end of the list, so the last positions simply
    # get fewer links.
    for follower in labels[i + 1:i + 4]:
        G.add_edge(lab, follower)

nx.write_graphml(G, "simple_duplicated_3links.graphml")
......@@ -29,7 +29,6 @@ def construct_path_from_unitigs(unitigs, d2g):
unitigs = [utg for utg in unitigs if path.covering_difference(utg) > 0]
print()
return path
......@@ -59,7 +58,7 @@ def _search_way_to_next_unitig(path, unitigs, d2g):
for extension in best_paths:
utg = endpoints[extension[-1]]
# if the utg is in the wrong orientation
if utg[-1] == path[-1]:
if utg[-1] == extension[-1]:
utg.reverse()
complete_path = Path(d2g)
......@@ -83,6 +82,7 @@ def _search_way_to_next_unitig(path, unitigs, d2g):
"""
def _search_endpoint(start_udg, targets, d2g, forbidden_udgs):
marked_nodes = {x: (None, None) for x in forbidden_udgs}
covered_variables = set(forbidden_udgs.covering_variables.keys())
# Init Dijkstra
to_explore = [start_udg]
......@@ -94,8 +94,17 @@ def _search_endpoint(start_udg, targets, d2g, forbidden_udgs):
# Select min penalty in to_explore
current = min(to_explore, key=lambda x: marked_nodes[x][0])
current_penalty = marked_nodes[current][0]
to_explore.remove(current)
# Filter neighbors by their covering values
neighbors = d2g.neighbors(str(current.idx))
filtered_neighbors = []
for n in neighbors:
nei = d2g.node_by_idx[int(n)]
if len(d2g.get_covering_variables([nei]) - covered_variables) > 0:
filtered_neighbors.append(n)
neighbors = filtered_neighbors
# Explore all the neighbors of the current node.
for nei_idx in neighbors:
nei_udg = d2g.node_by_idx[int(nei_idx)]
......
import random
from d2_path import Path
class Optimizer:
    """Greedy builder of paths over a d2-graph.

    ``d2g`` is the graph explored; it must expose ``neighbors``,
    ``node_by_idx``, ``get_covering_variables`` and edge data holding a
    ``"distance"`` entry. Generated solutions are stored in
    ``self.solutions``.
    """

    def __init__(self, d2g):
        self.d2g = d2g
        self.solutions = []

    def init_random_solutions(self, nb_solutions):
        """Append ``nb_solutions`` randomly initialised solutions."""
        for _ in range(nb_solutions):
            rnd_sol = Solution(self.d2g)
            rnd_sol.random_init()
            self.solutions.append(rnd_sol)

    def extends_until_end(self, solution):
        """Extend ``solution`` greedily from both of its ends.

        The solution is extended from its last element until no candidate
        is left, then reversed in place and extended again the same way.
        The solution length is printed after every successful extension.
        """
        while self.extends(solution):
            print(len(solution))

        solution.reverse()
        print("reverse the solution")

        while self.extends(solution):
            print(len(solution))

    def extends(self, solution):
        """Append the best unused neighbour of the last element of ``solution``.

        Candidates are the graph neighbours of the last element that are not
        already in the solution. They are ranked, in order, by:

        1. whether they bring at least one covering variable that the
           solution does not already count (such candidates come first);
        2. the ``"distance"`` stored on the edge to the current element;
        3. their ``get_link_divergence()`` value.

        Returns ``True`` when an element was appended, ``False`` when the
        solution is empty or has no remaining candidate.
        """
        # An empty solution has no end to extend from.
        if len(solution) == 0:
            return False

        # Get all the neighbors that are not already part of the solution
        cur_id = str(solution[-1].idx)
        neighbors = []
        for nei_id in self.d2g.neighbors(cur_id):
            nei = self.d2g.node_by_idx[int(nei_id)]
            if nei not in solution:
                neighbors.append(nei)

        if len(neighbors) == 0:
            return False

        # Variables already covered at least once by the solution
        current_vars = frozenset(
            var for var, count in solution.covering_variables.items() if count > 0
        )

        def priority(udg):
            new_vars = self.d2g.get_covering_variables(udg) - current_vars
            return (
                0 if new_vars else 1,
                self.d2g[str(udg.idx)][cur_id]["distance"],
                udg.get_link_divergence(),
            )

        # Choose using the multiple optimization directions
        next_udg = min(neighbors, key=priority)
        solution.add_path([next_udg])
        return True
class Solution(Path):
    """Path that can be re-initialised from a randomly chosen graph node."""

    def __init__(self, d2g):
        super().__init__(d2g)

    def random_init(self):
        """Reset this solution to a single node drawn at random from the graph."""
        candidate_ids = list(self.d2g.nodes())
        chosen_id = random.choice(candidate_ids)
        start_node = self.d2g.node_by_idx[int(chosen_id)]

        # NOTE(review): clear() only empties the list itself; whether Path
        # keeps other per-path state that should also be reset is not
        # visible here -- confirm.
        self.clear()
        self.add_path([start_node])
networkx>=2.2
termcolor>=1.1
bidict>=0.18
\ No newline at end of file
import networkx as nx
# Check that writing into the dict returned by G[u][v] updates the
# attributes of the edge in the graph itself.
G = nx.path_graph(3)
print(G.edges(data=True))

G[0][1]["test"] = 2

# Alternatives that were tried:
# nx.set_edge_attributes(G, 0, "test")
# edge = list(G.edges(data=True))[0]
print(G.edges(data=True))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment