Commit 31a82afc authored by Rayan Chikhi
Browse files

fixes

parent 99fef3be
...@@ -28,24 +28,23 @@ def load_graph(filename): ...@@ -28,24 +28,23 @@ def load_graph(filename):
print("Wrong file format. Require graphml or gefx format", file=sys.stderr) print("Wrong file format. Require graphml or gefx format", file=sys.stderr)
exit() exit()
""" return a random path in G starting in u and having n nodes """
import random import random
def findRandomPath(G,u,n): def findRandomPath(G,u,n,previous_path_nodes=set()):
if n==0: if n==0:
return [u] return [u]
path = [u] path = [u]
poss_neigh = list(G.neighbors(u)) poss_neigh = list(set(G.neighbors(u)) - previous_path_nodes)
while u in path: if len(poss_neigh) == 0: return None
if len(poss_neigh) == 0: return None neighbor = random.choice(poss_neigh)
neighbor = random.choice(poss_neigh) new_previous_path_nodes = previous_path_nodes | set([u])
poss_neigh.remove(neighbor) path = findRandomPath(G,neighbor,n-1, new_previous_path_nodes)
path = findRandomPath(G,neighbor,n-1) if path is None: return None
if path is None: return None
return [u]+path return [u]+path
import itertools import itertools
def is_there_path(central_nodes,overlap_length): def is_there_path_acc(central_nodes,overlap_length):
for mols in itertools.product(*central_nodes): for mols in itertools.product(*central_nodes):
#print(mols) #print(mols)
last_end = None last_end = None
...@@ -78,7 +77,7 @@ def is_coherent_path(central_nodes, overlap_length): ...@@ -78,7 +77,7 @@ def is_coherent_path(central_nodes, overlap_length):
for node in central_nodes: for node in central_nodes:
cur_node_mols = central_node_to_molecules(node) cur_node_mols = central_node_to_molecules(node)
mols += [cur_node_mols] mols += [cur_node_mols]
return is_there_path(mols,overlap_length) return is_there_path_acc(mols,overlap_length)
graph = None graph = None
def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100): def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100):
...@@ -87,9 +86,12 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100) ...@@ -87,9 +86,12 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
nb_good_paths = 0 nb_good_paths = 0
for node in graph.nodes(): for node in graph.nodes():
nb_paths = 0 nb_paths = 0
seen_paths = set()
for _ in range(max_paths_per_node): for _ in range(max_paths_per_node):
path = findRandomPath(graph,node,path_len) path = findRandomPath(graph,node,path_len)
if path is None: continue if path is None: continue
if tuple(sorted(path)) in seen_paths: continue # avoids looking at the same path twice
seen_paths.add(tuple(sorted(path)))
#print("path",path) #print("path",path)
central_nodes = [graph.nodes[x]['udg'].split()[0] for x in path] central_nodes = [graph.nodes[x]['udg'].split()[0] for x in path]
#print(path,central_nodes) #print(path,central_nodes)
...@@ -99,6 +101,8 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100) ...@@ -99,6 +101,8 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
nb_bad_paths += 1 nb_bad_paths += 1
print("accuracy for l=%d:" % path_len,nb_good_paths / (nb_good_paths + nb_bad_paths)) print("accuracy for l=%d:" % path_len,nb_good_paths / (nb_good_paths + nb_bad_paths))
# ---- sensitivity evaluation
def is_there_path(graph,molecules_to_nodes,sought_path): def is_there_path(graph,molecules_to_nodes,sought_path):
possible_central_nodes = [] possible_central_nodes = []
for mol in sought_path: for mol in sought_path:
...@@ -108,7 +112,7 @@ def is_there_path(graph,molecules_to_nodes,sought_path): ...@@ -108,7 +112,7 @@ def is_there_path(graph,molecules_to_nodes,sought_path):
if nx.is_connected(graph.subgraph(mols)): if nx.is_connected(graph.subgraph(mols)):
#print("found connected path",mols) #print("found connected path",mols)
return True return True
print("found no connected paths",sought_path) #print("found no connected paths",sought_path)
return False return False
def evaluate_sensitivity_paths(path_len,overlap_length=7000): def evaluate_sensitivity_paths(path_len,overlap_length=7000):
...@@ -140,8 +144,7 @@ def main(): ...@@ -140,8 +144,7 @@ def main():
graph = load_graph(args.filename) graph = load_graph(args.filename)
p = Pool(4) p = Pool(4)
#p.map(evaluate_accuracy_paths, [1,2,3,4]) p.map(evaluate_accuracy_paths, [1,2,3,4])
p.map(evaluate_sensitivity_paths, [1,2,3,4]) p.map(evaluate_sensitivity_paths, [1,2,3,4])
if __name__ == "__main__": if __name__ == "__main__":
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment