Commit 31a82afc authored by Rayan Chikhi
Browse files

fixes

parent 99fef3be
......@@ -28,24 +28,23 @@ def load_graph(filename):
print("Wrong file format. Require graphml or gefx format", file=sys.stderr)
exit()
""" return a random path in G starting in u and having n nodes """
import random
def findRandomPath(G,u,n):
def findRandomPath(G,u,n,previous_path_nodes=set()):
if n==0:
return [u]
path = [u]
poss_neigh = list(G.neighbors(u))
while u in path:
if len(poss_neigh) == 0: return None
neighbor = random.choice(poss_neigh)
poss_neigh.remove(neighbor)
path = findRandomPath(G,neighbor,n-1)
if path is None: return None
poss_neigh = list(set(G.neighbors(u)) - previous_path_nodes)
if len(poss_neigh) == 0: return None
neighbor = random.choice(poss_neigh)
new_previous_path_nodes = previous_path_nodes | set([u])
path = findRandomPath(G,neighbor,n-1, new_previous_path_nodes)
if path is None: return None
return [u]+path
import itertools
def is_there_path(central_nodes,overlap_length):
def is_there_path_acc(central_nodes,overlap_length):
for mols in itertools.product(*central_nodes):
#print(mols)
last_end = None
......@@ -78,7 +77,7 @@ def is_coherent_path(central_nodes, overlap_length):
for node in central_nodes:
cur_node_mols = central_node_to_molecules(node)
mols += [cur_node_mols]
return is_there_path(mols,overlap_length)
return is_there_path_acc(mols,overlap_length)
graph = None
def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100):
......@@ -87,9 +86,12 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
nb_good_paths = 0
for node in graph.nodes():
nb_paths = 0
seen_paths = set()
for _ in range(max_paths_per_node):
path = findRandomPath(graph,node,path_len)
if path is None: continue
if tuple(sorted(path)) in seen_paths: continue # avoids looking at the same path twice
seen_paths.add(tuple(sorted(path)))
#print("path",path)
central_nodes = [graph.nodes[x]['udg'].split()[0] for x in path]
#print(path,central_nodes)
......@@ -99,6 +101,8 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
nb_bad_paths += 1
print("accuracy for l=%d:" % path_len,nb_good_paths / (nb_good_paths + nb_bad_paths))
# ---- sensitivity evaluation
def is_there_path(graph,molecules_to_nodes,sought_path):
possible_central_nodes = []
for mol in sought_path:
......@@ -108,7 +112,7 @@ def is_there_path(graph,molecules_to_nodes,sought_path):
if nx.is_connected(graph.subgraph(mols)):
#print("found connected path",mols)
return True
print("found no connected paths",sought_path)
#print("found no connected paths",sought_path)
return False
def evaluate_sensitivity_paths(path_len,overlap_length=7000):
......@@ -140,8 +144,7 @@ def main():
graph = load_graph(args.filename)
p = Pool(4)
#p.map(evaluate_accuracy_paths, [1,2,3,4])
p.map(evaluate_accuracy_paths, [1,2,3,4])
p.map(evaluate_sensitivity_paths, [1,2,3,4])
if __name__ == "__main__":
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment