import random


def findRandomPath(G, u, n, previous_path_nodes=None):
    """Return a random simple path in G that starts at u and has n edges.

    The walk is greedy: at every step one not-yet-visited neighbour of the
    current node is drawn uniformly at random.  There is no backtracking, so
    a dead end aborts the whole attempt even if another choice would have
    succeeded.

    Args:
        G: graph-like object exposing ``neighbors(node)`` that returns an
            iterable of hashable nodes.
        u: start node of the path.
        n: number of steps to take; the returned path holds ``n + 1`` nodes.
            ``n <= 0`` yields the single-node path ``[u]``.
        previous_path_nodes: optional iterable of nodes that the path must
            not enter.  It is copied, never modified.

    Returns:
        The list of nodes ``[u, ...]`` on success, or ``None`` when the walk
        reaches a node without any admissible neighbour before n steps.
    """
    # Work on a private copy: the caller's collection stays untouched and no
    # state is shared between calls through a mutable default value.
    visited = set(previous_path_nodes) if previous_path_nodes is not None else set()

    path = [u]
    current = u
    for _ in range(n):
        # Mark the current node *before* filtering its neighbours, so that a
        # self-loop edge cannot put the same node on the path twice.
        visited.add(current)
        candidates = list(set(G.neighbors(current)) - visited)
        if not candidates:
            return None
        current = random.choice(candidates)
        path.append(current)
    return path
if tuple(sorted(path)) in seen_paths: continue # avoids looking at the same path twice + seen_paths.add(tuple(sorted(path))) #print("path",path) central_nodes = [graph.nodes[x]['udg'].split()[0] for x in path] #print(path,central_nodes) @@ -99,6 +101,8 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100) nb_bad_paths += 1 print("accuracy for l=%d:" % path_len,nb_good_paths / (nb_good_paths + nb_bad_paths)) +# ---- sensitivity evaluation + def is_there_path(graph,molecules_to_nodes,sought_path): possible_central_nodes = [] for mol in sought_path: @@ -108,7 +112,7 @@ def is_there_path(graph,molecules_to_nodes,sought_path): if nx.is_connected(graph.subgraph(mols)): #print("found connected path",mols) return True - print("found no connected paths",sought_path) + #print("found no connected paths",sought_path) return False def evaluate_sensitivity_paths(path_len,overlap_length=7000): @@ -140,8 +144,7 @@ def main(): graph = load_graph(args.filename) p = Pool(4) - #p.map(evaluate_accuracy_paths, [1,2,3,4]) - + p.map(evaluate_accuracy_paths, [1,2,3,4]) p.map(evaluate_sensitivity_paths, [1,2,3,4]) if __name__ == "__main__":