Skip to content
Snippets Groups Projects
Commit 80b70c7a authored by Rayan Chikhi's avatar Rayan Chikhi
Browse files

another wave of bugfixes on the eval script

parent 31a82afc
No related branches found
No related tags found
No related merge requests found
......@@ -28,40 +28,45 @@ def load_graph(filename):
print("Wrong file format. Require graphml or gefx format", file=sys.stderr)
exit()
""" return a random path in G starting in u and having n nodes """
""" return a random path in G starting in u and having n _edges_ """
import random
def findRandomPath(G,u,n,previous_path_nodes=set()):
if n==0:
return [u]
path = [u]
poss_neigh = list(set(G.neighbors(u)) - previous_path_nodes)
if len(poss_neigh) == 0: return None
neighbor = random.choice(poss_neigh)
new_previous_path_nodes = previous_path_nodes | set([u])
path = findRandomPath(G,neighbor,n-1, new_previous_path_nodes)
if path is None: return None
if len(path) != n: return None
return [u]+path
import itertools
def is_there_path_acc(central_nodes,overlap_length):
for mols in itertools.product(*central_nodes):
""" determine, given an ordered list of central nodes, whether there exists a coherent paths of overlapping molecules in them """
def is_there_path_acc(mols_in_central_nodes,min_overlap_length=5000): # don't consider overlaps smaller than 5kbp
for mols in itertools.product(*mols_in_central_nodes):
#print(mols)
last_end = None
last_start = None
good_path = True
for mol in mols:
for mol in sorted(mols):
start,end = mol
if last_end is None:
last_end = end
else:
if start > last_end-overlap_length:
#print("bad path",mols)
good_path = False
break
if last_start is None:
last_start = start
if not (start >= last_start and start <= last_end-min_overlap_length):
#print("bad path",mols)
good_path = False
break
last_end = end
last_start = start
if good_path:
return True
return False
""" converts a central node of a d-graph into its list of molecules (given the ground truth) """
def central_node_to_molecules(nodestr):
# format for a 2-merge: 1:NC_000913.3_298281_313280_0:0:0_0:0:0_2fb/1_NC_000913.3_338611_353610_0:0:0_0:0:0_37b/1
cur_node_mols = []
......@@ -72,15 +77,17 @@ def central_node_to_molecules(nodestr):
cur_node_mols += [(start,end)]
return cur_node_mols
def is_coherent_path(central_nodes, overlap_length):
mols = []
def is_coherent_path(central_nodes,path_len):
mols_in_central_nodes = []
for node in central_nodes:
cur_node_mols = central_node_to_molecules(node)
mols += [cur_node_mols]
return is_there_path_acc(mols,overlap_length)
mols_in_central_nodes += [cur_node_mols]
assert(len(mols_in_central_nodes) == path_len+1)
return is_there_path_acc(mols_in_central_nodes)
""" the main function that tests for accuracy of random paths """
graph = None
def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100):
def evaluate_accuracy_paths(path_len,max_paths_per_node=100):
global graph
nb_bad_paths = 0
nb_good_paths = 0
......@@ -92,10 +99,12 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
if path is None: continue
if tuple(sorted(path)) in seen_paths: continue # avoids looking at the same path twice
seen_paths.add(tuple(sorted(path)))
assert(len(path) == path_len+1)
#print("path",path)
central_nodes = [graph.nodes[x]['udg'].split()[0] for x in path]
assert(len(central_nodes) == path_len+1)
#print(path,central_nodes)
if is_coherent_path(central_nodes,overlap_length):
if is_coherent_path(central_nodes,path_len):
nb_good_paths += 1
else:
nb_bad_paths += 1
......@@ -103,6 +112,8 @@ def evaluate_accuracy_paths(path_len,overlap_length=7000,max_paths_per_node=100)
# ---- sensitivity evaluation
""" given an ordered list of molecules, determine if the graph contains a path of central nodse which have these molecules.
it does that by testing all possible combinations of d-graphs having those molecules in their central nodes """
def is_there_path(graph,molecules_to_nodes,sought_path):
possible_central_nodes = []
for mol in sought_path:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment