Commit 84920ea8 authored by Rayan  CHIKHI's avatar Rayan CHIKHI

initial modifs for interval graphs evaluation acc/sens

parent 4782df1e
WORKDIR="snake_exec" if "outdir" not in config else config["outdir"]
N=[5000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[5] if "d" not in config else config["d"] # Average coverage of each molecule
N=[10000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[10] if "d" not in config else config["d"] # Average coverage of each molecule
M=[3] if "m" not in config else config["m"] # Average number of molecule per barcode
M_DEV=[0] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
M_DEV=[1] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
iter=1
# snake_experiments/simu_0_bar_n500_d5_m2.gexf
......
......@@ -10,6 +10,8 @@ import argparse
from termcolor import colored
import networkx as nx
graph_type = "interval" # will be set to "ecoli" if we detect that it is, in fact, a barcode graph simulated from ecoli
# this affects how we evaluate that path is correct: mols starts absolute diff < 9k for ecoli, < 5--10 for interval graphs
def parse_args():
parser = argparse.ArgumentParser(description='Process some integers.')
......@@ -47,6 +49,7 @@ given as input a list of molecules in central nodes,
return a stripped-down list that only includes molecules that overlap their neighbor
"""
def extract_likely_molecule_path_wrapper(path,starter_mol):
tolerance = 9000 if graph_type == "ecoli" else 10 # tolerance for ecoli =the max distance between two starting points, if min overlap is 6000 and mols at 15kbp of length
debug = False
res = []
direction = None
......@@ -54,7 +57,7 @@ def extract_likely_molecule_path_wrapper(path,starter_mol):
for i,mols in enumerate(path):
good_mol = None
for mol in mols:
if (abs(previous_mol[0] - mol[0])< 9000): # the max distance between two starting points, if min overlap is 6000 and mols at 15kbp of length
if (abs(previous_mol[0] - mol[0])< tolerance):
""" shouldn't enforce directionality. why? because a->b->c can sometimes be a->c->b and still be correct, because a,b,c are a clique """
#if i == 0:
#direction = "decreasing" if previous_mol[0] > mol[0] else "increasing"
......@@ -85,13 +88,26 @@ def is_there_path_acc(path): # don't consider overlaps smaller than 5kbp
""" converts a central node of a d-graph into its list of molecules (given the ground truth) """
def central_node_to_molecules(nodestr):
# format for a 2-merge: 1:NC_000913.3_298281_313280_0:0:0_0:0:0_2fb/1_NC_000913.3_338611_353610_0:0:0_0:0:0_37b/1
global graph_type
cur_node_mols = []
for ide in nodestr.split('NC_')[1:]: # specific to e coli
#print(ide)
x = ide.split("_")
start, end = int(x[1]), int(x[2])
cur_node_mols += [(start,end)]
if 'NC_' in nodestr: # ecoli specific
# format for a 2-merge: 1:NC_000913.3_298281_313280_0:0:0_0:0:0_2fb/1_NC_000913.3_338611_353610_0:0:0_0:0:0_37b/1
for ide in nodestr.split('NC_')[1:]: # specific to e coli
graph_type = "ecoli"
#print(ide)
x = ide.split("_")
start, end = int(x[1]), int(x[2])
cur_node_mols += [(start,end)]
else:
if ':' not in nodestr:
# a non-merged node
cur_node_mols += [(int(nodestr),int(nodestr)+1)]
else:
nodestr = nodestr.split(':')[1]
nodestr = nodestr.replace(']','').replace('[','')
for ide in nodestr.split('_'):
#print(ide)
cur_node_mols += [(int(ide),int(ide)+1)] # a tuple just for compatibility with the ecoli format.. should be just int(ide)
return cur_node_mols
def is_coherent_path(central_nodes,path_len):
......@@ -183,7 +199,7 @@ def evaluate_sensitivity_paths(path_len,overlap_length=7000):
for mol in mols:
molecules_to_nodes[mol] += [node]
all_molecules = sorted(all_molecules)
print("got",len(all_molecules),"molecules total")
#print("got",len(all_molecules),"molecules total")
nb_found, nb_not_found = 0,0
for i in range(len(all_molecules)-path_len+1):
sought_path = all_molecules[i:i+path_len]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment