Commit 9e8cd8ae authored by Rayan  CHIKHI's avatar Rayan CHIKHI

modifs to evaluate path

parent 83e8d871
WORKDIR="snake_exec" if "outdir" not in config else config["outdir"]
N=[5000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[5] if "d" not in config else config["d"] # Average coverage of each molecule
M=[2] if "m" not in config else config["m"] # Average number of molecule per barcode
N=[10000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[10] if "d" not in config else config["d"] # Average coverage of each molecule
M=[3] if "m" not in config else config["m"] # Average number of molecule per barcode
M_DEV=[0] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
iter=1
......
......@@ -183,15 +183,24 @@ def str_to_udg_lists(s):
udg = s.replace("]", "").replace(' [', '[')
return udg.split('[')[1:]
# speeds up networkx access to attributes
cached_udg_attr = None
cached_score_attr = None
def parse_dg_name(gr, name):
udg = nx.get_node_attributes(gr, 'udg')[name]
global cached_udg_attr, cached_score_attr
if cached_udg_attr is None:
cached_udg_attr = nx.get_node_attributes(gr, 'udg')
udg = cached_udg_attr[name]
res = str_to_udg_lists(udg)
if len(res) != 3:
print("parsing problem:",res)
central, h1, h2 = res
idx = name
score = nx.get_node_attributes(gr, 'score')[name]
if cached_score_attr is None:
cached_score_attr = nx.get_node_attributes(gr, 'score')
score = cached_score_attr[name]
# Parse hands
h1 = h1.split(', ')
......@@ -259,21 +268,26 @@ def compute_next_nodes(d2_component):
# Get the current molecule idxs
molecule_idxs = mols_from_node(head[1])
#print("node",node,"dg name",dg_names[node],"mol idxs",molecule_idxs)
for mol_idx in molecule_idxs:
nexts = []
for neighbor in d2_component[node]:
# nei_head: central node of the neighbor of 'node'
nei_head, _, _ = dg_names[neighbor]
nei_mols = mols_from_node(nei_head[1])
# only consider neighbor molecules that are strictly bigger than the current molecule idx considered (from 'node')
nei_mols = [x for x in nei_mols if x > mol_idx]
# If there are molecule next
if len(nei_mols) > 0:
next_nei_mol = min(nei_mols)
# append to the neighbors of (node,mol_idx) that 'neighbor' if it contains a molecule that's bigger than mol_idx
nexts.append((next_nei_mol, neighbor))
nexts.sort(key=lambda x: x[0])
next_nodes[node][mol_idx] = nexts
# print(next_nodes)
#print("next nodes of node",node,"mol idx",mol_idx,":",next_nodes)
return next_nodes
......@@ -288,7 +302,12 @@ def compute_longest_increasing_paths(d2_component):
for mol_idx in next_nodes[start_node]:
recursive_longest_path(start_node, mol_idx , next_nodes, longest_paths)
# Get the longest path size
test_node = '5339'
for mol in longest_paths[test_node]:
print("investigating node",test_node,"mol",mol,longest_paths[test_node][mol])
# Get the longest path size,
# across all barcode graph nodes and all molecules in these barcodes
max_len, node_val, mol_idx = 0, None, -1
for node in longest_paths:
for mol in longest_paths[node]:
......@@ -315,6 +334,7 @@ def backtrack_longest_path(node, molecule, longest_paths, path=[]):
def recursive_longest_path(current_node, current_molecule, next_nodes, longest_paths):
# Dynamic programming
if current_node in longest_paths and current_molecule in longest_paths[current_node]:
#print("getting cached result for node",current_node,"mol",current_molecule,longest_paths[current_node][current_molecule])
return longest_paths[current_node][current_molecule]
longest = 0
......@@ -462,8 +482,20 @@ def main():
if args.type == "path":
barcode_graph = load_graph(args.barcode_graph)
frequencies = parse_path_graph_frequencies(graph, barcode_graph)
if len(list(nx.connected_components(graph))) != 1:
exit("when running evaluate.py --type path, the graph should have a single connected component (it's supposed to be a path, after all)")
# compute LIS over the path
longest_path = compute_longest_increasing_paths(graph)
print("--- Largest component analysis ---")
print(f"Size of the longest path: {len(longest_path)}")
#print("Jumps in central nodes:") # what does this do?
#print(path_to_jumps(longest_path))
return
# get over/under counted molecules
print("--- Under/over molecule counts ---")
frequencies = parse_path_graph_frequencies(graph, barcode_graph)
print_path_summary(frequencies, light_print=args.light_print)
elif args.type == "dgraphs":
udg_per_node = parse_udg_qualities(graph)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment