Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
9e8cd8ae
Commit
9e8cd8ae
authored
May 13, 2020
by
Rayan CHIKHI
Browse files
modifs to evaluate path
parent
83e8d871
Changes
2
Hide whitespace changes
Inline
Side-by-side
Snakefile_data_simu
View file @
9e8cd8ae
WORKDIR="snake_exec" if "outdir" not in config else config["outdir"]
N=[
5
000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[
5
] if "d" not in config else config["d"] # Average coverage of each molecule
M=[
2
] if "m" not in config else config["m"] # Average number of molecule per barcode
N=[
10
000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[
10
] if "d" not in config else config["d"] # Average coverage of each molecule
M=[
3
] if "m" not in config else config["m"] # Average number of molecule per barcode
M_DEV=[0] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
iter=1
...
...
deconvolution/main/evaluate.py
View file @
9e8cd8ae
...
...
@@ -183,15 +183,24 @@ def str_to_udg_lists(s):
udg
=
s
.
replace
(
"]"
,
""
).
replace
(
' ['
,
'['
)
return
udg
.
split
(
'['
)[
1
:]
# speeds up networkx access to attributes
cached_udg_attr
=
None
cached_score_attr
=
None
def
parse_dg_name
(
gr
,
name
):
udg
=
nx
.
get_node_attributes
(
gr
,
'udg'
)[
name
]
global
cached_udg_attr
,
cached_score_attr
if
cached_udg_attr
is
None
:
cached_udg_attr
=
nx
.
get_node_attributes
(
gr
,
'udg'
)
udg
=
cached_udg_attr
[
name
]
res
=
str_to_udg_lists
(
udg
)
if
len
(
res
)
!=
3
:
print
(
"parsing problem:"
,
res
)
central
,
h1
,
h2
=
res
idx
=
name
score
=
nx
.
get_node_attributes
(
gr
,
'score'
)[
name
]
if
cached_score_attr
is
None
:
cached_score_attr
=
nx
.
get_node_attributes
(
gr
,
'score'
)
score
=
cached_score_attr
[
name
]
# Parse hands
h1
=
h1
.
split
(
', '
)
...
...
@@ -259,21 +268,26 @@ def compute_next_nodes(d2_component):
# Get the current molecule idxs
molecule_idxs
=
mols_from_node
(
head
[
1
])
#print("node",node,"dg name",dg_names[node],"mol idxs",molecule_idxs)
for
mol_idx
in
molecule_idxs
:
nexts
=
[]
for
neighbor
in
d2_component
[
node
]:
# nei_head: central node of the neighbor of 'node'
nei_head
,
_
,
_
=
dg_names
[
neighbor
]
nei_mols
=
mols_from_node
(
nei_head
[
1
])
# only consider neighbor molecules that are strictly bigger than the current molecule idx considered (from 'node')
nei_mols
=
[
x
for
x
in
nei_mols
if
x
>
mol_idx
]
# If there are molecule next
if
len
(
nei_mols
)
>
0
:
next_nei_mol
=
min
(
nei_mols
)
# append to the neighbors of (node,mol_idx) that 'neighbor' if it contains a molecule that's bigger than mol_idx
nexts
.
append
((
next_nei_mol
,
neighbor
))
nexts
.
sort
(
key
=
lambda
x
:
x
[
0
])
next_nodes
[
node
][
mol_idx
]
=
nexts
#
print(next_nodes)
#print(
"next nodes of node",node,"mol idx",mol_idx,":",
next_nodes)
return
next_nodes
...
...
@@ -288,7 +302,12 @@ def compute_longest_increasing_paths(d2_component):
for
mol_idx
in
next_nodes
[
start_node
]:
recursive_longest_path
(
start_node
,
mol_idx
,
next_nodes
,
longest_paths
)
# Get the longest path size
test_node
=
'5339'
for
mol
in
longest_paths
[
test_node
]:
print
(
"investigating node"
,
test_node
,
"mol"
,
mol
,
longest_paths
[
test_node
][
mol
])
# Get the longest path size,
# across all barcode graph nodes and all molecules in these barcodes
max_len
,
node_val
,
mol_idx
=
0
,
None
,
-
1
for
node
in
longest_paths
:
for
mol
in
longest_paths
[
node
]:
...
...
@@ -315,6 +334,7 @@ def backtrack_longest_path(node, molecule, longest_paths, path=[]):
def
recursive_longest_path
(
current_node
,
current_molecule
,
next_nodes
,
longest_paths
):
# Dynamic programming
if
current_node
in
longest_paths
and
current_molecule
in
longest_paths
[
current_node
]:
#print("getting cached result for node",current_node,"mol",current_molecule,longest_paths[current_node][current_molecule])
return
longest_paths
[
current_node
][
current_molecule
]
longest
=
0
...
...
@@ -462,8 +482,20 @@ def main():
if
args
.
type
==
"path"
:
barcode_graph
=
load_graph
(
args
.
barcode_graph
)
frequencies
=
parse_path_graph_frequencies
(
graph
,
barcode_graph
)
if
len
(
list
(
nx
.
connected_components
(
graph
)))
!=
1
:
exit
(
"when running evaluate.py --type path, the graph should have a single connected component (it's supposed to be a path, after all)"
)
# compute LIS over the path
longest_path
=
compute_longest_increasing_paths
(
graph
)
print
(
"--- Largest component analysis ---"
)
print
(
f
"Size of the longest path:
{
len
(
longest_path
)
}
"
)
#print("Jumps in central nodes:") # what does this do?
#print(path_to_jumps(longest_path))
return
# get over/under counted molecules
print
(
"--- Under/over molecule counts ---"
)
frequencies
=
parse_path_graph_frequencies
(
graph
,
barcode_graph
)
print_path_summary
(
frequencies
,
light_print
=
args
.
light_print
)
elif
args
.
type
==
"dgraphs"
:
udg_per_node
=
parse_udg_qualities
(
graph
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment