Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
linked reads molecule ordering
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
9e8cd8ae
Commit
9e8cd8ae
authored
May 13, 2020
by
Rayan CHIKHI
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
modifs to evaluate path
parent
83e8d871
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
40 additions
and
8 deletions
+40
-8
Snakefile_data_simu
Snakefile_data_simu
+3
-3
deconvolution/main/evaluate.py
deconvolution/main/evaluate.py
+37
-5
No files found.
Snakefile_data_simu
View file @
9e8cd8ae
WORKDIR="snake_exec" if "outdir" not in config else config["outdir"]
N=[
5
000] if "n" not in config else config["n"] # Number of molecules to simulate
D=[
5
] if "d" not in config else config["d"] # Average coverage of each molecule
M=[
2
] if "m" not in config else config["m"] # Average number of molecules per barcode
N=[
10
000] if "n" not in config else config["n"] # Number of molecules to simulate
D=[
10
] if "d" not in config else config["d"] # Average coverage of each molecule
M=[
3
] if "m" not in config else config["m"] # Average number of molecules per barcode
M_DEV=[0] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
iter=1
...
...
deconvolution/main/evaluate.py
View file @
9e8cd8ae
...
...
@@ -183,15 +183,24 @@ def str_to_udg_lists(s):
udg
=
s
.
replace
(
"]"
,
""
).
replace
(
' ['
,
'['
)
return
udg
.
split
(
'['
)[
1
:]
# speeds up networkx access to attributes
cached_udg_attr
=
None
cached_score_attr
=
None
def
parse_dg_name
(
gr
,
name
):
udg
=
nx
.
get_node_attributes
(
gr
,
'udg'
)[
name
]
global
cached_udg_attr
,
cached_score_attr
if
cached_udg_attr
is
None
:
cached_udg_attr
=
nx
.
get_node_attributes
(
gr
,
'udg'
)
udg
=
cached_udg_attr
[
name
]
res
=
str_to_udg_lists
(
udg
)
if
len
(
res
)
!=
3
:
print
(
"parsing problem:"
,
res
)
central
,
h1
,
h2
=
res
idx
=
name
score
=
nx
.
get_node_attributes
(
gr
,
'score'
)[
name
]
if
cached_score_attr
is
None
:
cached_score_attr
=
nx
.
get_node_attributes
(
gr
,
'score'
)
score
=
cached_score_attr
[
name
]
# Parse hands
h1
=
h1
.
split
(
', '
)
...
...
@@ -259,21 +268,26 @@ def compute_next_nodes(d2_component):
# Get the current molecule idxs
molecule_idxs
=
mols_from_node
(
head
[
1
])
#print("node",node,"dg name",dg_names[node],"mol idxs",molecule_idxs)
for
mol_idx
in
molecule_idxs
:
nexts
=
[]
for
neighbor
in
d2_component
[
node
]:
# nei_head: central node of the neighbor of 'node'
nei_head
,
_
,
_
=
dg_names
[
neighbor
]
nei_mols
=
mols_from_node
(
nei_head
[
1
])
# only consider neighbor molecules that are strictly bigger than the current molecule idx considered (from 'node')
nei_mols
=
[
x
for
x
in
nei_mols
if
x
>
mol_idx
]
# If the neighbor has at least one later molecule
if
len
(
nei_mols
)
>
0
:
next_nei_mol
=
min
(
nei_mols
)
# append 'neighbor' to the neighbors of (node,mol_idx) if it contains a molecule that's bigger than mol_idx
nexts
.
append
((
next_nei_mol
,
neighbor
))
nexts
.
sort
(
key
=
lambda
x
:
x
[
0
])
next_nodes
[
node
][
mol_idx
]
=
nexts
#
print(
next_nodes)
#
print("next nodes of node",node,"mol idx",mol_idx,":",
next_nodes)
return
next_nodes
...
...
@@ -288,7 +302,12 @@ def compute_longest_increasing_paths(d2_component):
for
mol_idx
in
next_nodes
[
start_node
]:
recursive_longest_path
(
start_node
,
mol_idx
,
next_nodes
,
longest_paths
)
# Get the longest path size
test_node
=
'5339'
for
mol
in
longest_paths
[
test_node
]:
print
(
"investigating node"
,
test_node
,
"mol"
,
mol
,
longest_paths
[
test_node
][
mol
])
# Get the longest path size,
# across all barcode graph nodes and all molecules in these barcodes
max_len
,
node_val
,
mol_idx
=
0
,
None
,
-
1
for
node
in
longest_paths
:
for
mol
in
longest_paths
[
node
]:
...
...
@@ -315,6 +334,7 @@ def backtrack_longest_path(node, molecule, longest_paths, path=[]):
def
recursive_longest_path
(
current_node
,
current_molecule
,
next_nodes
,
longest_paths
):
# Dynamic programming
if
current_node
in
longest_paths
and
current_molecule
in
longest_paths
[
current_node
]:
#print("getting cached result for node",current_node,"mol",current_molecule,longest_paths[current_node][current_molecule])
return
longest_paths
[
current_node
][
current_molecule
]
longest
=
0
...
...
@@ -462,8 +482,20 @@ def main():
if
args
.
type
==
"path"
:
barcode_graph
=
load_graph
(
args
.
barcode_graph
)
frequencies
=
parse_path_graph_frequencies
(
graph
,
barcode_graph
)
if
len
(
list
(
nx
.
connected_components
(
graph
)))
!=
1
:
exit
(
"when running evaluate.py --type path, the graph should have a single connected component (it's supposed to be a path, after all)"
)
# compute LIS over the path
longest_path
=
compute_longest_increasing_paths
(
graph
)
print
(
"--- Largest component analysis ---"
)
print
(
f
"Size of the longest path:
{
len
(
longest_path
)
}
"
)
#print("Jumps in central nodes:") # what does this do?
#print(path_to_jumps(longest_path))
return
# get over/under counted molecules
print
(
"--- Under/over molecule counts ---"
)
frequencies
=
parse_path_graph_frequencies
(
graph
,
barcode_graph
)
print_path_summary
(
frequencies
,
light_print
=
args
.
light_print
)
elif
args
.
type
==
"dgraphs"
:
udg_per_node
=
parse_udg_qualities
(
graph
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment