Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
linked reads molecule ordering
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
86fce860
Commit
86fce860
authored
May 12, 2020
by
Rayan Chikhi
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix for accuracy of lcp-graph path detection algo
parent
01d8b380
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
51 additions
and
28 deletions
+51
-28
deconvolution/main/d2_random_path_evaluation.py
deconvolution/main/d2_random_path_evaluation.py
+51
-28
No files found.
deconvolution/main/d2_path_evaluation.py
→
deconvolution/main/d2_
random_
path_evaluation.py
View file @
86fce860
...
...
@@ -33,7 +33,7 @@ import random
def
findRandomPath
(
G
,
u
,
n
,
previous_path_nodes
=
set
()):
if
n
==
0
:
return
[
u
]
poss_neigh
=
list
(
set
(
G
.
neighbors
(
u
))
-
previous_path_nodes
)
poss_neigh
=
list
(
set
(
G
.
neighbors
(
u
))
-
previous_path_nodes
-
set
([
u
])
)
if
len
(
poss_neigh
)
==
0
:
return
None
neighbor
=
random
.
choice
(
poss_neigh
)
new_previous_path_nodes
=
previous_path_nodes
|
set
([
u
])
...
...
@@ -42,29 +42,46 @@ def findRandomPath(G,u,n,previous_path_nodes=set()):
if
len
(
path
)
!=
n
:
return
None
return
[
u
]
+
path
"""
given as input a list of molecules in central nodes,
return a stripped-down list that only includes molecules that overlap their neighbor
"""
def
extract_likely_molecule_path_wrapper
(
path
,
starter_mol
):
debug
=
False
res
=
[]
direction
=
None
previous_mol
=
starter_mol
for
i
,
mols
in
enumerate
(
path
):
good_mol
=
None
for
mol
in
mols
:
if
(
abs
(
previous_mol
[
0
]
-
mol
[
0
])
<
9000
):
# the max distance between two starting points, if min overlap is 6000 and mols at 15kbp of length
""" shouldn't enforce directionality. why? because a->b->c can sometimes be a->c->b and still be correct, because a,b,c are a clique """
#if i == 0:
#direction = "decreasing" if previous_mol[0] > mol[0] else "increasing"
good_mol
=
mol
"""
else:
if direction == "increasing":
if previous_mol[0] <= mol[0]:
good_mol = mol
else:
if mol[0] <= previous_mol[0]:
good_mol = mol
"""
if
good_mol
is
None
:
if
debug
:
print
(
"starting"
,
starter_mol
,
"bad path"
,
path
,
res
)
return
None
previous_mol
=
good_mol
res
+=
[
good_mol
]
if
debug
:
print
(
"starting"
,
starter_mol
,
"good path"
,
path
,
res
)
return
res
import
itertools
""" determine, given an ordered list of central nodes, whether there exists a coherent paths of overlapping molecules in them """
def
is_there_path_acc
(
mols_in_central_nodes
,
min_overlap_length
=
5000
):
# don't consider overlaps smaller than 5kbp
for
mols
in
itertools
.
product
(
*
mols_in_central_nodes
):
#print(mols)
last_end
=
None
last_start
=
None
good_path
=
True
for
mol
in
sorted
(
mols
):
start
,
end
=
mol
if
last_end
is
None
:
last_end
=
end
if
last_start
is
None
:
last_start
=
start
if
not
(
start
>=
last_start
and
start
<=
last_end
-
min_overlap_length
):
#print("bad path",mols)
good_path
=
False
break
last_end
=
end
last_start
=
start
if
good_path
:
return
True
return
False
def
is_there_path_acc
(
path
):
# don't consider overlaps smaller than 5kbp
# try with all possible starting molecules
return
any
(
extract_likely_molecule_path_wrapper
(
path
[
1
:],
mol
)
is
not
None
for
mol
in
path
[
0
])
""" converts a central node of a d-graph into its list of molecules (given the ground truth) """
def
central_node_to_molecules
(
nodestr
):
...
...
@@ -80,20 +97,22 @@ def central_node_to_molecules(nodestr):
def
is_coherent_path
(
central_nodes
,
path_len
):
mols_in_central_nodes
=
[]
for
node
in
central_nodes
:
#print("central node",node)
cur_node_mols
=
central_node_to_molecules
(
node
)
mols_in_central_nodes
+=
[
cur_node_mols
]
assert
(
len
(
mols_in_central_nodes
)
==
path_len
+
1
)
return
is_there_path_acc
(
mols_in_central_nodes
)
#return True
def
get_barcode
(
x
):
if
'udg'
in
graph
.
nodes
[
x
]:
return
graph
.
nodes
[
x
][
'udg'
].
split
(
)[
0
]
return
graph
.
nodes
[
x
][
'udg'
].
split
(
']'
)[
0
]
# here the format may change..
else
:
return
graph
.
nodes
[
x
][
'label'
]
""" the main function that tests for accuracy of random paths """
graph
=
None
def
evaluate_accuracy_paths
(
path_len
,
max_paths_per_node
=
100
):
def
evaluate_accuracy_paths
(
path_len
,
max_paths_per_node
=
5
):
global
graph
nb_bad_paths
=
0
nb_good_paths
=
0
...
...
@@ -114,7 +133,8 @@ def evaluate_accuracy_paths(path_len,max_paths_per_node=100):
nb_good_paths
+=
1
else
:
nb_bad_paths
+=
1
print
(
"accuracy for l=%d:"
%
path_len
,
nb_good_paths
/
(
nb_good_paths
+
nb_bad_paths
))
nb_paths
=
nb_bad_paths
+
nb_good_paths
print
(
"accuracy for l=%d (%d paths, %d nodes explored):"
%
(
path_len
,
nb_paths
,
nb_paths
*
path_len
),
nb_good_paths
/
(
nb_good_paths
+
nb_bad_paths
))
# ---- sensitivity evaluation
...
...
@@ -160,9 +180,12 @@ def main():
args
=
parse_args
()
graph
=
load_graph
(
args
.
filename
)
p
=
Pool
(
4
)
p
.
map
(
evaluate_accuracy_paths
,
[
1
,
2
,
3
,
4
])
p
.
map
(
evaluate_sensitivity_paths
,
[
1
,
2
,
3
,
4
])
p
=
Pool
(
12
)
#p.map(evaluate_accuracy_paths, [1,2,4,6,8,10,15,20,50,100,200,500])
p
.
map
(
evaluate_accuracy_paths
,
[
2
,
4
,
10
,
100
])
p
.
join
()
#p.map(evaluate_sensitivity_paths, [1,2,3,4])
#evaluate_accuracy_paths(100)
if
__name__
==
"__main__"
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment