Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
84920ea8
Commit
84920ea8
authored
May 15, 2020
by
Rayan CHIKHI
Browse files
initial modifs for interval graphs evaluation acc/sens
parent
4782df1e
Changes
2
Hide whitespace changes
Inline
Side-by-side
Snakefile_data_simu
View file @
84920ea8
WORKDIR="snake_exec" if "outdir" not in config else config["outdir"]
N=[
5
000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[
5
] if "d" not in config else config["d"] # Average coverage of each molecule
N=[
10
000] if "n" not in config else config["n"] # Number of molecule to simulate
D=[
10
] if "d" not in config else config["d"] # Average coverage of each molecule
M=[3] if "m" not in config else config["m"] # Average number of molecule per barcode
M_DEV=[
0
] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
M_DEV=[
1
] if "m_dev" not in config else config["m_dev"] # Std deviation for merging number
iter=1
# snake_experiments/simu_0_bar_n500_d5_m2.gexf
...
...
deconvolution/main/d2_random_path_evaluation.py
View file @
84920ea8
...
...
@@ -10,6 +10,8 @@ import argparse
from
termcolor
import
colored
import
networkx
as
nx
graph_type
=
"interval"
# will be set to "ecoli" if we detect that it is, in fact, a barcode graph simulated from ecoli
# this affects how we evaluate that path is correct: mols starts absolute diff < 9k for ecoli, < 5--10 for interval graphs
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Process some integers.'
)
...
...
@@ -47,6 +49,7 @@ given as input a list of molecules in central nodes,
return a stripped-down list that only includes molecules that overlap their neighbor
"""
def
extract_likely_molecule_path_wrapper
(
path
,
starter_mol
):
tolerance
=
9000
if
graph_type
==
"ecoli"
else
10
# tolerance for ecoli =the max distance between two starting points, if min overlap is 6000 and mols at 15kbp of length
debug
=
False
res
=
[]
direction
=
None
...
...
@@ -54,7 +57,7 @@ def extract_likely_molecule_path_wrapper(path,starter_mol):
for
i
,
mols
in
enumerate
(
path
):
good_mol
=
None
for
mol
in
mols
:
if
(
abs
(
previous_mol
[
0
]
-
mol
[
0
])
<
9000
):
# the max distance between two starting points, if min overlap is 6000 and mols at 15kbp of length
if
(
abs
(
previous_mol
[
0
]
-
mol
[
0
])
<
tolerance
):
""" shouldn't enforce directionality. why? because a->b->c can sometimes be a->c->b and still be correct, because a,b,c are a clique """
#if i == 0:
#direction = "decreasing" if previous_mol[0] > mol[0] else "increasing"
...
...
@@ -85,13 +88,26 @@ def is_there_path_acc(path): # don't consider overlaps smaller than 5kbp
""" converts a central node of a d-graph into its list of molecules (given the ground truth) """
def
central_node_to_molecules
(
nodestr
):
# format for a 2-merge: 1:NC_000913.3_298281_313280_0:0:0_0:0:0_2fb/1_NC_000913.3_338611_353610_0:0:0_0:0:0_37b/1
global
graph_type
cur_node_mols
=
[]
for
ide
in
nodestr
.
split
(
'NC_'
)[
1
:]:
# specific to e coli
#print(ide)
x
=
ide
.
split
(
"_"
)
start
,
end
=
int
(
x
[
1
]),
int
(
x
[
2
])
cur_node_mols
+=
[(
start
,
end
)]
if
'NC_'
in
nodestr
:
# ecoli specific
# format for a 2-merge: 1:NC_000913.3_298281_313280_0:0:0_0:0:0_2fb/1_NC_000913.3_338611_353610_0:0:0_0:0:0_37b/1
for
ide
in
nodestr
.
split
(
'NC_'
)[
1
:]:
# specific to e coli
graph_type
=
"ecoli"
#print(ide)
x
=
ide
.
split
(
"_"
)
start
,
end
=
int
(
x
[
1
]),
int
(
x
[
2
])
cur_node_mols
+=
[(
start
,
end
)]
else
:
if
':'
not
in
nodestr
:
# a non-merged node
cur_node_mols
+=
[(
int
(
nodestr
),
int
(
nodestr
)
+
1
)]
else
:
nodestr
=
nodestr
.
split
(
':'
)[
1
]
nodestr
=
nodestr
.
replace
(
']'
,
''
).
replace
(
'['
,
''
)
for
ide
in
nodestr
.
split
(
'_'
):
#print(ide)
cur_node_mols
+=
[(
int
(
ide
),
int
(
ide
)
+
1
)]
# a tuple just for compatibility with the ecoli format.. should be just int(ide)
return
cur_node_mols
def
is_coherent_path
(
central_nodes
,
path_len
):
...
...
@@ -183,7 +199,7 @@ def evaluate_sensitivity_paths(path_len,overlap_length=7000):
for
mol
in
mols
:
molecules_to_nodes
[
mol
]
+=
[
node
]
all_molecules
=
sorted
(
all_molecules
)
print
(
"got"
,
len
(
all_molecules
),
"molecules total"
)
#
print("got",len(all_molecules),"molecules total")
nb_found
,
nb_not_found
=
0
,
0
for
i
in
range
(
len
(
all_molecules
)
-
path_len
+
1
):
sought_path
=
all_molecules
[
i
:
i
+
path_len
]
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment