Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
2c22fc66
Commit
2c22fc66
authored
Jul 29, 2019
by
Yoann Dufresne
Browse files
d2 graph basic analysis completed
parent
17971a94
Changes
1
Hide whitespace changes
Inline
Side-by-side
deconvolution/evaluate.py
View file @
2c22fc66
...
...
@@ -2,6 +2,7 @@
import
sys
import
csv
import
argparse
from
termcolor
import
colored
...
...
@@ -16,6 +17,8 @@ def parse_args():
help
=
"Define the data type to evaluate. Must be 'd2' or 'path'."
)
parser
.
add_argument
(
'--light-print'
,
'-l'
,
action
=
'store_true'
,
help
=
'Print only wrong nodes and paths'
)
parser
.
add_argument
(
'--optimization_file'
,
'-o'
,
help
=
"If the main file is a d2, a file formated for optimization can be set. This file will be used to compute the coverage of the longest path on the barcode graph."
)
args
=
parser
.
parse_args
()
return
args
...
...
@@ -172,13 +175,52 @@ def parse_dg_name(name):
return
(
idx
,
central
,
score
),
h1
,
h2
def
print_d2_summary
(
connected_components
,
light_print
=
False
):
def path_to_jumps(path):
    """Collapse a path into runs of contiguous molecule numbers.

    Args:
        path: iterable of ``(molecule, node)`` pairs. Only the molecule
            number is used.

    Returns:
        A list of ``(first_molecule, last_molecule)`` tuples, one per run.
        A new run starts whenever a molecule number exceeds the previous
        one by more than 1. An empty path gives an empty list.
    """
    chunks = []
    run_start = None
    previous = None

    for mol, _node in path:
        if previous is None:
            # First element: open the first run (no sentinel value needed,
            # so any molecule number, even a very negative one, is valid).
            run_start = mol
        elif mol > previous + 1:
            # There is a gap: close the current run and open a new one.
            chunks.append((run_start, previous))
            run_start = mol
        previous = mol

    # Add the last piece.
    if previous is not None:
        chunks.append((run_start, previous))

    return chunks
def print_d2_summary(connected_components, longest_path, covered_vars=None, light_print=False):
    """Print a summary of the d2 graph analysis on standard output.

    Args:
        connected_components: sized sequence of components; each component
            must support ``len``. The sizes of the first five are printed
            as "the 5 largest" (presumably the caller sorts them by
            decreasing size -- verify against caller).
        longest_path: sequence of ``(molecule, node)`` pairs where ``node``
            is a string whose first space-separated token is an integer
            node index.
        covered_vars: optional mapping ``variable index -> bool`` telling
            whether each optimization variable is covered by the path.
            ``None`` (the default) is treated as an empty mapping.
        light_print: when true, the full list of node indexes of the
            longest path is not printed.
    """
    # A None default avoids sharing one mutable dict between calls.
    if covered_vars is None:
        covered_vars = {}

    print("--- Global summary ---")
    print(f"Number of connected components: {len(connected_components)}")
    print(f"Total number of nodes: {sum(len(x) for x in connected_components)}")
    print(f"The 5 largest components: {[len(x) for x in connected_components][:5]}")

    print("--- Largest component analysis ---")
    # Get the list of node idx
    path_dg_idx = [int(x[1].split(" ")[0]) for x in longest_path]
    if not light_print:
        print("Longest path for increasing molecule number:")
        print(path_dg_idx)
    print(f"Size of the longest path: {len(longest_path)}")
    print("Jumps in central nodes:")
    print(path_to_jumps(longest_path))

    print(f"Number of optimization variable coverage: {len(covered_vars)}")
    # Split the variables between covered ones (counted) and uncovered ones
    # (listed by index).
    nb_true = sum(1 for val in covered_vars.values() if val)
    falses = [idx for idx, val in covered_vars.items() if not val]
    print(f"Coverage: {nb_true}/{len(covered_vars)}")
    print(f"Uncovered_values:\n{falses}")
...
...
@@ -266,8 +308,7 @@ def backtrack_longest_path(node, molecule, longest_paths, path=[]):
if
node
==
None
:
return
path
path
.
append
(
node
)
print
(
node
,
molecule
)
path
.
append
((
molecule
,
node
))
length
,
next_node
,
next_mol
=
longest_paths
[
node
][
molecule
]
return
backtrack_longest_path
(
next_node
,
next_mol
,
longest_paths
,
path
)
...
...
@@ -300,6 +341,42 @@ def recursive_longest_path(current_node, current_molecule, next_nodes, longest_p
return
longest_paths
[
current_node
][
current_molecule
]
def compute_covered_variables(optimization_file, path):
    """Compute which optimization variables are covered by a path.

    The optimization file is read as whitespace-separated integers:
    the first line holds the number of node lines followed by the number
    of variables; each of the following node lines holds a node index
    followed by the indexes of the variables attached to that node.
    Anything after the announced number of node lines is ignored.

    Args:
        optimization_file: path of the optimization file to read.
        path: iterable of ``(molecule, node)`` pairs where ``node`` is a
            string whose first space-separated token is an integer node
            index.

    Returns:
        A dict mapping every variable index in ``range(nb_vars)`` to
        ``True`` when at least one node of the path carries it, ``False``
        otherwise.

    Raises:
        KeyError: if a node of the path is not described in the file.
        ValueError: if a token of the file is not an integer.
    """
    var_assignments = {}

    # Read optimization variables
    with open(optimization_file) as of:
        # split() without argument tolerates trailing or repeated blanks.
        header = [int(x) for x in of.readline().split()]
        nb_nodes, nb_vars = header[0], header[1]
        covered = {x: False for x in range(nb_vars)}

        for idx, line in enumerate(of):
            # Stop at the end of nodes
            if idx >= nb_nodes:
                break

            parsed = [int(x) for x in line.split()]
            var_assignments[parsed[0]] = parsed[1:]

    # Read the path to cover the variables
    for node in path:
        node_idx = int(node[1].split(" ")[0])
        for var_idx in var_assignments[node_idx]:
            covered[var_idx] = True

    return covered
def
main
():
args
=
parse_args
()
graph
=
load_graph
(
args
.
filename
)
...
...
@@ -315,7 +392,10 @@ def main():
component
=
graph
.
subgraph
(
components
[
0
])
longest_path
=
compute_longest_increasing_paths
(
component
)
print_d2_summary
(
components
,
longest_path
,
light_print
=
args
.
light_print
)
covered_vars
=
{}
if
args
.
optimization_file
and
len
(
args
.
optimization_file
)
>
0
:
covered_vars
=
compute_covered_variables
(
args
.
optimization_file
,
longest_path
)
print_d2_summary
(
components
,
longest_path
,
covered_vars
=
covered_vars
,
light_print
=
args
.
light_print
)
if
__name__
==
"__main__"
:
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment