Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
d1bb245c
Commit
d1bb245c
authored
Sep 05, 2019
by
Yoann Dufresne
Browse files
finitions for d2_to_path
parent
e51992a0
Changes
3
Hide whitespace changes
Inline
Side-by-side
deconvolution/d2_path.py
View file @
d1bb245c
...
...
@@ -62,7 +62,21 @@ class Path(list):
return
num_covered
def save_path(self, filename):
    """Write this path to `filename` as a standalone GEXF graph.

    One node is created per element of the path, keyed by the element's
    `idx` and carrying three attributes: `center`, `udg` (the element's
    string form) and `score` (formatted as "<score>/<optimal score>").
    Consecutive elements of the path are linked by an edge.

    Args:
        filename: Destination handed to `nx.write_gexf`.
    """
    # The graph is built from scratch. A `self.d2g.subgraph(...)` result used
    # to be computed here but was overwritten before being read, so that dead
    # computation is gone.
    d2p = nx.Graph()

    # Add the nodes
    for udg in self:
        d2p.add_node(
            udg.idx,
            center=udg.center,
            udg=str(udg),
            score=f"{udg.score}/{udg.get_optimal_score()}",
        )

    # Add the edges between each element and its successor in the path
    for udg1, udg2 in zip(self, self[1:]):
        d2p.add_edge(udg1.idx, udg2.idx)

    nx.write_gexf(d2p, filename)
def
save_path_in_graph
(
self
,
filename
):
...
...
@@ -81,4 +95,16 @@ class Unitig(Path):
self
.
add_path
([
udg
])
def d2_path_to_barcode_path(path):
    """Print the barcode differences between neighbouring elements of `path`.

    Each element of `path` must expose `to_list()`; its result is turned into
    a set. For every position two sets are printed on one line:

    * the forward difference: the element's set minus the next element's set.
      The last position has no successor, so it gets the last set minus the
      previous forward difference.
    * the reverse difference: the element's set minus the previous element's
      set. The first position has no predecessor, so it gets the first set
      minus the first computed reverse difference.

    An empty path prints nothing. A single-element path has no neighbouring
    differences, so its set is printed unchanged in both columns.

    Args:
        path: Iterable of objects providing `to_list()`.
    """
    barcode_per_idx = [set(udg.to_list()) for udg in path]
    if not barcode_per_idx:
        # Nothing to compare; indexing [-1] / [0] below would raise.
        return

    diff_barcode_per_idx = []
    rev_diff_barcode_per_idx = []
    for current, following in zip(barcode_per_idx, barcode_per_idx[1:]):
        diff_barcode_per_idx.append(current - following)
        rev_diff_barcode_per_idx.append(following - current)

    # Boundary positions: fall back to an empty set when the path holds a
    # single element and no pairwise difference exists.
    last_diff = diff_barcode_per_idx[-1] if diff_barcode_per_idx else set()
    first_rev_diff = rev_diff_barcode_per_idx[0] if rev_diff_barcode_per_idx else set()
    diff_barcode_per_idx.append(barcode_per_idx[-1] - last_diff)
    rev_diff_barcode_per_idx.insert(0, barcode_per_idx[0] - first_rev_diff)

    for diff, rev_diff in zip(diff_barcode_per_idx, rev_diff_barcode_per_idx):
        print(diff, rev_diff)
deconvolution/d2_to_path.py
View file @
d1bb245c
#!/usr/bin/env python3
import
networkx
as
nx
import
path_optimization
as
po
import
argparse
import
sys
import
d2_graph
as
d2
import
path_algorithms
as
pa
from
d2_algorithms
import
compute_unitigs
def parse_arguments(argv=None):
    """Parse the command line of the d2-graph path construction script.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads `sys.argv[1:]`, which is what existing callers get.

    Returns:
        argparse.Namespace with `barcode_graph`, `d2_graph` and `out_prefix`.
    """
    parser = argparse.ArgumentParser(
        description='Greedy construction of a path through the d2 graph.')
    parser.add_argument(
        'barcode_graph',
        help='The barcode graph file. Must be a gexf formatted file.')
    parser.add_argument(
        'd2_graph',
        help='d2 graph to reduce. Must be a gexf formatted file.')
    # The short flag `-o` may be registered once only; a second option
    # claiming it makes argparse raise a conflict error while the parser is
    # built. `--out_prefix` is the option the script reads (args.out_prefix).
    parser.add_argument(
        '--out_prefix', '-o', default="",
        help="Output file prefix.")

    return parser.parse_args(argv)
...
...
@@ -38,34 +37,21 @@ def main():
largest_component_nodes
=
max
(
nx
.
connected_components
(
d2g
),
key
=
len
)
largest_component
=
d2g
.
subgraph
(
largest_component_nodes
)
import
path_optimization
as
po
# Start optimization
optimizer
=
po
.
Optimizer
(
largest_component
)
optimizer
.
init_random_solutions
(
1
)
solution
=
optimizer
.
solutions
[
0
]
print
(
solution
)
print
(
solution
.
covering_score
())
print
(
"Solution creation..."
)
optimizer
.
extends_until_end
(
solution
)
print
(
solution
.
covering_score
())
print
(
f
"covering score:
{
solution
.
covering_score
()
}
"
)
solution
.
save_path_in_graph
(
"data/test_d2_path.gexf"
)
solution
.
save_path
(
"data/test_path.gexf"
)
solution
.
save_path_in_graph
(
f
"
{
args
.
out_prefix
}
_d2_path.gexf"
)
solution
.
save_path
(
f
"
{
args
.
out_prefix
}
_path.gexf"
)
print
(
"Solution saved"
)
# unitigs = compute_unitigs(largest_component)
# path = pa.construct_path_from_unitigs(unitigs, largest_component)
# print("\n".join([str(x) for x in path]))
# print(path.covering_score())
# diameter = nx.diameter(largest_component)
# print(diameter)
# Write the simplified graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name)
# from d2_path import d2_path_to_barcode_path
# d2_path_to_barcode_path(solution)
if
__name__
==
"__main__"
:
...
...
deconvolution/path_optimization.py
View file @
d1bb245c
...
...
@@ -11,16 +11,15 @@ class Optimizer:
def init_random_solutions(self, nb_solutions):
    """Append `nb_solutions` newly seeded solutions to `self.solutions`.

    Each solution is created on `self.d2g` and seeded with
    `random_init_best_quality()`.

    Args:
        nb_solutions: Number of solutions to create.
    """
    for _ in range(nb_solutions):
        rnd_sol = Solution(self.d2g)
        # One seeding call is enough: random_init_best_quality() clears the
        # solution before adding its node, so a preceding random_init() call
        # would simply be thrown away.
        rnd_sol.random_init_best_quality()
        self.solutions.append(rnd_sol)
def extends_until_end(self, solution):
    """Extend `solution` as far as possible from both of its ends.

    `self.extends(solution)` is called repeatedly until it returns a falsy
    value. The solution is then reversed in place and the same loop runs
    again, so the other extremity is extended too.

    Args:
        solution: Object accepted by `self.extends` that provides `reverse()`.
    """
    # All the work happens inside extends(); the loops only repeat it. The
    # per-step length trace was debug output and is no longer printed.
    while self.extends(solution):
        pass

    solution.reverse()
    print("reverse the solution")

    while self.extends(solution):
        pass
def
extends
(
self
,
solution
):
# Get all the neighbors
...
...
@@ -28,24 +27,15 @@ class Optimizer:
neighbors
=
[
self
.
d2g
.
node_by_idx
[
int
(
x
)]
for
x
in
self
.
d2g
.
neighbors
(
cur_id
)
if
self
.
d2g
.
node_by_idx
[
int
(
x
)]
not
in
solution
]
# filter the neighbors if they are not contributing to variable coverage
# filtered = []
current_vars
=
frozenset
([
x
for
x
,
y
in
solution
.
covering_variables
.
items
()
if
y
>
0
])
# for nei_id in neighbors:
# nei = self.d2g.node_by_idx[int(nei_id)]
# variables = self.d2g.get_covering_variables(nei)
# new_vars = variables - current_vars
# if len(new_vars) > 0:
# filtered.append(nei)
if
len
(
neighbors
)
==
0
:
return
False
# Choose using the multiple optimization directions
next_udg
=
min
(
neighbors
,
key
=
lambda
x
:
(
1
if
len
(
self
.
d2g
.
get_covering_variables
(
x
)
-
current_vars
)
==
0
else
0
,
self
.
d2g
[
str
(
x
.
idx
)][
cur_id
][
"dista
nce
"
]
,
x
.
get_link_diverge
nce
()
))
x
.
get_link_diverge
nce
()
,
self
.
d2g
[
str
(
x
.
idx
)][
cur_id
][
"dista
nce
"
]
))
solution
.
add_path
([
next_udg
])
return
True
...
...
@@ -55,9 +45,12 @@ class Solution(Path):
def __init__(self, d2g):
    """Initialise the underlying Path with the d2 graph `d2g`."""
    # Zero-argument super() is the Python 3 spelling and stays correct if
    # the class is ever renamed.
    super().__init__(d2g)
def random_init(self):
    """Reset the solution to one node drawn uniformly from the d2 graph.

    Raises:
        IndexError: If the d2 graph has no node (from `random.choice`).
    """
    random_node_idx = random.choice(list(self.d2g.nodes()))
    random_node = self.d2g.node_by_idx[int(random_node_idx)]

    self.clear()
    self.add_path([random_node])

def random_init_best_quality(self):
    """Reset the solution to a random node of minimal link divergence.

    Every node of the d2 graph is looked up in `node_by_idx`. Those whose
    `get_link_divergence()` equals the minimum over the graph are the
    candidates, and one of them is drawn at random.

    Raises:
        ValueError: If the d2 graph has no node (from `min`).
    """
    nodes = [self.d2g.node_by_idx[int(x)] for x in self.d2g.nodes()]
    min_div = min(node.get_link_divergence() for node in nodes)
    candidates = [node for node in nodes if node.get_link_divergence() == min_div]
    random_udg = random.choice(candidates)

    self.clear()
    self.add_path([random_udg])
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment