Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
cf3ae74c
Commit
cf3ae74c
authored
Jun 11, 2019
by
Yoann Dufresne
Browse files
save d2_graph
parent
0d701c92
Changes
6
Hide whitespace changes
Inline
Side-by-side
deconvolution/d2_algorithms.py
View file @
cf3ae74c
import
networkx
as
nx
from
d2_path
import
Path
,
Unitig
def
greedy_reduct
(
d2
):
""" Compute a graph where, for each node, the neighbors are the closest neighbors.
...
...
@@ -35,7 +37,7 @@ def filter_singeltons(graph):
return
graph
def
compute_unitigs
(
graph
):
def
compute_unitigs
(
graph
,
d2g
):
""" Compute the unambiguious paths
"""
unitigs
=
[]
...
...
@@ -48,7 +50,7 @@ def compute_unitigs(graph):
continue
# Start the unitig
unitig
=
[
node
]
unitig
=
Unitig
(
d2g
,
[
node
]
)
already_used
[
node
]
=
True
# Save the unitig
unitigs
.
append
(
unitig
)
...
...
@@ -60,49 +62,69 @@ def compute_unitigs(graph):
# Randomly determine the right and a left neighbors
neighbors
=
list
(
graph
[
node
])
if
len
(
neighbors
)
==
1
:
right
=
neighbors
[
0
]
left
=
None
else
:
right
=
neighbors
[
0
]
left
=
neighbors
[
1
]
# --- Explore the RIGHT side of the unitig ---
previous
,
current
=
node
,
right
while
len
(
graph
[
current
])
==
2
and
not
already_used
[
current
]:
unitig
.
append
(
current
)
already_used
[
current
]
=
True
# Go to next neighbor
neighbors
=
list
(
graph
[
current
])
new_node
=
neighbors
[
0
]
if
neighbors
[
0
]
!=
previous
else
neighbors
[
1
]
previous
=
current
current
=
new_node
# Add the right extremity of the unitig
if
len
(
graph
[
current
])
==
1
:
unitig
.
append
(
current
)
already_used
[
current
]
=
True
# --- Explore the LEFT side of the unitig ---
if
left
==
None
:
continue
adding_functions
=
[
unitig
.
add_right
]
if
len
(
neighbors
)
==
2
:
adding_functions
.
append
(
unitig
.
add_left
)
# --- Explore the sides of the unitig ---
for
idx
in
range
(
len
(
neighbors
)):
previous
,
current
=
node
,
neighbors
[
idx
]
while
len
(
graph
[
current
])
==
2
and
not
already_used
[
current
]:
adding_functions
[
idx
](
current
)
already_used
[
current
]
=
True
# Go to next neighbor
neighbors
=
list
(
graph
[
current
])
new_node
=
neighbors
[
idx
]
if
neighbors
[
idx
]
!=
previous
else
neighbors
[(
idx
+
1
)
%
2
]
previous
=
current
current
=
new_node
# Add the extremity of the unitig
if
len
(
graph
[
current
])
==
1
:
adding_functions
[
idx
](
current
)
already_used
[
current
]
=
True
previous
,
current
=
node
,
left
while
len
(
graph
[
current
])
==
2
and
not
already_used
[
current
]:
unitig
.
insert
(
0
,
current
)
already_used
[
current
]
=
True
return
unitigs
# Go to next neighbor
neighbors
=
list
(
graph
[
current
])
new_node
=
neighbors
[
0
]
if
neighbors
[
0
]
!=
previous
else
neighbors
[
1
]
previous
=
current
current
=
new_node
# Add the right extremity of the unitig
if
len
(
graph
[
current
])
==
1
:
unitig
.
insert
(
0
,
current
)
already_used
[
current
]
=
True
def
compute_path_from_unitigs
(
d2g
,
unitigs
):
path
=
Path
()
return
unitigs
# Create an edge using count.
included_edges
=
{}
for
e
in
d2g
.
graph
.
edges
():
if
e
[
0
]
<
e
[
1
]:
included_edges
[(
e
[
0
],
e
[
1
])]
=
False
else
:
included_edges
[(
e
[
1
],
e
[
0
])]
=
False
remaining_edges
=
len
(
included_edges
)
# Sort the unitigs by decreasing size
unitigs
.
sort
(
key
=
lambda
x
:
len
(
x
.
nodes
),
reverse
=
True
)
# Select the first unitig that contain at least 1 uncovered edge
used_unitigs
=
[]
nb_used
=
0
for
unitig
in
unitigs
:
if
remaining_edges
==
0
:
break
# Compute usabable edges
edge_found
=
False
for
edge
in
unitig
.
get_original_edges
():
if
not
included_edges
[
edge
]:
edge_found
=
True
included_edges
[
edge
]
=
True
remaining_edges
-=
1
# Nothing to improve the solution
if
not
edge_found
:
continue
# Add it to the path
path
.
add_path
(
unitig
)
nb_used
+=
1
print
(
f
"Unitig used:
{
nb_used
}
/
{
len
(
unitigs
)
}
"
)
return
path
deconvolution/d2_graph.py
View file @
cf3ae74c
import
networkx
as
nx
import
itertools
from
bidict
import
bidict
from
d_graph
import
compute_all_max_d_graphs
...
...
@@ -13,7 +14,24 @@ class D2Graph(object):
# Compute all the d-graphs
if
verbose
:
print
(
"Compute the unit d-graphs"
)
self
.
d_graphs
=
compute_all_max_d_graphs
(
self
.
graph
)
self
.
d_graphs_per_node
=
compute_all_max_d_graphs
(
self
.
graph
)
self
.
all_d_graphs
=
[]
for
d_graphs
in
self
.
d_graphs_per_node
.
values
():
self
.
all_d_graphs
.
extend
(
d_graphs
)
# Name the d-graphs
# Number the d_graphs
for
idx
,
d_graph
in
enumerate
(
self
.
all_d_graphs
):
d_graph
.
idx
=
idx
# Number the edges from original graph
self
.
edge_idxs
=
{}
self
.
nb_uniq_edge
=
0
for
idx
,
edge
in
enumerate
(
self
.
graph
.
edges
()):
if
edge
==
(
edge
[
1
],
edge
[
0
]):
self
.
nb_uniq_edge
+=
1
self
.
edge_idxs
[
edge
]
=
idx
self
.
edge_idxs
[(
edge
[
1
],
edge
[
0
])]
=
idx
# Index all the d-graphes
if
verbose
:
...
...
@@ -25,59 +43,58 @@ class D2Graph(object):
self
.
distances
=
self
.
compute_distances
()
# Create the graph
self
.
graph
,
self
.
nodes
=
self
.
to_nx_graph
()
self
.
nx_
graph
,
self
.
nodes
=
self
.
to_nx_graph
()
def
save
(
self
,
filename
):
with
open
(
filename
,
"w"
)
as
fp
:
# First line nb_nodes nb_cov_var
fp
.
write
(
f
"
{
len
(
self
.
all_d_graphs
)
}
{
int
((
len
(
self
.
edge_idxs
)
+
self
.
nb_uniq_edge
)
/
2
)
}
\n
"
)
# Write the edges per d_graph
for
d_graph
in
self
.
all_d_graphs
:
fp
.
write
(
f
"
{
d_graph
.
idx
}
{
' '
.
join
([
str
(
self
.
edge_idxs
[
e
])
for
e
in
d_graph
.
edges
])
}
\n
"
)
# Write the distances
for
d_graph
in
self
.
all_d_graphs
:
for
neighbor_idx
,
dist
in
self
.
distances
[
d_graph
.
idx
].
items
():
fp
.
write
(
f
"
{
d_graph
.
idx
}
{
neighbor_idx
}
{
dist
}
\n
"
)
def
create_index_from_tuples
(
self
,
tuple_size
=
3
):
index
=
{}
perfect
=
0
for
node
in
self
.
d_graphs
:
for
dg
in
self
.
d_graphs
[
node
]:
nodelist
=
dg
.
to_list
()
nodelist
.
sort
()
if
len
(
nodelist
)
<
tuple_size
:
continue
# Generate all tuplesize-mers
for
dmer
in
itertools
.
combinations
(
nodelist
,
tuple_size
):
if
not
dmer
in
index
:
index
[
dmer
]
=
[
dg
]
else
:
index
[
dmer
].
append
(
dg
)
for
dg
in
self
.
all_d_graphs
:
nodelist
=
dg
.
to_list
()
nodelist
.
sort
()
if
len
(
nodelist
)
<
tuple_size
:
continue
# Generate all tuplesize-mers
for
dmer
in
itertools
.
combinations
(
nodelist
,
tuple_size
):
if
not
dmer
in
index
:
index
[
dmer
]
=
[
dg
]
else
:
index
[
dmer
].
append
(
dg
)
return
index
def
compute_distances
(
self
):
distances
=
{}
distances
=
{
dg
.
idx
:{}
for
dg
in
self
.
all_d_graphs
}
for
dmer
,
dgraphs
in
self
.
index
.
items
():
if
len
(
dgraphs
)
==
1
:
continue
for
idx1
,
dg1
in
enumerate
(
dgraphs
):
# Add dist dict for dg1
if
not
dg1
in
distances
:
distances
[
dg1
]
=
{}
for
idx2
in
range
(
idx1
+
1
,
len
(
dgraphs
)):
dg2
=
dgraphs
[
idx2
]
if
dg1
==
dg2
:
continue
# Add dist dict for dg2
if
not
dg2
in
distances
:
distances
[
dg2
]
=
{}
# Distance computing and adding in the dist dicts
d
=
dg1
.
distance_to
(
dg2
)
distances
[
dg1
][
dg2
]
=
d
distances
[
dg2
][
dg1
]
=
d
if
len
(
distances
[
dg1
])
==
0
:
del
distances
[
dg1
]
distances
[
dg1
.
idx
][
dg2
.
idx
]
=
d
distances
[
dg2
.
idx
][
dg1
.
idx
]
=
d
return
distances
...
...
@@ -86,8 +103,8 @@ class D2Graph(object):
index
=
{}
perfect
=
0
for
node
in
self
.
d_graphs
:
for
dg
in
self
.
d_graphs
[
node
]:
for
node
in
self
.
d_graphs
_per_node
:
for
dg
in
self
.
d_graphs
_per_node
[
node
]:
lst
=
dg
.
to_ordered_lists
()
# Generate all dmers without the first node
# pull all the values
...
...
@@ -133,7 +150,7 @@ class D2Graph(object):
for
prev_node
in
self
.
index
[
dmer
][:
d_idx
]:
G
.
add_edge
(
nodes
[
dg
],
nodes
[
prev_node
])
return
G
,
nodes
return
G
,
bidict
(
nodes
)
\ No newline at end of file
deconvolution/d2_path.py
View file @
cf3ae74c
import
networkx
as
nx
class
Path
(
object
):
...
...
@@ -6,9 +8,86 @@ class Path(object):
self
.
nodes
=
[]
def
add_nodes
(
self
,
nodes
):
self
.
nodes
.
extend
(
nodes
)
def
add_path
(
self
,
path
):
self
.
add_nodes
(
path
.
nodes
)
def
get_score
(
self
):
return
0
# class D2_Path_finder(object):
def
__repr__
(
self
):
return
f
"[
{
','
.
join
([
str
(
x
)
for
x
in
self
.
nodes
])
}
]"
def
save_d2
(
self
,
d2g
,
file
):
if
len
(
self
.
nodes
)
==
0
:
return
graph_path
=
nx
.
Graph
()
graph_path
.
add_node
(
self
.
nodes
[
0
])
for
idx
,
node
in
enumerate
(
self
.
nodes
[
1
:]):
dg1
=
d2g
.
nodes
.
inverse
[
node
]
graph_path
.
add_node
(
node
)
prev_node
=
self
.
nodes
[
idx
]
dg2
=
d2g
.
nodes
.
inverse
[
prev_node
]
if
dg1
in
d2g
.
distances
[
dg2
]:
graph_path
.
add_edge
(
node
,
prev_node
)
nx
.
write_gexf
(
graph_path
,
file
)
class
Unitig
(
Path
):
def
__init__
(
self
,
d2g
,
node_list
):
super
(
Unitig
,
self
).
__init__
()
self
.
nodes
=
node_list
self
.
d2g
=
d2g
def
add_left
(
self
,
node
):
self
.
nodes
.
insert
(
0
,
node
)
def
add_right
(
self
,
node
):
self
.
nodes
.
append
(
node
)
def
get_original_edges
(
self
):
edges
=
[]
for
node
in
self
.
nodes
:
dg
=
self
.
d2g
.
nodes
.
inverse
[
node
]
edges
.
extend
(
dg
.
edges
)
return
frozenset
(
edges
)
class
UnitigGraph
():
def
__init__
(
self
,
d2g
,
unitigs
):
self
.
d2g
=
dg2
self
.
unitigs
=
unitigs
def
compute_unitig_graph
(
self
):
ug
=
nx
.
Graph
()
# Get unitig extremities and index them
borders
=
{}
for
unitig
in
self
.
unitigs
:
borders
.
append
[
unitig
.
nodes
[
0
]]
=
unitig
if
len
(
unitig
.
nodes
)
>
1
:
borders
.
append
[
unitig
.
nodes
[
-
1
]]
=
unitig
border_nodes
=
frozenset
(
borders
.
keys
())
# link borders together
for
unitig
in
self
.
unitigs
:
pass
deconvolution/d_graph.py
View file @
cf3ae74c
...
...
@@ -8,11 +8,13 @@ class Dgraph(object):
"""docstring for Dgraph"""
def
__init__
(
self
,
center
):
super
(
Dgraph
,
self
).
__init__
()
self
.
idx
=
-
1
self
.
center
=
center
self
.
score
=
0
self
.
halves
=
[
None
,
None
]
self
.
connexity
=
[
None
,
None
]
self
.
nodes
=
[
center
]
self
.
edges
=
[]
""" Compute the d-graph quality (score) according to the connectivity between the two halves.
...
...
@@ -27,6 +29,7 @@ class Dgraph(object):
self
.
nodes
=
sorted
([
self
.
center
]
+
h1
+
h2
)
self
.
connexity
[
0
]
=
{
key
:
0
for
key
in
self
.
halves
[
0
]}
self
.
connexity
[
1
]
=
{
key
:
0
for
key
in
self
.
halves
[
1
]}
self
.
edges
=
[]
# Compute link arities
for
node1
in
h1
:
...
...
@@ -38,6 +41,12 @@ class Dgraph(object):
self
.
connexity
[
0
][
node1
]
+=
1
self
.
connexity
[
1
][
node2
]
+=
1
if
node1
<
node2
:
self
.
edges
.
append
((
node1
,
node2
))
elif
node2
<
node1
:
self
.
edges
.
append
((
node2
,
node1
))
# Sort the halves by descending connexity
connex
=
self
.
connexity
self
.
halves
[
0
].
sort
(
reverse
=
True
,
key
=
lambda
v
:
connex
[
0
][
v
])
...
...
deconvolution/deconvolve.py
View file @
cf3ae74c
...
...
@@ -9,7 +9,7 @@ import itertools
import
d_graph
as
dg
import
d2_graph
as
d2
from
d2_algorithms
import
greedy_reduct
,
filter_singeltons
,
compute_unitigs
from
d2_algorithms
import
greedy_reduct
,
filter_singeltons
,
compute_unitigs
,
compute_path_from_unitigs
def
main
():
# Parsing the input file
...
...
@@ -21,18 +21,20 @@ def main():
G
=
nx
.
read_gexf
(
filename
)
d2g
=
d2
.
D2Graph
(
G
)
d2g
.
save
(
"data/optimization.tsv"
)
G
,
names
=
d2g
.
to_nx_graph
()
nx
.
write_gexf
(
G
,
"data/d2_graph.gexf"
)
print
(
"Greedy reduction of the graph"
)
greedy
=
filter_singeltons
(
greedy_reduct
(
d2g
))
nx
.
write_gexf
(
greedy
,
"data/d2_graph_greedy.gexf"
)
print
(
"Compute unitigs from greedy reducted graph"
)
unitigs
=
compute_unitigs
(
greedy
)
print
(
"
\n
"
.
join
([
str
(
x
)
for
x
in
unitigs
]))
print
(
len
(
unitigs
))
unitigs
=
compute_unitigs
(
greedy
,
d2g
)
# Compute greedy complete path from unitigs regarding most efficient path between them
path
=
compute_path_from_unitigs
(
d2g
,
unitigs
)
path
.
save_d2
(
d2g
,
"data/d2_greedy_path.gexf"
)
if
__name__
==
"__main__"
:
...
...
deconvolution/requirements.txt
View file @
cf3ae74c
networkx>=2.2
termcolor>=1.1
\ No newline at end of file
termcolor>=1.1
bidict>=0.18
\ No newline at end of file
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment