Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
367747d2
Commit
367747d2
authored
Aug 13, 2019
by
Yoann Dufresne
Browse files
compute unitigs
parent
c0020617
Changes
3
Hide whitespace changes
Inline
Side-by-side
deconvolution/d2_algorithms.py
View file @
367747d2
...
...
@@ -50,7 +50,7 @@ def transitive_reduction(d2g):
print
(
f
"
{
len
(
nxg
.
edges
())
}
remaining"
)
""" For each node of the d2 graph, construct a node in the reduc
t
ed graph.
""" For each node of the d2 graph, construct a node in the reduced graph.
Then, for each node, compute the closest neighbors in d2 (with equal scores) and add an edge
in the greedy graph.
@param d2 Input d2 graph (with distances already computed)
...
...
@@ -88,94 +88,81 @@ def filter_singeltons(graph):
return
graph
def
compute_unitigs
(
graph
,
d2g
):
""" Compute the unambiguious paths
"""
""" Compute the unambiguous paths in a graph. The graph must not contain singletons.s
@param graph a networkx graph
@return a list of unitigs
"""
def
compute_unitigs
(
graph
):
unitigs
=
[]
nodelist
=
list
(
graph
.
nodes
()
)
already_used
=
{
node
:
False
for
node
in
nodelist
}
used_nodes
=
{
node
:
False
for
node
in
graph
.
nodes
()
}
current_unitig
=
Unitig
()
for
node
in
nodelist
:
#
skip the node if already in use in some unitig
if
already_used
[
node
]:
for
node
in
graph
.
nodes
()
:
#
If already tested
if
used_nodes
[
node
]:
continue
used_nodes
[
node
]
=
True
# Start the unitig
unitig
=
Unitig
(
d2g
,
[
node
])
already_used
[
node
]
=
True
# Save the unitig
unitigs
.
append
(
unitig
)
# If 1 or 2 neighbors, part of the unitig
# Change unitig if not
if
len
(
graph
[
node
])
>
2
or
len
(
graph
[
node
])
==
0
:
if
graph
.
degree
(
node
)
>
2
:
# Not a unitig
continue
# Randomly determine the right and a left neighbors
neighbors
=
list
(
graph
[
node
])
adding_functions
=
[
unitig
.
add_right
]
if
len
(
neighbors
)
==
2
:
adding_functions
.
append
(
unitig
.
add_left
)
# --- Explore the sides of the unitig ---
for
idx
in
range
(
len
(
neighbors
)):
previous
,
current
=
node
,
neighbors
[
idx
]
while
len
(
graph
[
current
])
==
2
and
not
already_used
[
current
]:
adding_functions
[
idx
](
current
)
already_used
[
current
]
=
True
# Go to next neighbor
neighbors
=
list
(
graph
[
current
])
new_node
=
neighbors
[
idx
]
if
neighbors
[
idx
]
!=
previous
else
neighbors
[(
idx
+
1
)
%
2
]
previous
=
current
current
=
new_node
# Add the extremity of the unitig
if
len
(
graph
[
current
])
==
1
:
adding_functions
[
idx
](
current
)
already_used
[
current
]
=
True
# Create new unitigs
unitig
=
compute_unitig_from
(
graph
,
node
)
unitigs
.
append
(
unitig
)
for
node
in
unitig
:
used_nodes
[
node
]
=
True
return
unitigs
def
compute_path_from_unitigs
(
d2g
,
unitigs
):
path
=
Path
()
# Create an edge using count.
included_edges
=
{}
for
e
in
d2g
.
graph
.
edges
():
if
e
[
0
]
<
e
[
1
]:
included_edges
[(
e
[
0
],
e
[
1
])]
=
False
else
:
included_edges
[(
e
[
1
],
e
[
0
])]
=
False
remaining_edges
=
len
(
included_edges
)
# Sort the unitigs by decreasing size
unitigs
.
sort
(
key
=
lambda
x
:
len
(
x
.
nodes
),
reverse
=
True
)
# Select the first unitig that contain at least 1 uncovered edge
used_unitigs
=
[]
nb_used
=
0
for
unitig
in
unitigs
:
if
remaining_edges
==
0
:
break
# Compute usabable edges
edge_found
=
False
for
edge
in
unitig
.
get_original_edges
():
if
not
included_edges
[
edge
]:
edge_found
=
True
included_edges
[
edge
]
=
True
remaining_edges
-=
1
# Nothing to improve the solution
if
not
edge_found
:
continue
# Add it to the path
path
.
add_path
(
unitig
)
nb_used
+=
1
print
(
f
"Unitig used:
{
nb_used
}
/
{
len
(
unitigs
)
}
"
)
return
path
""" Compute a unitig starting from the node regarding the graph.
The unitig will be extended on both sides
@param graph The graph to look for unitig
@param node The start node of the unitig
@return The constructed unitig
"""
def
compute_unitig_from
(
graph
,
node
):
unitig
=
Unitig
(
nodes
=
[
node
])
if
graph
.
degree
(
node
)
==
2
:
left
,
right
=
graph
.
neighbors
(
node
)
else
:
left
=
next
(
graph
.
neighbors
(
node
))
right
=
None
# Extends first side
prev_node
=
node
current_node
=
left
unitig
=
extend_unitig
(
unitig
,
graph
,
prev_node
,
current_node
)
unitig
.
revert
()
# Extends second side
prev_node
=
node
current_node
=
right
unitig
=
extend_unitig
(
unitig
,
graph
,
prev_node
,
current_node
)
return
unitig
""" Directional extension of the unitig. Uses the previous and current nodes to detect the extension direction.
@param unitig The unitig to extend. Will be modified during the execution.
@param graph The graph to follow for extension
@param prev_node Node already added in the unitig
@param current_node Node to add into the unitig and used to select the next node to add. If not set, stop the extension.
@return Return the modified unitig.
"""
def
extend_unitig
(
unitig
,
graph
,
prev_node
,
current_node
):
if
current_node
==
None
:
return
unitig
# Add the node
unitig
.
add_right
(
current_node
)
# Look for the next node
next_candidates
=
list
(
graph
.
neighbors
(
current_node
))
next_candidates
.
remove
(
prev_node
)
# Select next node
next_node
=
next_candidates
[
0
]
if
len
(
next_candidates
)
==
1
else
None
# Continue extension
return
extend_unitig
(
unitig
,
graph
,
current_node
,
next_node
)
deconvolution/d2_path.py
View file @
367747d2
import
networkx
as
nx
class
Path
(
objec
t
):
class
Path
(
lis
t
):
def
__init__
(
self
):
def
__init__
(
self
,
nodes
=
[]
):
super
(
Path
,
self
).
__init__
()
self
.
nodes
=
[]
self
.
nodes
=
[
x
for
x
in
nodes
]
def
add_nodes
(
self
,
nodes
):
...
...
@@ -16,6 +16,10 @@ class Path(object):
self
.
add_nodes
(
path
.
nodes
)
def
revert
(
self
):
self
.
nodes
=
[
x
for
x
in
self
.
nodes
[::
-
1
]]
def
get_score
(
self
):
return
0
...
...
@@ -24,31 +28,10 @@ class Path(object):
return
f
"[
{
','
.
join
([
str
(
x
)
for
x
in
self
.
nodes
])
}
]"
def
save_d2
(
self
,
d2g
,
file
):
if
len
(
self
.
nodes
)
==
0
:
return
graph_path
=
nx
.
Graph
()
graph_path
.
add_node
(
self
.
nodes
[
0
])
for
idx
,
node
in
enumerate
(
self
.
nodes
[
1
:]):
dg1
=
d2g
.
nodes
.
inverse
[
node
]
graph_path
.
add_node
(
node
)
prev_node
=
self
.
nodes
[
idx
]
dg2
=
d2g
.
nodes
.
inverse
[
prev_node
]
if
dg1
in
d2g
.
distances
[
dg2
]:
graph_path
.
add_edge
(
node
,
prev_node
)
nx
.
write_gexf
(
graph_path
,
file
)
class
Unitig
(
Path
):
def
__init__
(
self
,
d2g
,
node_list
):
super
(
Unitig
,
self
).
__init__
()
self
.
nodes
=
node_list
self
.
d2g
=
d2g
def
__init__
(
self
,
nodes
=
[]):
super
(
Unitig
,
self
).
__init__
(
nodes
)
def
add_left
(
self
,
node
):
...
...
@@ -59,35 +42,8 @@ class Unitig(Path):
self
.
nodes
.
append
(
node
)
def
get_original_edges
(
self
):
edges
=
[]
for
node
in
self
.
nodes
:
dg
=
self
.
d2g
.
nodes
.
inverse
[
node
]
edges
.
extend
(
dg
.
edges
)
return
frozenset
(
edges
)
class
UnitigGraph
():
def
__init__
(
self
,
d2g
,
unitigs
):
self
.
d2g
=
dg2
self
.
unitigs
=
unitigs
def
compute_unitig_graph
(
self
):
ug
=
nx
.
Graph
()
# Get unitig extremities and index them
borders
=
{}
for
unitig
in
self
.
unitigs
:
borders
.
append
[
unitig
.
nodes
[
0
]]
=
unitig
if
len
(
unitig
.
nodes
)
>
1
:
borders
.
append
[
unitig
.
nodes
[
-
1
]]
=
unitig
border_nodes
=
frozenset
(
borders
.
keys
())
# link borders together
for
unitig
in
self
.
unitigs
:
pass
deconvolution/d2_to_path.py
0 → 100755
View file @
367747d2
#!/usr/bin/env python3
import
networkx
as
nx
import
argparse
import
sys
import
d2_graph
as
d2
from
d2_algorithms
import
compute_unitigs
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Greedy construction of a path through the d2 graph.'
)
parser
.
add_argument
(
'barcode_graph'
,
help
=
'The barcode graph file. Must be a gefx formated file.'
)
parser
.
add_argument
(
'd2_graph'
,
help
=
'd2 graph to reduce. Must be a gefx formated file.'
)
parser
.
add_argument
(
'--output_path'
,
'-o'
,
default
=
"d2_path.gexf"
,
help
=
"Output file prefix."
)
args
=
parser
.
parse_args
()
return
args
def
main
():
# Parsing the arguments and validate them
args
=
parse_arguments
()
barcode_file
=
args
.
barcode_graph
d2_file
=
args
.
d2_graph
if
(
not
barcode_file
.
endswith
(
'.gexf'
))
or
(
not
d2_file
.
endswith
(
".gexf"
)):
print
(
"Inputs file must be gexf formatted"
,
file
=
sys
.
stderr
)
exit
(
1
)
# Loading
G
=
nx
.
read_gexf
(
barcode_file
)
d2g
=
d2
.
D2Graph
(
G
)
d2g
.
load
(
d2_file
)
# Take the principal component
largest_component_nodes
=
max
(
nx
.
connected_components
(
d2g
.
nx_graph
),
key
=
len
)
largest_component
=
d2g
.
nx_graph
.
subgraph
(
largest_component_nodes
)
unitigs
=
compute_unitigs
(
largest_component
)
# Write the simplificated graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name)
if
__name__
==
"__main__"
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment