Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
e8599455
Commit
e8599455
authored
Sep 04, 2019
by
Yoann Dufresne
Browse files
cleaning workspace
parent
ade48191
Changes
11
Hide whitespace changes
Inline
Side-by-side
deconvolution/count.py
deleted
100644 → 0
View file @
ade48191
import
sys
# Echo the first line received on standard input.
# The `sys` module has no `readline` attribute; the readable stream is
# `sys.stdin`, so the call has to go through it.
print(sys.stdin.readline())
deconvolution/d2_graph.py
View file @
e8599455
...
...
@@ -49,6 +49,9 @@ class D2Graph(nx.Graph):
return
G
def clone(self):
    """Return the subgraph induced by all the nodes of this graph.

    NOTE(review): if `subgraph` is the stock networkx implementation, the
    result is a view that shares its data with this graph rather than an
    independent copy -- confirm before mutating the returned object.
    """
    every_node = list(self.nodes())
    return self.subgraph(every_node)
def
construct_from_barcodes
(
self
,
index_size
=
3
,
verbose
=
True
,
debug
=
False
):
# Compute all the d-graphs
...
...
@@ -79,6 +82,14 @@ class D2Graph(nx.Graph):
self
.
compute_distances
()
def get_covering_variables(self, udg):
    """Return the set of variable indexes covered by the edges of `udg`.

    Each edge of `udg.edges` is translated through `self.barcode_edge_idxs`;
    an edge missing from that mapping raises `KeyError`.

    :param udg: object exposing an iterable `edges` attribute.
    :return: frozenset of the indexes associated with those edges.
    """
    edge_to_idx = self.barcode_edge_idxs
    return frozenset(edge_to_idx[edge] for edge in udg.edges)
def
save
(
self
,
filename
):
with
open
(
filename
,
"w"
)
as
fp
:
# First line nb_nodes nb_cov_var
...
...
@@ -245,4 +256,3 @@ class D2Graph(nx.Graph):
for
dmer
in
removable_dmers
:
del
self
.
index
[
dmer
]
deconvolution/d2_path.py
View file @
e8599455
...
...
@@ -15,6 +15,12 @@ class Path(list):
if
len
(
udgs
)
==
0
:
return
for
udg
in
udgs
:
# Register edges
for
barcode_edge
in
udg
.
edges
:
edge_idx
=
self
.
d2g
.
barcode_edge_idxs
[
barcode_edge
]
self
.
covering_variables
[
edge_idx
]
+=
1
# Special case for previously empty path
if
len
(
self
)
==
0
:
# 4 because it's the ideal case (1 node of difference with same length on 1 shift).
...
...
@@ -31,14 +37,6 @@ class Path(list):
# Add the node
self
.
append
(
udg
)
# Register edges
for
barcode_edge
in
udg
.
edges
:
edge_idx
=
self
.
d2g
.
barcode_edge_idxs
[
barcode_edge
]
self
.
covering_variables
[
edge_idx
]
+=
1
def revert(self):
    """Flip the order of the elements of this path, in place."""
    # Delegates to the list-style `reverse`; nothing is returned.
    self.reverse()
def normalized_penalty(self):
    """Return `self.penalty` divided by the number of elements in the path.

    Raises `ZeroDivisionError` when the path is empty.
    """
    total = self.penalty
    return total / len(self)
...
...
@@ -63,6 +61,16 @@ class Path(list):
return
num_covered
def save_path(self, filename):
    """Write the subgraph made of the nodes of this path to a GEXF file.

    Nodes are looked up in `self.d2g` by the string form of each
    element's `idx`.

    :param filename: destination handed to `nx.write_gexf`.
    """
    node_ids = [str(udg.idx) for udg in self]
    path_graph = self.d2g.subgraph(node_ids)
    nx.write_gexf(path_graph, filename)
def save_path_in_graph(self, filename):
    """Write the d2 graph to a GEXF file, tagging the nodes of this path.

    Every node that belongs to the path receives a "path" attribute holding
    its position (0-based) in the path.

    NOTE(review): the tags are written on the object returned by
    `self.d2g.clone()`; whether that object shares node attributes with
    `self.d2g` depends on `clone` -- confirm.

    :param filename: destination handed to `nx.write_gexf`.
    """
    annotated = self.d2g.clone()
    position = 0
    for udg in self:
        node_id = str(udg.idx)
        annotated.nodes[node_id]["path"] = position
        position += 1
    nx.write_gexf(annotated, filename)
class
Unitig
(
Path
):
...
...
deconvolution/d2_to_path.py
View file @
e8599455
...
...
@@ -37,10 +37,32 @@ def main():
# Take the principal component
largest_component_nodes
=
max
(
nx
.
connected_components
(
d2g
),
key
=
len
)
largest_component
=
d2g
.
subgraph
(
largest_component_nodes
)
unitigs
=
compute_unitigs
(
largest_component
)
path
=
pa
.
construct_path_from_unitigs
(
unitigs
,
largest_component
)
print
(
path
.
covering_score
())
import
path_optimization
as
po
# Start optimization
optimizer
=
po
.
Optimizer
(
largest_component
)
optimizer
.
init_random_solutions
(
1
)
solution
=
optimizer
.
solutions
[
0
]
print
(
solution
)
print
(
solution
.
covering_score
())
optimizer
.
extends_until_end
(
solution
)
print
(
solution
.
covering_score
())
solution
.
save_path_in_graph
(
"data/test_d2_path.gexf"
)
solution
.
save_path
(
"data/test_path.gexf"
)
# unitigs = compute_unitigs(largest_component)
# path = pa.construct_path_from_unitigs(unitigs, largest_component)
# print("\n".join([str(x) for x in path]))
# print(path.covering_score())
# diameter = nx.diameter(largest_component)
# print(diameter)
# Write the simplified graph
# nx.write_gexf(d2g.nx_graph, args.output_d2_name)
...
...
deconvolution/deconvolve.py
deleted
100755 → 0
View file @
ade48191
#!/usr/bin/env python3
import
sys
import
math
import
networkx
as
nx
import
itertools
import
d_graph
as
dg
import
d2_graph
as
d2
from
d2_algorithms
import
greedy_reduct
,
filter_singeltons
,
compute_unitigs
,
compute_path_from_unitigs
def main():
    """Build a d2 graph from the file named in `sys.argv[1]` and dump it.

    The input is read with the networkx reader matching its extension
    (`.graphml` or `.gexf`); for any other extension `G` stays `None` and
    is passed as such to `d2.D2Graph`. Outputs are written under `data/`:
    the optimization table, the d2 graph and its greedy reduction.
    """
    # Parsing the input file
    filename = sys.argv[1]
    readers = (
        ('.graphml', nx.read_graphml),
        ('.gexf', nx.read_gexf),
    )
    G = None
    for suffix, reader in readers:
        if filename.endswith(suffix):
            G = reader(filename)
            break

    d2g = d2.D2Graph(G, index_size=8)
    d2g.save("data/optimization.tsv")

    # Only the graph is used; the second returned value is ignored.
    G, _names = d2g.to_nx_graph()
    nx.write_gexf(G, "data/d2_graph.gexf")

    print("Greedy reduction of the graph")
    greedy = greedy_reduct(d2g)
    nx.write_gexf(greedy, "data/d2_graph_greedy.gexf")

    # The unitig computation and the greedy path built from the unitigs
    # (compute_unitigs / compute_path_from_unitigs) are currently disabled.
# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
deconvolution/generate_duplicated.py
deleted
100644 → 0
View file @
ade48191
import
networkx
as
nx
# Build a small test graph: 30 labelled nodes chained with a window of 3,
# where label 7 appears a second time in the chain.
G = nx.Graph()
labels = list(range(30))

# create nodes; each one stores its own label in the "test" attribute
G.add_nodes_from(labels)
names = {lab: lab for lab in labels}
nx.set_node_attributes(G, names, "test")

# insert duplications
labels.insert(23, 7)
print(labels)

# create links: each label is linked to the (up to) 3 labels that follow it
for i, lab in enumerate(labels):
    for following in labels[i + 1:i + 4]:
        G.add_edge(lab, following)

nx.write_graphml(G, "simple_duplicated_3links.graphml")
deconvolution/path_algorithms.py
View file @
e8599455
...
...
@@ -29,7 +29,6 @@ def construct_path_from_unitigs(unitigs, d2g):
unitigs
=
[
utg
for
utg
in
unitigs
if
path
.
covering_difference
(
utg
)
>
0
]
print
()
return
path
...
...
@@ -59,7 +58,7 @@ def _search_way_to_next_unitig(path, unitigs, d2g):
for
extension
in
best_paths
:
utg
=
endpoints
[
extension
[
-
1
]]
# if the utg is in the wrong size
if
utg
[
-
1
]
==
path
[
-
1
]:
if
utg
[
-
1
]
==
extension
[
-
1
]:
utg
.
reverse
()
complete_path
=
Path
(
d2g
)
...
...
@@ -83,6 +82,7 @@ def _search_way_to_next_unitig(path, unitigs, d2g):
"""
def
_search_endpoint
(
start_udg
,
targets
,
d2g
,
forbidden_udgs
):
marked_nodes
=
{
x
:
(
None
,
None
)
for
x
in
forbidden_udgs
}
covered_variables
=
set
(
forbidden_udgs
.
covering_variables
.
keys
())
# Init Dijkstra
to_explore
=
[
start_udg
]
...
...
@@ -94,8 +94,17 @@ def _search_endpoint(start_udg, targets, d2g, forbidden_udgs):
# Select min penalty in to_explore
current
=
min
(
to_explore
,
key
=
lambda
x
:
marked_nodes
[
x
][
0
])
current_penalty
=
marked_nodes
[
current
][
0
]
to_explore
.
remove
(
current
)
# Filter neighbors by their covering values
neighbors
=
d2g
.
neighbors
(
str
(
current
.
idx
))
filtered_neighbors
=
[]
for
n
in
neighbors
:
nei
=
d2g
.
node_by_idx
[
int
(
n
)]
if
len
(
d2g
.
get_covering_variables
([
nei
])
-
covered_variables
)
>
0
:
filtered_neighbors
.
append
(
n
)
neighbors
=
filtered_neighbors
# Explore all the neighbors of the current node.
for
nei_idx
in
neighbors
:
nei_udg
=
d2g
.
node_by_idx
[
int
(
nei_idx
)]
...
...
deconvolution/path_optimization.py
0 → 100644
View file @
e8599455
import
random
from
d2_path
import
Path
class Optimizer:
    """Greedy builder of paths (solutions) over a d2 graph."""

    def __init__(self, d2g):
        """Store the graph to explore and start with no solution.

        :param d2g: graph object; the methods below use its `neighbors`,
            `node_by_idx`, `get_covering_variables` and edge lookup.
        """
        self.d2g = d2g
        self.solutions = []

    def init_random_solutions(self, nb_solutions):
        """Append `nb_solutions` randomly initialised solutions."""
        for _ in range(nb_solutions):
            candidate = Solution(self.d2g)
            candidate.random_init()
            self.solutions.append(candidate)

    def extends_until_end(self, solution):
        """Extend `solution` as far as possible on one end, then the other.

        The solution is grown from its last element until `extends` fails,
        reversed in place, and grown again. The length is printed after
        each successful extension.
        """
        for second_pass in (False, True):
            if second_pass:
                solution.reverse()
                print("reverse the solution")
            while self.extends(solution):
                print(len(solution))

    def extends(self, solution):
        """Append the best unused neighbor of the last element of `solution`.

        :return: True when a neighbor was added, False when the last
            element has no neighbor outside of the solution.
        """
        graph = self.d2g
        tail_id = str(solution[-1].idx)

        # Neighbors of the tail that are not already part of the solution
        candidates = []
        for nei_id in graph.neighbors(tail_id):
            udg = graph.node_by_idx[int(nei_id)]
            if udg not in solution:
                candidates.append(udg)

        # Variables covered at least once by the current solution
        covered = frozenset(
            var for var, count in solution.covering_variables.items()
            if count > 0
        )

        if not candidates:
            return False

        def rank(udg):
            # Ordered criteria, smaller is better:
            #  1. 0 when the node covers at least one new variable, else 1
            #  2. "distance" attribute of the edge between node and tail
            #  3. link divergence of the node
            new_vars = graph.get_covering_variables(udg) - covered
            brings_nothing = 1 if len(new_vars) == 0 else 0
            distance = graph[str(udg.idx)][tail_id]["distance"]
            return (brings_nothing, distance, udg.get_link_divergence())

        best = min(candidates, key=rank)
        solution.add_path([best])
        return True
class Solution(Path):
    """A `Path` over a d2 graph that can be (re)started from a random node."""

    def __init__(self, d2g):
        """Create an empty solution bound to the graph `d2g`."""
        super().__init__(d2g)

    def random_init(self):
        """Reset the solution to a single node drawn uniformly at random.

        NOTE(review): only the list content is cleared here; any other
        state kept by `Path` is left as is -- confirm this is intended
        when re-initialising a non-empty solution.
        """
        all_ids = list(self.d2g.nodes())
        picked_id = random.choice(all_ids)
        start_node = self.d2g.node_by_idx[int(picked_id)]

        self.clear()
        self.add_path([start_node])
deconvolution/requirements.txt
deleted
100644 → 0
View file @
ade48191
networkx>=2.2
termcolor>=1.1
bidict>=0.18
\ No newline at end of file
deconvolution/test.py
deleted
100644 → 0
View file @
ade48191
import
networkx
as
nx
# Scratch check: an attribute set on the dict returned by an edge lookup
# shows up in the graph's edge data.
G = nx.path_graph(3)
print(G.edges(data=True))

# Attribute dictionary of the edge (0, 1)
edge_data = G.edges[0, 1]
edge_data["test"] = 2

print(G.edges(data=True))
requirements.txt
View file @
e8599455
termcolor
networkx
bidict
pytest
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment