Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
0d701c92
Commit
0d701c92
authored
Jun 05, 2019
by
Yoann Dufresne
Browse files
simulation of d2_graph
parent
c22be631
Changes
1
Hide whitespace changes
Inline
Side-by-side
deconvolution/generate_fake_d2_graph.py
0 → 100755
View file @
0d701c92
#!/usr/bin/env python3
import
networkx
as
nx
import
itertools
as
it
import
sys
from
bidict
import
bidict
# Inputs: argv[1] is the path of a GEXF barcode graph, argv[2] the integer
# d-graph half-size used by the window enumeration below.
barcode_graph = nx.read_gexf(sys.argv[1])
d_size = int(sys.argv[2])

# Map every molecule index to the graph node that carries it.
# Node ids are expected to look like "<name>:<i>_<j>_...": exactly one ':'
# followed by '_'-separated integer molecule indices.
nodes = list(barcode_graph.nodes())
node_per_mol = {}
for barcode_node in nodes:
    # Only the index field matters; the part before ':' is ignored.
    _, mol_field = barcode_node.split(":")
    mol_ids = [int(token) for token in mol_field.split("_")]

    # If an index is listed by several nodes, the last node seen wins.
    node_per_mol.update(dict.fromkeys(mol_ids, barcode_node))
# enumerate d-graphs
# A d-graph is the tuple of nodes carrying 2 * d_size + 1 consecutive molecule
# indices. The windows slide over molecule indices (the keys of node_per_mol),
# so their number is derived from the molecule count, not from the node count:
# one node may carry several molecules, and counting nodes would silently drop
# the trailing windows.
# NOTE(review): assumes molecule indices are contiguous and start at 0 (as the
# lookups below already require) -- confirm against the graph generator.
window_size = 2 * d_size + 1
n_windows = len(node_per_mol) - window_size + 1  # <= 0 yields no d-graph
d_graphs = bidict({
    start_idx: tuple(
        node_per_mol[mol_idx]
        for mol_idx in range(start_idx, start_idx + window_size)
    )
    for start_idx in range(n_windows)
})
# compute d-graph edges by indexing then distance
# `index` maps each sorted triplet of node names to the list of d-graphs that
# contain it, so that only d-graphs sharing a triplet get compared afterwards.
index = {}
for d_graph in d_graphs.values():
    # Generate all tuplesize-mers (triplets). combinations() tells elements
    # apart by position, so a node occurring twice in a d-graph produces the
    # same sorted triplet several times. dict.fromkeys() drops those repeats
    # while keeping first-seen order, so a d-graph is listed once per triplet
    # and is never paired with itself when buckets are compared pairwise.
    dmers = dict.fromkeys(
        tuple(sorted(dmer)) for dmer in it.combinations(d_graph, 3)
    )
    for dmer in dmers:
        index.setdefault(dmer, []).append(d_graph)
def distance(dg1, dg2):
    """Count the elements that are not shared between two collections.

    Both inputs are sorted (into new lists) and merged in lockstep. Equal
    elements are matched one-to-one, so duplicates are treated as in a
    multiset. Every element left unmatched on either side adds 1.

    Args:
        dg1: iterable of mutually comparable items.
        dg2: iterable of mutually comparable items.

    Returns:
        The number of unmatched elements (0 when both hold the same items).
    """
    left = sorted(dg1)
    right = sorted(dg2)
    n_left, n_right = len(left), len(right)

    unmatched = 0
    i = j = 0
    while i < n_left and j < n_right:
        if left[i] < right[j]:
            i += 1
        elif left[i] > right[j]:
            j += 1
        else:
            # Same element on both sides: consume both, nothing to count.
            i += 1
            j += 1
            continue
        unmatched += 1

    # Whatever remains once one side is exhausted has no counterpart.
    return unmatched + (n_left - i) + (n_right - j)
# Pairwise distances, keyed by d-graph id on both levels. Only d-graphs that
# appear together in at least one bucket of `index` are compared, and each
# result is stored symmetrically.
distances = {dg_id: {} for dg_id in d_graphs}
for bucket in index.values():
    for first, second in it.combinations(bucket, 2):
        # Translate the node tuples back to their d-graph ids.
        id_a = d_graphs.inverse[first]
        id_b = d_graphs.inverse[second]
        pair_distance = distance(list(first), list(second))
        distances[id_a][id_b] = pair_distance
        distances[id_b][id_a] = pair_distance
# print(distances)
# Build the output graph: one node per d-graph id, and one unweighted edge for
# every pair recorded in `distances` (the distance value itself is not stored).
G2 = nx.Graph()
G2.add_nodes_from(d_graphs)
G2.add_edges_from(
    (src, dst)
    for src, neighbors in distances.items()
    for dst in neighbors
)

# The output path is fixed; an input-derived name was sketched but is disabled:
# output = sys.argv[1].replace("molecule", "barcode").replace(".graphml", f"_{m}.gexf")
nx.write_gexf(G2, 'data/d2_simulated.gexf')
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment