Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
71d73af0
Commit
71d73af0
authored
Jul 10, 2019
by
Yoann Dufresne
Browse files
Safe script for merging a molecule graph into a uniform barcode graph
parent
a96e76f7
Changes
3
Hide whitespace changes
Inline
Side-by-side
deconvolution/generate_fake_barcode_graph.py
View file @
71d73af0
#!/usr/bin/env python3
import
networkx
as
nx
import
sys
import
random
import
argparse
import
graph_manipulator
as
gm
G
=
nx
.
read_graphml
(
sys
.
argv
[
1
])
# label molecule nodes
labels
=
{}
for
idx
,
node
in
enumerate
(
G
.
nodes
()):
labels
[
node
]
=
str
(
idx
)
def parse_arguments(argv=None):
    """Parse the command line options of the barcode graph generator.

    @param argv Optional list of argument strings. When None (the default),
                argparse reads the options from sys.argv[1:].
    @return The argparse namespace holding merging_depth, input_graph,
            output and random_seed
    """
    parser = argparse.ArgumentParser(
        description='Transform a 10X molecule graph into a 10X barcode graph.'
    )
    parser.add_argument(
        '--merging_depth', '-m', type=int, required=True,
        help='Number of nodes to merge together',
    )
    parser.add_argument(
        '--input_graph', '-i', required=True,
        help='A 10X molecule graph gexf formated.',
    )
    parser.add_argument('--output', '-o', help="Output filename")
    parser.add_argument(
        '--random_seed', '-s', type=int,
        help="If you want to fix the random seed for reproducibility",
    )

    # The leftover statements of the former flat script (barcodes, n,
    # available_molecules, m) were never used by this function and have been
    # dropped.
    return parser.parse_args(argv)
# Group molecules by barcode
import
random
while
len
(
available_molecules
)
>
0
:
barcode
=
set
(
random
.
sample
(
available_molecules
,
m
))
available_molecules
-=
barcode
# print(barcode)
barcodes
+=
[
barcode
]
# Associate molecule to barcode
molecule_barcode
=
dict
()
for
barcode_index
,
barcode
in
enumerate
(
barcodes
):
for
mol
in
barcode
:
molecule_barcode
[
mol
]
=
barcode_index
print
(
molecule_barcode
)
# Generate barcoded graph nodes
G2
=
nx
.
Graph
()
g2_labels
=
{}
for
barcode_index
,
barcode_molecules
in
enumerate
(
barcodes
):
bar_names
=
"_"
.
join
(
barcode_molecules
)
g2_labels
[
barcode_index
]
=
f
"
{
barcode_index
}
:
{
bar_names
}
"
G2
.
add_node
(
g2_labels
[
barcode_index
])
# Generate barcoded graph edges
for
mol_edge
in
G
.
edges
():
m1
,
m2
=
mol_edge
b1
,
b2
=
g2_labels
[
molecule_barcode
[
m1
]],
g2_labels
[
molecule_barcode
[
m2
]]
G2
.
add_edge
(
b1
,
b2
)
# print(G2.edges)
output
=
sys
.
argv
[
1
].
replace
(
"molecule"
,
"barcode"
).
replace
(
".graphml"
,
f
"_
{
m
}
.gexf"
)
nx
.
write_gexf
(
G2
,
output
)
""" Take a molecule d-graph chain and merge the nodes uniformly to obtain a barcode graph.
@param G A molecule graph
@param merging_depth The number of nodes to merge from the original graph to obtain one node of the barcode graph
@return The merged barcode graph
"""
def fusion_graph(G, merging_depth):
    """Merge the nodes of a molecule graph uniformly to obtain a barcode graph.

    The nodes are shuffled with the module-level ``random`` generator and cut
    into consecutive groups of ``merging_depth`` nodes (the last group may be
    smaller). Each group is collapsed into a single node through
    ``gm.merge_nodes``, then every merged node is renamed
    ``"<group index>:<merged node name>"``.

    @param G A molecule graph. The merges are applied to this graph in place.
    @param merging_depth The number of nodes to merge from the original graph
                         to obtain one node of the barcode graph (>= 1)
    @return The merged barcode graph, as returned by ``nx.relabel_nodes``
    @raise ValueError If merging_depth is lower than 1
    """
    # A depth of 0 would crash inside range() and a negative one would
    # silently merge nothing, so reject both before touching the graph.
    if merging_depth < 1:
        raise ValueError(
            f"merging_depth must be a positive integer, got {merging_depth}"
        )

    nodes = list(G.nodes())
    random.shuffle(nodes)

    bijective_labels = {}
    group_starts = range(0, len(nodes), merging_depth)
    for label, start in enumerate(group_starts):
        # Extract the nodes to merge
        sublist = nodes[start:start + merging_depth]

        # Fold the group into one node; merge_nodes returns the new node name
        merged = sublist[0]
        for node in sublist[1:]:
            merged = gm.merge_nodes(G, merged, node)

        # Label the merged node with its group index
        bijective_labels[merged] = f"{label}:{merged}"

    # Relabel all the nodes
    return nx.relabel_nodes(G, bijective_labels)
def save_graph(G, outfile):
    """Write a graph to disk in GEXF format.

    @param G The graph to save
    @param outfile Name of the GEXF file to write
    """
    nx.write_gexf(G, outfile)
if __name__ == "__main__":
    # Script entry point: read a molecule graph, merge its nodes into
    # barcodes and save the resulting barcode graph.
    args = parse_arguments()

    # Compare against None so that an explicit seed of 0 is honoured too
    # (a plain truthiness test would silently ignore it).
    if args.random_seed is not None:
        random.seed(args.random_seed)

    G = nx.read_gexf(args.input_graph)
    G = fusion_graph(G, args.merging_depth)

    # Unless an output filename was given, derive it from the merging depth.
    outfile = args.output or f"simulated_barcodes_{args.merging_depth}.gexf"
    save_graph(G, outfile)
deconvolution/generate_fake_molecule_graph.py
View file @
71d73af0
...
...
@@ -31,6 +31,6 @@ if __name__ == "__main__":
outfile
=
f
"simulated_molecules_
{
args
.
num_molecule
}
_
{
args
.
depth
}
.gexf"
if
args
.
output
:
outfile
=
args
.
out
file
outfile
=
args
.
out
put
save_graph
(
G
,
outfile
)
deconvolution/graph_manipulator.py
View file @
71d73af0
...
...
@@ -27,7 +27,7 @@ def generate_d_graph_chain(size, d):
@param G The graph to manipulate
@param node1 First node to merge
@param node2 Second node to merge
@return The
modified graph
G
@return The
name of the new node in
G
"""
def
merge_nodes
(
G
,
node1
,
node2
):
# Create the new node
...
...
@@ -63,5 +63,5 @@ def merge_nodes(G, node1, node2):
G
.
remove_node
(
node1
)
G
.
remove_node
(
node2
)
return
G
return
new_node
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment