Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
56839d4c
Commit
56839d4c
authored
Jan 24, 2020
by
rchikhi
Browse files
added genome molecule graph generation
parent
6cb17069
Changes
1
Hide whitespace changes
Inline
Side-by-side
deconvolution/main/generate_genome_molecule_graph.py
0 → 100755
View file @
56839d4c
#!/usr/bin/env python3
import
argparse
import
numpy
as
np
import
networkx
as
nx
from
intervaltree
import
Interval
,
IntervalTree
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Generate a fake 10X molecule graph.'
)
parser
.
add_argument
(
'--num_molecule'
,
'-n'
,
type
=
int
,
required
=
True
,
help
=
'The number of molecule in the final graph'
)
parser
.
add_argument
(
'--genome_size'
,
'-g'
,
type
=
int
,
required
=
True
,
help
=
'genome size'
)
parser
.
add_argument
(
'--avg_mol_len'
,
'-l'
,
type
=
int
,
required
=
True
,
help
=
'avg molecule size'
)
parser
.
add_argument
(
'--min_mol_ovl'
,
'-b'
,
type
=
int
,
default
=
0
,
help
=
'minimum overlap length between molecules'
)
parser
.
add_argument
(
'--rnd_seed'
,
'-s'
,
type
=
int
,
default
=-
1
,
help
=
'Random seed. Used for reproducibility purpose. Please do not use it in production.'
)
parser
.
add_argument
(
'--output'
,
'-o'
,
help
=
"Output filename"
)
args
=
parser
.
parse_args
()
return
args
def
generate_graph
(
nb_molecules
,
genome_size
,
avg_mol_size
,
min_mol_ovl
,
rnd_seed
=
None
):
# Reproducibility
if
rnd_seed
!=
-
1
:
np
.
random
.
seed
(
rnd_seed
)
G
=
nx
.
Graph
()
molecules
=
dict
()
# mol index: (start,end)
for
idx
in
range
(
nb_molecules
):
G
.
add_node
(
idx
)
#pick a starting position (follows LRSIM)
start_pos
=
np
.
random
.
randint
(
0
,
genome_size
)
#pick a fragment size (follows LRSIM)
molecule_size
=
np
.
random
.
poisson
(
avg_mol_size
)
print
(
"molecule"
,
idx
,
"start"
,
start_pos
,
"end"
,
start_pos
+
molecule_size
)
molecules
[
idx
]
=
(
start_pos
,
start_pos
+
molecule_size
)
# compute overlaps between molecules
itree
=
IntervalTree
()
for
idx
in
molecules
:
start
,
end
=
molecules
[
idx
]
itree
.
addi
(
start
,
end
,
idx
)
# create graph edges corresponding to molecules overlaps
for
idx
in
molecules
:
start
,
end
=
molecules
[
idx
]
for
itv
in
itree
.
overlap
(
start
,
end
):
other_idx
=
itv
.
data
if
idx
==
other_idx
:
continue
G
.
add_edge
(
idx
,
other_idx
)
return
G
def
save_graph
(
G
,
outfile
):
import
networkx
as
nx
nx
.
write_gexf
(
G
,
outfile
)
if
__name__
==
"__main__"
:
args
=
parse_arguments
()
G
=
generate_graph
(
args
.
num_molecule
,
args
.
genome_size
,
args
.
avg_mol_len
,
args
.
min_mol_ovl
,
args
.
rnd_seed
)
outfile
=
f
"simulated_molecules_
{
args
.
num_molecule
}
_
{
args
.
genome_size
}
_
{
args
.
avg_mol_len
}
_
{
args
.
min_mol_ovl
}
.gexf"
if
args
.
output
:
outfile
=
args
.
output
save_graph
(
G
,
outfile
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment