Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
498df34a
Commit
498df34a
authored
Apr 28, 2020
by
Yoann Dufresne
Browse files
clique graph tests
parent
2122230c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Snakefile_clique_experiments
0 → 100644
View file @
498df34a
# Workflow simulating molecule graphs, then barcode graphs built from them.
# Every parameter below can be overridden through the Snakemake config
# (e.g. `--config n=500`); otherwise the default on the right is used.
OUTDIR = config.get("outdir", "snake_exec")
N = config.get("n", [10000])        # Number of molecules to simulate
D = config.get("d", [5])            # Average coverage of each molecule
M = config.get("m", [2])            # Average number of molecules per barcode
M_DEV = config.get("m_dev", [0])    # Std deviation for the merging number


# Target rule: one barcode graph per combination of the parameters above.
rule all:
    input:
        expand(f"{OUTDIR}/simu_bar_n{{n}}_d{{d}}_m{{m}}-dev{{md}}.gexf", n=N, m=M, d=D, md=M_DEV)


# Merge the molecules of a simulated molecule graph into barcodes.
rule generate_barcodes:
    input:
        "{path}/simu_mol_{params}.gexf"
    output:
        "{path}/simu_bar_{params}_m{m}-dev{md}.gexf"
    shell:
        "python3 deconvolution/main/generate_fake_barcode_graph.py --merging_depth {wildcards.m} --deviation {wildcards.md} --input_graph {input} --output {output}"


# Simulate a molecule graph with n molecules and an average depth d.
rule generate_molecules:
    output:
        "{path}/simu_mol_n{n}_d{d}.gexf"
    shell:
        "python3 deconvolution/main/generate_fake_molecule_graph.py --num_molecule {wildcards.n} --avg_depth {wildcards.d} --output {output}"
deconvolution/d2graph/d2_graph.py
View file @
498df34a
...
...
@@ -62,7 +62,7 @@ class D2Graph(nx.Graph):
return
self
.
subgraph
(
list
(
self
.
nodes
()))
def
construct_from_barcodes
(
self
,
index_size
=
3
,
verbose
=
True
,
clique_mode
=
None
,
threads
=
1
):
def
construct_from_barcodes
(
self
,
neighbor_threshold
=
0.25
,
verbose
=
True
,
clique_mode
=
None
,
threads
=
1
):
# Compute all the d-graphs
if
verbose
:
print
(
"Computing the unit d-graphs.."
)
...
...
@@ -83,28 +83,10 @@ class D2Graph(nx.Graph):
d_graph
.
idx
=
idx
self
.
node_by_idx
[
idx
]
=
d_graph
# # Index all the d-graphs
# if verbose:
# print("Compute the dmer dgraph")
# print("\tIndexing")
# # self.index = FixedDGIndex(size=index_size)
# self.index = VariableDGIndex(size=index_size)
# for idx, dg in enumerate(self.all_d_graphs):
# if verbose:
# print(f"\r\t{idx+1}/{len(self.all_d_graphs)}", end='')
# self.index.add_dgraph(dg)
# # self.var_index.add_dgraph(dg)
# if verbose:
# print()
# print("\tFilter index")
# self.index.filter_by_entry()
# # self.index = self.create_index_from_tuples(index_size, verbose=verbose)
# # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
# # Compute node distances for pair of dgraphs that share at least 1 dmer.
if
verbose
:
print
(
"Compute the graph"
)
# Create the graph
self
.
bidict_nodes
=
self
.
create_graph_from_node_neighborhoods
()
self
.
bidict_nodes
=
self
.
create_graph_from_node_neighborhoods
(
neighbor_threshold
)
def
get_covering_variables
(
self
,
udg
):
...
...
deconvolution/dgraph/CliqueDGFactory.py
View file @
498df34a
...
...
@@ -7,10 +7,9 @@ from deconvolution.dgraph import AbstractDGIndex
class
CliqueDGFactory
(
AbstractDGFactory
):
def
__init__
(
self
,
graph
,
min_size_clique
=
4
,
dg_max_divergence_factor
=
0.5
,
debug
=
False
,
debug_path
=
"."
):
def
__init__
(
self
,
graph
,
min_size_clique
=
4
,
debug
=
False
,
debug_path
=
"."
):
super
(
CliqueDGFactory
,
self
).
__init__
(
graph
,
debug
=
debug
)
self
.
min_size
=
min_size_clique
self
.
dg_max_divergence_factor
=
dg_max_divergence_factor
if
debug
:
self
.
debug_path
=
debug_path
...
...
deconvolution/main/to_d2_graph.py
View file @
498df34a
...
...
@@ -64,8 +64,7 @@ def main():
d2g
=
d2
.
D2Graph
(
G
,
debug
=
debug
,
debug_path
=
debug_path
)
dprint
(
"D2 graph object created"
)
dprint
(
"constructing d2 graph from barcode graph"
)
index_size
=
4
#if clique_mode is None else 3
d2g
.
construct_from_barcodes
(
index_size
=
index_size
,
clique_mode
=
clique_mode
,
threads
=
args
.
threads
)
d2g
.
construct_from_barcodes
(
neighbor_threshold
=
d2_threshold
,
clique_mode
=
clique_mode
,
threads
=
args
.
threads
)
dprint
(
"[debug] d2 graph constructed"
)
# d2g.save(f"{args.output_prefix}.tsv")
...
...
experiments/CliqueGraph.py
0 → 100644
View file @
498df34a
import
networkx
as
nx
class CliqueGraph(nx.Graph):
    """Graph whose nodes are cliques extracted from a source graph.

    For every node of the source graph, the maximal cliques of the subgraph
    induced by its neighbors are listed. Each distinct clique (stored as a
    ``frozenset`` of source nodes) becomes one node of this graph.

    Attributes:
        listed_cliques: set of all the cliques registered so far.
        clique_per_node: maps each source node to the set of registered
            cliques that contain it.
    """

    def __init__(self, g):
        """Build the clique nodes from the source graph *g*."""
        super().__init__()
        self.listed_cliques = set()
        self.clique_per_node = {}
        self._nodes_from_graph(g)

    def _nodes_from_graph(self, g):
        """Reset the clique indexes and register every neighborhood clique of *g*."""
        self.listed_cliques = set()
        self.clique_per_node = {source_node: set() for source_node in g.nodes()}

        for center in g.nodes():
            # Subgraph induced by the neighbors of the current node
            neighborhood = g.subgraph(g.neighbors(center))

            # Maximal cliques of that neighborhood, made hashable
            for members in map(frozenset, nx.find_cliques(neighborhood)):
                # A clique reachable from several centers is registered once
                if members in self.listed_cliques:
                    continue

                self.listed_cliques.add(members)
                self.add_node(members)
                for member in members:
                    self.clique_per_node[member].add(members)
experiments/__init__.py
0 → 100644
View file @
498df34a
experiments/clique_graph_eval.py
0 → 100644
View file @
498df34a
import
argparse
import
networkx
as
nx
from
collections
import
Counter
from
experiments.CliqueGraph
import
CliqueGraph
from
deconvolution.dgraph.CliqueDGFactory
import
CliqueDGFactory
def parse_arguments():
    """Parse the command line of the clique graph evaluation script.

    Returns:
        The ``argparse.Namespace`` holding the path of the barcode graph
        in its ``barcode_graph`` attribute.
    """
    cli = argparse.ArgumentParser(description="Tests on graph barcode")
    cli.add_argument(
        'barcode_graph',
        help='The barcode graph file. Must be a gexf formatted file.',
    )
    return cli.parse_args()
def is_continuous(barcode_multiset):
    """Tell whether the barcodes can be explained by consecutive molecules.

    Args:
        barcode_multiset: a ``Counter`` whose keys are iterables of integer
            molecule ids (one key per barcode) and whose values are the
            number of occurrences of that barcode.

    Returns:
        True if there is a run of consecutive molecule ids, as long as the
        total number of barcodes, in which each barcode is the origin of
        exactly as many molecules as its count in the multiset.
        False otherwise.
    """
    # Remember which barcode each molecule comes from. A molecule present in
    # several barcodes keeps the last barcode iterated.
    molecule_origin = {}
    for barcode in barcode_multiset:
        for molecule in barcode:
            molecule_origin[molecule] = barcode

    sorted_ids = sorted(molecule_origin)
    window = sum(barcode_multiset.values())

    # Slide a window of `window` molecules over the sorted ids
    for end in range(window, len(sorted_ids) + 1):
        start = end - window

        # The ids of the window must be consecutive integers
        expected_last = sorted_ids[start] + window - 1
        if expected_last != sorted_ids[end - 1]:
            continue

        # Count how many molecules of the window each barcode provides
        origins = Counter(molecule_origin[mol] for mol in sorted_ids[start:end])

        # The window matches when these counts equal the multiset counts
        if all(barcode_multiset[bar] == nb for bar, nb in origins.items()):
            return True

    return False
def iterable_to_barcode_multiset(clique):
    """Convert barcode names into a multiset of molecule id tuples.

    Each name is expected to look like ``<prefix>:<id>_<id>_...``: the part
    after the first ``:`` is split on ``_`` and every piece is converted
    to ``int``.

    Args:
        clique: iterable of barcode names (strings).

    Returns:
        A ``Counter`` mapping each tuple of molecule ids to the number of
        barcodes of the input carrying exactly these ids.
    """
    # The ids must be materialised as tuples: a generator object hashes by
    # identity, so equal barcodes would never be merged in the Counter, and
    # its content could only be read once.
    return Counter(
        tuple(int(x) for x in b.split(":")[1].split("_"))
        for b in clique
    )
def analyse_clique_graph(barcode_graph):
    """Count the cliques of the barcode graph that are continuous.

    Args:
        barcode_graph: graph from which a ``CliqueGraph`` is built.

    Returns:
        A pair ``(continuous, total)``: the number of clique nodes accepted
        by ``is_continuous`` and the total number of clique nodes.
    """
    clique_graph = CliqueGraph(barcode_graph)

    # Each clique is turned into a barcode multiset before the contiguity check
    nb_continuous = sum(
        1
        for clique in clique_graph.nodes()
        if is_continuous(iterable_to_barcode_multiset(clique))
    )

    return nb_continuous, len(clique_graph.nodes())
def analyse_d_graphs(barcode_graph):
    """Count the unit d-graphs of the barcode graph that are continuous.

    The d-graphs are generated with a ``CliqueDGFactory`` using a minimum
    clique size of 1, then deduplicated on their sorted barcode list.

    Args:
        barcode_graph: graph handed to the d-graph factory.

    Returns:
        A pair ``(continuous, total)``: the number of distinct d-graphs
        accepted by ``is_continuous`` and the number of distinct d-graphs.
    """
    # Generate udgs
    factory = CliqueDGFactory(barcode_graph, 1)
    udg_per_node = factory.generate_all_dgraphs()

    # Remove duplicate udgs.
    # The key has to be a tuple: a generator object hashes by identity, so
    # two d-graphs with the same barcodes would never collide and nothing
    # would be deduplicated.
    # NOTE(review): assumes the items of to_sorted_list() are hashable
    # barcode names (they are split as strings downstream) - confirm.
    unique_udgs = {}
    for udg_node_lst in udg_per_node.values():
        for udg in udg_node_lst:
            barcodes = tuple(udg.to_sorted_list())
            if barcodes not in unique_udgs:
                unique_udgs[barcodes] = iterable_to_barcode_multiset(barcodes)

    continuous = sum(1 for bms in unique_udgs.values() if is_continuous(bms))

    return continuous, len(unique_udgs)
def main():
    """Load the barcode graph given on the command line and print both analyses.

    For the clique analysis, then for the unit d-graph analysis, a label line
    is printed followed by ``<continuous> / <total>``.
    """
    args = parse_arguments()
    graph = nx.read_gexf(args.barcode_graph)

    analyses = (
        ("cliques", analyse_clique_graph),
        ("udgs", analyse_d_graphs),
    )
    for label, analyse in analyses:
        # The analysis runs before its label is printed
        continuous, total = analyse(graph)
        print(label)
        print(continuous, "/", total)


if __name__ == "__main__":
    main()
setup.py
View file @
498df34a
...
...
@@ -4,7 +4,7 @@ from distutils.core import setup
setup
(
name
=
'10X-deconvolve'
,
version
=
'0.1dev'
,
packages
=
[
'deconvolution.d2graph'
,
'deconvolution.dgraph'
,
'deconvolution.main'
],
packages
=
[
'deconvolution.d2graph'
,
'deconvolution.dgraph'
,
'deconvolution.main'
,
'experiments'
],
license
=
'AGPL V3'
,
long_description
=
open
(
'README.md'
).
read
(),
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment