Yoann DUFRESNE / linked reads molecule ordering · Commits

Commit 6cb17069, authored Jan 24, 2020 by rchikhi
merge
Parents: 268fcbf6, 02f87907

Changes: 35
README.md
...
...
@@ -2,6 +2,16 @@
Trying to deconvolve single tag assignment for multiple molecules
## Installation
Install the package from the root directory.
```bash
# For users
pip install . --user

# For developers
pip install -e . --user
```
## Scripts
Most of the scripts use argparse for their command-line arguments.
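As an illustration of that pattern only (not code taken from the repository), here is a minimal argparse sketch; the flags mirror the `generate_fake_molecule_graph.py` invocation found in `Snakefile_data_simu`, while the help strings and parser details are assumptions.

```python
# Illustrative sketch of the argparse pattern used by the scripts (hypothetical help texts).
# Flags mirror the Snakefile_data_simu call:
#   python3 deconvolution/main/generate_fake_molecule_graph.py --num_molecule N --avg_depth D --output OUT.gexf
import argparse

parser = argparse.ArgumentParser(description="Example: generate a fake molecule graph")
parser.add_argument("--num_molecule", type=int, required=True, help="number of molecules to simulate (assumed)")
parser.add_argument("--avg_depth", type=int, required=True, help="average molecule depth (assumed)")
parser.add_argument("--output", required=True, help="output .gexf path (assumed)")
args = parser.parse_args()
print(args)
```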
...
...
@@ -48,5 +58,5 @@ Config parameters:
```bash
snakemake -s Snakefile_data_simu --config n=10000 m=[4,6,8,10,12] m_dev=[0,0.5,1,2,3]
snakemake -s Snakefile_d2 --config input=[snake_exec/simu_bar_n10000_d5_m10-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m10-dev0.gexf,snake_exec/simu_bar_n10000_d5_m10-dev1.gexf,snake_exec/simu_bar_n10000_d5_m10-dev2.gexf,snake_exec/simu_bar_n10000_d5_m10-dev3.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m12-dev0.gexf,snake_exec/simu_bar_n10000_d5_m12-dev1.gexf,snake_exec/simu_bar_n10000_d5_m12-dev2.gexf,snake_exec/simu_bar_n10000_d5_m12-dev3.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m4-dev0.gexf,snake_exec/simu_bar_n10000_d5_m4-dev1.gexf,snake_exec/simu_bar_n10000_d5_m4-dev2.gexf,snake_exec/simu_bar_n10000_d5_m4-dev3.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m6-dev0.gexf,snake_exec/simu_bar_n10000_d5_m6-dev1.gexf,snake_exec/simu_bar_n10000_d5_m6-dev2.gexf,snake_exec/simu_bar_n10000_d5_m6-dev3.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.5.gexf,snake_exec/simu_bar_n10000_d5_m8-dev0.gexf,snake_exec/simu_bar_n10000_d5_m8-dev1.gexf,snake_exec/simu_bar_n10000_d5_m8-dev2.gexf,snake_exec/simu_bar_n10000_d5_m8-dev3.gexf]
```
Snakefile_d2
...
...
@@ -17,15 +17,16 @@ rule compress_data:
    input:
        barcode_graph=f"{WORKDIR}/{{barcode_file}}.gexf",
        d2_raw=f"{WORKDIR}/{{barcode_file}}_d2_raw_maxclq.gexf",
-       simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf",
-       d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
-       simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
+       simplified_d2=f"{WORKDIR}/{{barcode_file}}_d2_simplified_maxclq.gexf"
+       # d2_raw_louvain=f"{WORKDIR}/{{barcode_file}}_d2_raw_louvain.gexf",
+       # simplified_d2_louvain=f"{WORKDIR}/{{barcode_file}}_d2_simplified_louvain.gexf"
    output:
        zip=f"{WORKDIR}/{{barcode_file}}.tar.gz"
    shell:
        f"cd {WORKDIR}/ ;"
        "if [ ! -d {wildcards.barcode_file} ]; then mkdir {wildcards.barcode_file} ; fi;"
-       "mv *.gexf {wildcards.barcode_file}/ ;"
+       "mv {wildcards.barcode_file}_* {wildcards.barcode_file}/ ;"
+       "cp {wildcards.barcode_file}.gexf {wildcards.barcode_file}/ ;"
        "tar czf {wildcards.barcode_file}.tar.gz {wildcards.barcode_file}/ ;"
        "rm -rf {wildcards.barcode_file}/ ;"
        "cd - ;"
...
...
@@ -37,7 +38,7 @@ rule d2_simplification:
    output:
        simplified_d2="{barcode_path}_d2_simplified_{method}.gexf"
    shell:
-       "python3 deconvolution/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
+       "python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
rule d2_generation:
...
...
@@ -46,7 +47,7 @@ rule d2_generation:
    output:
        d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf"
    run:
-       shell(f"python3 deconvolution/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
+       shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
rule setup_workdir:
...
...
@@ -58,4 +59,4 @@ rule setup_workdir:
shell(f"if [ ! -d {WORKDIR} ]; then mkdir {WORKDIR}; fi;")
shell(f"cp {{input}} {WORKDIR}")
if input[-5:] != ".gexf":
shell("python3 deconvolution/gexf_converter.py {input}")
shell("python3 deconvolution/
main/
gexf_converter.py {input}")
Snakefile_data_simu
...
...
@@ -16,11 +16,11 @@ rule generate_barcodes:
    output:
        "{path}/simu_bar_{params}_m{m}-dev{md}.gexf"
    shell:
-       "python3 deconvolution/generate_fake_barcode_graph.py --merging_depth {wildcards.m} --deviation {wildcards.md} --input_graph {input} --output {output}"
+       "python3 deconvolution/main/generate_fake_barcode_graph.py --merging_depth {wildcards.m} --deviation {wildcards.md} --input_graph {input} --output {output}"

rule generate_molecules:
    output:
        "{path}/simu_mol_n{n}_d{d}.gexf"
    shell:
-       "python3 deconvolution/generate_fake_molecule_graph.py --num_molecule {wildcards.n} --avg_depth {wildcards.d} --output {output}"
+       "python3 deconvolution/main/generate_fake_molecule_graph.py --num_molecule {wildcards.n} --avg_depth {wildcards.d} --output {output}"
tests/__init__.py → deconvolution/d2graph/__init__.py
File moved
deconvolution/d2_algorithms.py → deconvolution/d2graph/d2_algorithms.py
import networkx as nx
from itertools import combinations

-from d2_path import Path, Unitig
+from deconvolution.d2graph.d2_path import Unitig
""" Remove unnecessary transitions
...
...
deconvolution/d2_graph.py → deconvolution/d2graph/d2_graph.py
...
...
@@ -3,7 +3,11 @@ import itertools
from bidict import bidict
import sys

-from d_graph import Dgraph, compute_all_max_d_graphs, filter_dominated, list_domination_filter
+from deconvolution.dgraph.FixedDGIndex import FixedDGIndex
+from deconvolution.dgraph.VariableDGIndex import VariableDGIndex
+from deconvolution.dgraph.d_graph import Dgraph
+from deconvolution.dgraph.CliqueDGFactory import CliqueDGFactory
+from deconvolution.dgraph.LouvainDGFactory import LouvainDGFactory


class D2Graph(nx.Graph):
...
...
@@ -55,23 +59,22 @@ class D2Graph(nx.Graph):
        return self.subgraph(list(self.nodes()))

-   def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None):
-       import debug_disct as dd
+   def construct_from_barcodes(self, index_size=3, verbose=True, debug=False, clique_mode=None, threads=1):
        # Compute all the d-graphs
        if verbose:
            print("Computing the unit d-graphs..")
-       self.d_graphs_per_node = compute_all_max_d_graphs(self.barcode_graph, debug=debug, clique_mode=clique_mode)
+       dg_factory = None
+       if clique_mode == "louvain":
+           dg_factory = LouvainDGFactory(self.barcode_graph)
+       else:
+           dg_factory = CliqueDGFactory(self.barcode_graph)
+       self.d_graphs_per_node = dg_factory.generate_all_dgraphs(debug=True)
        if verbose:
            counts = sum(len(x) for x in self.d_graphs_per_node.values())
            print(f"\t{counts} computed d-graphs")
        #self.d_graphs_per_node = filter_dominated(self.d_graphs_per_node)
        #if verbose:
        #    counts = sum(len(x) for x in self.d_graphs_per_node.values())
        #    print(f"\t {counts} remaining d-graphs after first filter")

        for d_graphs in self.d_graphs_per_node.values():
            self.all_d_graphs.extend(d_graphs)

-       # Name the d-graphs
+       # Number the d_graphs
        for idx, d_graph in enumerate(self.all_d_graphs):
            d_graph.idx = idx
...
...
@@ -79,15 +82,26 @@ class D2Graph(nx.Graph):
        # Index all the d-graphs
        if verbose:
-           print("Compute the dmer index")
-       self.index = self.create_index_from_tuples(index_size, verbose=verbose)
-       self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)
+           print("Compute the dmer dgraph")
+           print("\tIndexing")
+       # self.index = FixedDGIndex(size=index_size)
+       self.index = VariableDGIndex(size=2)
+       for idx, dg in enumerate(self.all_d_graphs):
+           if verbose:
+               print(f"\r\t{idx + 1}/{len(self.all_d_graphs)}", end='')
+           self.index.add_dgraph(dg)
+           # self.var_index.add_dgraph(dg)
+       if verbose:
+           print()
+           print("\tFilter index")
+       self.index.filter_by_entry()
+       # self.index = self.create_index_from_tuples(index_size, verbose=verbose)
+       # self.filter_dominated_in_index(tuple_size=index_size, verbose=verbose)

        # Compute node distances for pair of dgraphs that share at least 1 dmer.
        if verbose:
            print("Compute the graph")
        # Create the graph
        self.bidict_nodes = self.create_graph()
        #self.compute_distances()

    def get_covering_variables(self, udg):
...
...
@@ -203,55 +217,3 @@ class D2Graph(nx.Graph):
            # Distance computing and adding in the dist dicts
            d = dg1.distance_to(dg2)
            data["distance"] = d

-   def filter_dominated_in_index(self, tuple_size=3, verbose=True):
-       to_remove = set()
-       if verbose:
-           print("\tFilter dominated in index")
-
-       # Find dominated
-       for dmer_idx, item in enumerate(self.index.items()):
-           dmer, dg_list = item
-           if verbose:
-               sys.stdout.write(f"\r\t{dmer_idx + 1}/{len(self.index)}")
-               sys.stdout.flush()
-
-           undominated = list_domination_filter(dg_list)
-
-           # Register dominated
-           if len(dg_list) != len(undominated):
-               for dg in dg_list:
-                   if dg not in undominated:
-                       to_remove.add(dg)
-
-           self.index[dmer] = undominated
-
-       if verbose:
-           print()
-           print("\tDmer removal")
-
-       removable_dmers = set()
-       for r_idx, r_dg in enumerate(to_remove):
-           if verbose:
-               sys.stdout.write(f"\r\t{r_idx + 1}/{len(to_remove)}")
-               sys.stdout.flush()
-           self.all_d_graphs.remove(r_dg)
-           self.d_graphs_per_node[r_dg.center].remove(r_dg)
-
-           # Remove dominated in index
-           for dmer in itertools.combinations(r_dg.to_sorted_list(), tuple_size):
-               if r_dg in self.index[dmer]:
-                   self.index[dmer] = list(filter(lambda x: x != r_dg, self.index[dmer]))
-                   if len(self.index[dmer]) == 0:
-                       removable_dmers.add(dmer)
-
-       # Remove empty dmers
-       for dmer in removable_dmers:
-           del self.index[dmer]
-
-       if verbose:
-           print()
deconvolution/d2_path.py → deconvolution/d2graph/d2_path.py
File moved
deconvolution/path_algorithms.py → deconvolution/d2graph/path_algorithms.py
import networkx as nx

-from d2_path import Path
+from deconvolution.d2graph.d2_path import Path
""" Greedy algorithm. Start with th most probable unitig (ie lowest normalized penalty first and largest unitig for
equalities). Then extends on both side to the nearest interesting unitig.
...
...
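A schematic sketch of the greedy seed-selection rule described in the docstring above; the actual implementation in path_algorithms.py is elided in this view, and the `normalized_penalty` and `size` attribute names are assumptions used only for illustration.

```python
# Sketch only: seed choice for the greedy strategy described above.
# Assumes each unitig exposes .normalized_penalty and .size (hypothetical attribute names).
def pick_seed_unitig(unitigs):
    # Lowest normalized penalty first; the largest unitig wins ties.
    # The real algorithm then extends the seed on both sides toward the nearest interesting unitig.
    return min(unitigs, key=lambda u: (u.normalized_penalty, -u.size))
```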
deconvolution/path_optimization.py → deconvolution/d2graph/path_optimization.py
import random
-from d2_path import Path
import networkx as nx

+from deconvolution.d2graph.d2_path import Path


class Optimizer:
...
...
deconvolution/debug_disct.py
deleted 100644 → 0
def save(dict, filename):
    with open(filename, "w") as fp:
        for key, array in dict.items():
            fp.write(str(len(array)) + " " + key + "\n")
            fp.write('\n'.join([str(sorted(x.nodes)) for x in array]) + "\n")
    print(filename, "saved")


def load(filename):
    d = {}
    with open(filename) as fp:
        value = None
        nb_vals = 0
        for line in fp:
            line = line.strip()
            if value == None:
                first_space = line.find(' ')
                nb_vals = int(line[:first_space])
                value = line[first_space + 1:]
                d[value] = []
            else:
                d[value].append(line.strip())
                nb_vals -= 1
                if nb_vals == 0:
                    value = None
    print(filename, "loaded")
    return d


def compare(d1, d2):
    remaining = set(d2.keys())
    for key in d1:
        if key not in d2:
            print(key, "not present in d2")
        remaining.remove(key)
        l1 = sorted([str(sorted(x.nodes)) for x in d1[key]])
        l2 = sorted([str(x) for x in d2[key]])
        if l1 != l2:
            print(f"{key}: disimilar lists:")
            s1 = set(l1)
            s2 = set(l2)
            print(s1 - s2)
            print(s2 - s1)
    for key in remaining:
        print(key, "not present in d1")
deconvolution/dgraph/AbstractDGFactory.py
0 → 100644
import networkx as nx
from abc import abstractmethod

from deconvolution.dgraph.FixedDGIndex import FixedDGIndex


class AbstractDGFactory:

    def __init__(self, graph):
        self.graph = graph

    def generate_all_dgraphs(self, debug=False):
        index = FixedDGIndex(size=1)

        nb_nodes = len(self.graph.nodes())
        for idx, node in enumerate(self.graph.nodes()):
            if debug:
                print(f"\r{idx + 1}/{nb_nodes} node analysis", end='')
            neighbors = list(self.graph.neighbors(node))
            subgraph = nx.Graph(self.graph.subgraph(neighbors))

            # Fill the index by node
            key = frozenset({node})
            for dg in self.generate_by_node(node, subgraph):
                index.add_value(key, dg)

        index.filter_by_entry()
        return index

    @abstractmethod
    def generate_by_node(self, node, subgraph):
        pass

# def compute_all_max_d_graphs(graph, debug=False, clique_mode=None, threads=1):
# d_graphs = FixedDGIndex(size=1)
#
# nds = list(graph.nodes())
# for idx, node in enumerate(nds):
# # print(idx+1, '/', len(nds))
# #if "MI" not in str(node): continue # for debugging; only look at deconvolved nodes
# #print(f"\r{idx+1}/{len(graph.nodes())}")
# neighbors = list(graph.neighbors(node))
# neighbors_graph = nx.Graph(graph.subgraph(neighbors))
#
# node_d_graphs = set()
#
# mode_str = " "
# if clique_mode is None:
# # Find all the cliques (equivalent to compute all the candidate half d-graph)
# cliques = []
# for clique in nx.find_cliques(neighbors_graph):
# if len(clique) > 3:
# cliques.append(clique)
# mode_str += "(max-cliques)"
# elif clique_mode == "louvain":
# louvain = community.best_partition(neighbors_graph) # louvain
# # high resolution seems to work better
# communities = [[c for c,i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
# mode_str += "(louvain)"
# cliques = []
# for comm in communities:
# # further decompose! into necessarily 2 communities
# community_as_graph = nx.Graph(graph.subgraph(comm))
# if len(community_as_graph.nodes()) <= 2:
# cliques += [community_as_graph.nodes()]
# else:
# cliques += map(list,nx.community.asyn_fluidc(community_as_graph,2))
#
# elif clique_mode == "testing":
# # k-clique communities
# #from networkx.algorithms.community import k_clique_communities
# #cliques = k_clique_communities(neighbors_graph, 3) # related to the d-graph d parameter
# from cdlib import algorithms
# cliques_dict = algorithms.node_perception(neighbors_graph, threshold=0.75, overlap_threshold=0.75) #typical output: Sizes of found cliques (testing): Counter({6: 4, 5: 3, 4: 2, 2: 1})
# #cliques_dict = algorithms.gdmp2(neighbors_graph, min_threshold=0.9) #typical output: sizes of found cliques (testing): Counter({3: 2, 5: 1})
# #cliques_dict = algorithms.angel(neighbors_graph, threshold=0.90) # very sensitive parameters: 0.84 and 0.88 don't work at all but 0.86 does sort of
# from collections import defaultdict
# cliques_dict2 = defaultdict(list)
# for (node, values) in cliques_dict.to_node_community_map().items():
# for value in values:
# cliques_dict2[value] += [node]
# cliques = list(cliques_dict2.values())
# mode_str += "(testing)"
#
# if debug: print("node", node, "has", len(cliques), "cliques in neighborhood (of size", len(neighbors), ")")
#
# cliques_debugging = True
# if cliques_debugging:
#
# from collections import Counter
# len_cliques = Counter(map(len,cliques))
#
# # Pair halves to create d-graphes
# for idx, clq1 in enumerate(cliques):
# for clq2_idx in range(idx+1, len(cliques)):
# clq2 = cliques[clq2_idx]
#
# # Check for d-graph candidates
# d_graph = Dgraph(node)
# d_graph.put_halves(clq1, clq2, neighbors_graph)
#
# factor = 0.5
# #if clique_mode == "testing": factor = 1 # still allows louvain's big communities
# #print("link div:",d_graph.get_link_divergence(),"opt:",d_graph.get_optimal_score(), "good d graph?",d_graph.get_link_divergence() <= d_graph.get_optimal_score() *factor)
# if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * factor:
# node_d_graphs.add(d_graph)
#
# # Fill the index by node
# key = frozenset({node})
# for dg in node_d_graphs:
# d_graphs.add_value(key, dg)
#
# d_graphs.filter_by_entry()
# return d_graphs
deconvolution/dgraph/AbstractDGIndex.py
0 → 100644
from abc import abstractmethod


class AbstractDGIndex(dict):

    def __init__(self, size=3):
        """ This class represents an index of dgraphs.
        Each key in the index is a set of barcodes and each value a list of dgraphs containing these barcodes.
        :param size: Usage varies regarding the subclass used
        """
        super(AbstractDGIndex, self).__init__()
        self.size = size

    @abstractmethod
    def _verify_key(self, key_set, dg_size=0):
        pass

    def add_value(self, key_set, dgraph):
        """ Add the key (set of barcodes) / value (dgraph) pair at the right place in the dict
        """
        pass
        # Test the key size
        self._verify_key(key_set)
        key_set = frozenset(key_set)

        # Add the key if not already present
        if key_set not in self:
            self[key_set] = set()

        # Associate the value with the key
        self[key_set].add(dgraph)

    @abstractmethod
    def add_dgraph(self, dg):
        """ Generate all the sets needed as keys for this dgraph and push the d-graph as the value.
        For a fixed dgraph size, all the sets of this size will be generated as keys.
        Otherwise, all the sets of size at least len(dg) - size will be generated.
        """
        pass

    def _filter_entry(self, key_set):
        """ For one entry in the index, filter out dominated dgraphs
        :param key_set: The entry to filter
        """
        # Verify presence in the index
        if key_set not in self:
            raise KeyError("The set is not present in the index")

        # n² filtering
        dgs = self[key_set]
        to_remove = set()
        for dg1 in dgs:
            for dg2 in dgs:
                if dg1.is_dominated(dg2):
                    to_remove.add(dg1)
                    break

        self[key_set] = dgs - to_remove
        return to_remove

    def filter_by_entry(self):
        for key_set in self:
            removed = self._filter_entry(key_set)
            # TODO: remove globally?

    def __contains__(self, key):
        key = frozenset(key)
        return super(AbstractDGIndex, self).__contains__(key)

    def __getitem__(self, key):
        return super(AbstractDGIndex, self).__getitem__(self.__keytransform__(key))

    def __keytransform__(self, key):
        return frozenset(key)
deconvolution/dgraph/CliqueDGFactory.py
0 → 100644
import networkx as nx

from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory
from deconvolution.dgraph.d_graph import Dgraph


class CliqueDGFactory(AbstractDGFactory):

    def __init__(self, graph, min_size_clique=4, dg_max_divergence_factor=0.5):
        super(CliqueDGFactory, self).__init__(graph)
        self.min_size = min_size_clique
        self.dg_max_divergence_factor = dg_max_divergence_factor

    def generate_by_node(self, node, subgraph):
        node_d_graphs = set()

        # Clique computation
        cliques = []
        for clique in nx.find_cliques(subgraph):
            if len(clique) >= self.min_size:
                cliques.append(clique)
        # TODO: Index cliques to pair them faster

        # d-graph computation
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx + 1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
                d_graph = Dgraph(node)
                d_graph.put_halves(clq1, clq2, subgraph)

                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * self.dg_max_divergence_factor:
                    node_d_graphs.add(d_graph)

        return node_d_graphs
deconvolution/dgraph/FixedDGIndex.py
0 → 100644
from itertools import combinations

from deconvolution.dgraph.AbstractDGIndex import AbstractDGIndex


class FixedDGIndex(AbstractDGIndex):

    def __init__(self, size=3):
        super(FixedDGIndex, self).__init__(size=size)

    def _verify_key(self, key_set, dg_size=0):
        if len(key_set) != self.size:
            raise ValueError("Wrong set size in the dgraph")

    def add_dgraph(self, dg):
        barcodes = dg.node_set
        for tup in combinations(barcodes, self.size):
            self.add_value(frozenset(tup), dg)
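A minimal usage sketch for the index above, assuming it is run from the repository root so the `deconvolution` package imports resolve; the string values stand in for real `Dgraph` objects and are used purely for illustration.

```python
# Usage sketch for FixedDGIndex (values would normally be Dgraph objects; strings shown for illustration).
from deconvolution.dgraph.FixedDGIndex import FixedDGIndex

index = FixedDGIndex(size=2)          # keys must be barcode sets of exactly 2 elements
index.add_value({"A", "B"}, "dg_1")   # add_value freezes the key and stores values in a set
index.add_value({"A", "B"}, "dg_2")

print({"A", "B"} in index)            # True: __contains__ freezes the lookup key as well
print(index[{"A", "B"}])              # {'dg_1', 'dg_2'} (set order is arbitrary)
```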
deconvolution/dgraph/LouvainDGFactory.py
0 → 100644
import networkx as nx

from deconvolution.dgraph.AbstractDGFactory import AbstractDGFactory
from deconvolution.dgraph.d_graph import Dgraph
import community


class LouvainDGFactory(AbstractDGFactory):

    def __init__(self, graph, dg_max_divergence_factor=0.5):
        super(LouvainDGFactory, self).__init__(graph)
        self.dg_max_divergence_factor = dg_max_divergence_factor

    def generate_by_node(self, node, subgraph):
        node_d_graphs = set()

        louvain = community.best_partition(subgraph)  # louvain
        # high resolution seems to work better
        communities = [[c for c, i in louvain.items() if i == clique_id] for clique_id in set(louvain.values())]
        cliques = []
        for comm in communities:
            # further decompose! into necessarily 2 communities
            community_as_graph = nx.Graph(subgraph.subgraph(comm))
            if len(community_as_graph.nodes()) <= 2:
                cliques += [community_as_graph.nodes()]
            else:
                cliques += map(list, nx.community.asyn_fluidc(community_as_graph, 2))

        # d-graph computation
        for idx, clq1 in enumerate(cliques):
            for clq2_idx in range(idx + 1, len(cliques)):
                clq2 = cliques[clq2_idx]

                # Check for d-graph candidates
                d_graph = Dgraph(node)
                d_graph.put_halves(clq1, clq2, subgraph)

                if d_graph.get_link_divergence() <= d_graph.get_optimal_score() * self.dg_max_divergence_factor:
                    node_d_graphs.add(d_graph)

        return node_d_graphs
deconvolution/dgraph/VariableDGIndex.py
0 → 100644
from itertools import combinations

from deconvolution.dgraph.AbstractDGIndex import AbstractDGIndex


class VariableDGIndex(AbstractDGIndex):