Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
66948796
Commit
66948796
authored
Mar 12, 2019
by
Yoann Dufresne
Browse files
overlapping deconvolution
parent
e8142332
Changes
1
Hide whitespace changes
Inline
Side-by-side
deconvolve.py
View file @
66948796
#!/usr/bin/env python3
import
sys
import
math
import
networkx
as
nx
import
itertools
def
deconvolve
(
G
,
node
):
def
deconvolve
(
G
,
node
,
verbose
=
0
):
neighbors
=
list
(
G
.
neighbors
(
node
))
print
(
"node"
,
node
,
len
(
neighbors
)
,
"neighbors"
)
nei_len
=
len
(
neighbors
)
# Extract neighbors from the graph
G_neighbors
=
nx
.
Graph
(
G
.
subgraph
(
neighbors
))
communities
=
get_communities
(
G_neighbors
,
node
==
"273:597_148"
)
communities
=
get_communities
(
G_neighbors
,
verbose
=
verbose
-
1
)
# Continue only if something needs to be split.
if
len
(
communities
)
==
1
:
...
...
@@ -30,16 +31,20 @@ def deconvolve(G,node):
# Remove old node
G
.
remove_node
(
node
)
print
(
"splitted into"
,
len
(
communities
),
"parts
\n
"
)
if
verbose
>
0
:
print
(
"node"
,
node
,
nei_len
,
"neighbors"
)
print
(
"splitted into"
,
len
(
communities
),
"parts
\n
"
)
def
get_communities
(
G
,
max_overlap
=
2
,
verbose
=
False
):
def
get_communities
(
G
,
max_overlap
=
1
,
verbose
=
0
):
# Half d-graphs are cliques. So compute max cliques
cliques
=
list
(
nx
.
find_cliques
(
G
))
if
verbose
:
if
verbose
>
0
:
print
(
"clique list"
)
for
clq
in
cliques
:
print
(
clq
,
"
\n
"
)
print
(
clq
)
print
()
candidate_d_graphs
=
[]
...
...
@@ -56,16 +61,17 @@ def get_communities(G, max_overlap=2, verbose=False):
if
val
in
clq2
:
overlap
+=
1
if
overlap
>
max_overlap
:
# print(overlap, "is too high overlap")
continue
# Check for d-graph candidates
d_graph
=
compute_d_graph
(
clq1
,
clq2
,
G
)
d_graph
=
compute_d_graph
(
clq1
,
clq2
,
G
,
verbose
=
verbose
-
1
)
if
d_graph
!=
None
:
candidate_d_graphs
.
append
(
d_graph
)
# Extract communities from all the possible d-graphs in the neighborhood.
# This is a minimal covering d_graph algorithm.
minimal_d_graphes
=
filter_d_graphs
(
candidate_d_graphs
)
minimal_d_graphes
=
filter_d_graphs
(
candidate_d_graphs
,
max_overlap
=
max_overlap
)
# If no community detected, return one big.
if
len
(
minimal_d_graphes
)
==
0
:
...
...
@@ -87,16 +93,34 @@ def get_communities(G, max_overlap=2, verbose=False):
@param G the graph of the neighbors of the central node (not present).
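@return A triple (half1, half2, sum_edges): the 2 halves of the d-graph ordered from the center and the number of links counted between the two cliques, or None when the candidate is rejected.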
"""
def
compute_d_graph
(
clq1
,
clq2
,
G
,
verbose
=
False
):
def
compute_d_graph
(
clq1
,
clq2
,
G
,
max_diff_size
=
1
,
verbose
=
0
):
# Compute the arities between the cliques
arities1
=
{
name
:
0
for
name
in
clq1
}
arities2
=
{
name
:
0
for
name
in
clq2
}
sum_edges
=
0
# TODO : Remove this part and improve the detection
if
len
(
clq1
)
!=
len
(
clq2
):
return
None
# /TODO
# Limit the number of recursions
if
abs
(
len
(
clq1
)
-
len
(
clq2
))
>
max_diff_size
:
return
None
# Recursion on the biggest clique to reduce complexity.
smallest
,
largest
=
(
clq1
,
clq2
)
if
len
(
clq2
)
>
len
(
clq1
)
else
(
clq2
,
clq1
)
minimal_weighted_d_graph
=
None
minimal_weight
=
math
.
inf
for
idx
in
range
(
len
(
largest
)):
recur_d_graph
=
compute_d_graph
(
smallest
,
largest
[:
idx
]
+
largest
[
idx
+
1
:],
G
,
verbose
=
verbose
)
if
recur_d_graph
!=
None
and
recur_d_graph
[
2
]
<
minimal_weight
:
minimal_weighted_d_graph
=
recur_d_graph
minimal_weight
=
recur_d_graph
[
2
]
if
verbose
>
0
:
print
(
f
"Recursive calls for:
\n
{
clq1
}
\n
{
clq2
}
\n
"
)
print
(
minimal_weighted_d_graph
,
"
\n
"
)
print
(
"/ Recursive
\n
"
)
return
minimal_weighted_d_graph
min_clq_size
=
min
(
len
(
clq1
),
len
(
clq2
))
...
...
@@ -105,15 +129,15 @@ def compute_d_graph(clq1, clq2, G, verbose=False):
neighbors
=
list
(
G
.
neighbors
(
node1
))
for
node2
in
clq2
:
if
node2
in
neighbors
:
if
node1
==
node2
or
node2
in
neighbors
:
# print(node1, "-", node2)
arities1
[
node1
]
+=
1
arities2
[
node2
]
+=
1
sum_edges
+=
1
if
verbose
:
print
(
clq1
,
clq2
)
print
(
arities1
,
arities2
,
"
\n
"
)
#
if verbose:
#
print(clq1, clq2)
#
print(arities1, arities2, "\n")
# Reject if there are not enough edges
if
sum_edges
<
min_clq_size
*
(
min_clq_size
-
1
)
/
2
:
...
...
@@ -127,40 +151,63 @@ def compute_d_graph(clq1, clq2, G, verbose=False):
lst1
=
[
key
for
key
,
value
in
sorted
(
arities1
.
items
(),
key
=
lambda
tup
:
-
tup
[
1
])]
lst2
=
[
key
for
key
,
value
in
sorted
(
arities2
.
items
(),
key
=
lambda
tup
:
-
tup
[
1
])]
if
verbose
:
if
verbose
>
0
:
print
(
min_clq_size
)
print
(
lst1
,
"
\n
"
,
lst2
,
"
\n
"
)
# Return the 2 halves of the d-graph
return
lst1
,
lst2
return
lst1
,
lst2
,
sum_edges
""" Filter the candidates according to their compatibility
"""
def
filter_d_graphs
(
candidates
):
# Count for each node the number of their apparition
counts
=
{}
def
filter_d_graphs
(
candidates
,
max_overlap
=
0
):
# Count, for each node, how many times it appears (grouped by the size of the overlap between the two halves)
selected
=
{}
counts_by_size
=
[{}
for
_
in
range
(
max_overlap
+
1
)]
sorted_d_graphs
=
[[]
for
_
in
range
(
max_overlap
+
1
)]
for
d_graph
in
candidates
:
# Compute intersection of the two halves
common_length
=
len
(
set
(
d_graph
[
0
])
&
set
(
d_graph
[
1
]))
sorted_d_graphs
[
common_length
].
append
(
d_graph
)
# Count occurrences
for
node
in
itertools
.
chain
(
d_graph
[
0
],
d_graph
[
1
]):
if
not
node
in
counts
:
counts
[
node
]
=
0
counts
[
node
]
+=
1
if
not
node
in
counts_by_size
[
common_length
]:
counts_by_size
[
common_length
][
node
]
=
0
counts_by_size
[
common_length
][
node
]
+=
1
selected
[
node
]
=
False
# Take d-graphs with nodes that appear only once
filtered
=
[]
selected
=
{
node
:
False
for
node
in
counts
.
keys
()}
for
d_graph
in
candidates
:
for
node
in
itertools
.
chain
(
d_graph
[
0
],
d_graph
[
1
]):
if
counts
[
node
]
==
1
:
# Add the d_graph to the selection
filtered
.
append
(
d_graph
)
# register selection of the nodes
for
node
in
itertools
.
chain
(
d_graph
[
0
],
d_graph
[
1
]):
selected
[
node
]
=
True
# Over for this d-graph
for
overlap_size
in
range
(
max_overlap
+
1
):
# Look for d-graphs whose halves do not overlap first, then those overlapping by 1 node, ...
for
d_graph
in
sorted_d_graphs
[
overlap_size
]:
common_length
=
len
(
set
(
d_graph
[
0
])
&
set
(
d_graph
[
1
]))
for
node
in
itertools
.
chain
(
d_graph
[
0
],
d_graph
[
1
]):
# Count appearance
total_count
=
0
for
length
in
range
(
overlap_size
+
1
):
total_count
+=
counts_by_size
[
common_length
][
node
]
if
node
in
counts_by_size
[
common_length
]
else
0
# Add d-graph
if
total_count
==
1
:
# Add the d_graph to the selection
filtered
.
append
(
d_graph
)
# register selection of the nodes
for
node
in
itertools
.
chain
(
d_graph
[
0
],
d_graph
[
1
]):
selected
[
node
]
=
True
# Over for this d-graph
break
# Stop if all nodes are selected
over
=
True
for
val
in
selected
.
values
():
if
not
val
:
over
=
False
break
if
over
:
break
# TODO: improve performance when there is no unique solution
for
val
in
selected
.
values
():
...
...
@@ -184,7 +231,7 @@ def main():
# Deconvolve
g_nodes
=
list
(
G
.
nodes
())
for
node
in
g_nodes
:
deconvolve
(
G
,
node
)
deconvolve
(
G
,
node
,
verbose
=
1
)
# if (node=="273:597_148") else 0
)
# exit()
print
(
len
(
g_nodes
),
"->"
,
len
(
list
(
G
.
nodes
())))
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment