Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
90276b3f
Commit
90276b3f
authored
Mar 13, 2019
by
Yoann Dufresne
Browse files
add some verbose
parent
66948796
Changes
1
Show whitespace changes
Inline
Side-by-side
deconvolve.py
View file @
90276b3f
...
...
@@ 7,22 +7,25 @@ import itertools
def
deconvolve
(
G
,
node
,
verbose
=
0
):
def
local_
deconvolve
(
G
,
node
,
verbose
=
0
):
neighbors
=
list
(
G
.
neighbors
(
node
))
nei_len
=
len
(
neighbors
)
# Extract neighbors from the graph
G_neighbors
=
nx
.
Graph
(
G
.
subgraph
(
neighbors
))
communities
=
get_communities
(
G_neighbors
,
verbose
=
verbose

1
)
communities
=
get_communities
(
G_neighbors
,
max_overlap
=
0
,
verbose
=
verbose

1
)
# Continue only if something needs to be split.
if
len
(
communities
)
==
1
:
if
verbose
>
0
:
print
(
"node"
,
node
,
nei_len
,
"neighbors"
)
print
(
"No split
\n
"
)
return
# Split communities
for
idx
,
community
in
enumerate
(
communities
):
# Add community center
node_name
=
f
"
{
node
}
_
{
idx
}
"
node_name
=
f
"
{
node
}
.
{
idx
}
"
G
.
add_node
(
node_name
)
# Add links from the center to the community
...
...
@@ 36,7 +39,7 @@ def deconvolve(G,node, verbose=0):
print
(
"splitted into"
,
len
(
communities
),
"parts
\n
"
)
def
get_communities
(
G
,
max_overlap
=
1
,
verbose
=
0
):
def
get_communities
(
G
,
max_overlap
=
1
,
strict
=
True
,
verbose
=
0
):
# Half d-graphs are cliques, so compute the maximal cliques.
cliques
=
list
(
nx
.
find_cliques
(
G
))
...
...
@@ 65,20 +68,44 @@ def get_communities(G, max_overlap=1, verbose=0):
continue
# Check for dgraph candidates
d_graph
=
compute_d_graph
(
clq1
,
clq2
,
G
,
verbose
=
verbose

1
)
d_graph
=
compute_d_graph
(
clq1
,
clq2
,
G
,
verbose
=
verbose

1
,
max_diff_size
=
0
)
if
d_graph
!=
None
:
candidate_d_graphs
.
append
(
d_graph
)
# Extract communites from all the possible dgraphes in the neighborood.
# Extract communities from all the possible d-graphs in the neighborhood.
# This is a minimal covering d_graph algorithm.
minimal_d_graphes
=
filter_d_graphs
(
candidate_d_graphs
,
max_overlap
=
max_overlap
)
minimal_d_graphes
,
unpartitionned
=
filter_d_graphs
(
candidate_d_graphs
,
max_overlap
=
max_overlap
)
if
strict
and
len
(
unpartitionned
)
>
0
:
if
verbose
>
0
:
print
(
"Partialy unpartionned. Aborted"
)
return
[
list
(
G
.
nodes
())]
communities
=
[
list
(
set
(
d_graph
[
0
]
+
d_graph
[
1
]))
for
d_graph
in
minimal_d_graphes
]
# complete unpartitionned nodes
to_add
=
[]
for
idx
,
d_graph
in
enumerate
(
communities
):
for
node
in
d_graph
:
neighbors
=
G
.
neighbors
(
node
)
for
nei
in
neighbors
:
if
nei
in
unpartitionned
:
to_add
.
append
(
node
)
break
unpartitionned
.
extend
(
list
(
set
(
to_add
)))
# If no community is detected, return one big community.
if
len
(
minimal_d_graphes
)
==
0
:
if
len
(
unpartitionned
)
==
len
(
list
(
G
.
nodes
()))
:
return
[
list
(
G
.
nodes
())]
# add unpartitionned if not empty
elif
len
(
unpartitionned
)
>
0
:
communities
.
append
(
unpartitionned
)
if
verbose
>
0
:
for
community
in
communities
:
print
(
community
)
communites
=
[
list
(
set
(
d_graph
[
0
]
+
d_graph
[
1
]))
for
d_graph
in
minimal_d_graphes
]
return
communites
return
communities
""" This function takes two cliques in the graph G and tries to find if they are 2 halves
...
...
@@ 180,6 +207,7 @@ def filter_d_graphs(candidates, max_overlap=0):
# Take d-graphs with nodes that appear only once
filtered
=
[]
partitionned
=
False
for
overlap_size
in
range
(
max_overlap
+
1
):
# Look for d_graphs with overlapping halves first, then 1 node, ...
for
d_graph
in
sorted_d_graphs
[
overlap_size
]:
...
...
@@ 207,16 +235,17 @@ def filter_d_graphs(candidates, max_overlap=0):
if
not
val
:
over
=
False
break
if
over
:
break
# TODO: improve performance when there is no unique solution
for
val
in
selected
.
values
():
if
not
val
:
# print(min(counts.values()), counts)
return
[]
if
over
:
partitionned
=
True
break
# print(len(filtered))
return
filtered
# If the partitioning is not complete, try to subdivide the node anyway.
# Split the node into n d_graph partitions + 1 set of unpartitioned nodes
unpartitionned
=
[
node
for
node
in
selected
if
not
selected
[
node
]]
if
len
(
unpartitionned
)
==
len
(
selected
):
return
[],
unpartitionned
else
:
return
filtered
,
unpartitionned
def
main
():
...
...
@@ 231,7 +260,7 @@ def main():
# Deconvolve
g_nodes
=
list
(
G
.
nodes
())
for
node
in
g_nodes
:
deconvolve
(
G
,
node
,
verbose
=
1
)
#
if (node
=="273:597_148"
) else 0)
local_
deconvolve
(
G
,
node
,
verbose
=
2
if
(
node
.
startswith
(
"0:"
)
)
else
0
)
# exit()
print
(
len
(
g_nodes
),
">"
,
len
(
list
(
G
.
nodes
())))
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment