Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
80495d07
Commit
80495d07
authored
Mar 08, 2019
by
Yoann Dufresne
Browse files
start of d-graph based deconvolution
parent
398b47ac
Changes
2
Hide whitespace changes
Inline
Side-by-side
deconvolve.py
100644 → 100755
View file @
80495d07
# arguments: graphml file
# attempts to deconvolve a barcode graph using community detection
#!/usr/bin/env python3
import
sys
import
itertools
import
operator
from
collections
import
Counter
from
networkx
import
nx
#from networkx.algorithms import community
import
community
# python-louvain
import
networkx
as
nx
import
sys
filename
=
sys
.
argv
[
1
]
G
=
None
if
filename
.
endswith
(
'.graphml'
):
G
=
nx
.
read_graphml
(
filename
)
elif
filename
.
endswith
(
'.gexf'
):
G
=
nx
.
read_gexf
(
filename
)
print
(
G
)
def
deconvolve2
(
G
,
node
):
def
deconvolve
(
G
,
node
):
neighbors
=
list
(
G
.
neighbors
(
node
))
print
(
"node"
,
node
,
len
(
neighbors
),
"neighbors"
)
G2
=
nx
.
Graph
(
G
.
subgraph
(
neighbors
))
# make sure there is a single connected components
while
True
:
ccs
=
list
(
nx
.
connected_components
(
G2
))
if
len
(
ccs
)
==
1
:
break
# else artificially and weakly connect components to run fluid community
n1
=
list
(
ccs
[
0
])[
0
]
n2
=
list
(
ccs
[
1
])[
0
]
G2
.
add_edge
(
n1
,
n2
)
#cliques = list(community.asyn_fluidc(G2,2))
#cliques = list(community.girvan_newman(G2)) # too slow
#cliques = list(community.label_propagation_communities(G2)) # not very accurate
#cliques = list(community.asyn_lpa_communities(G2)) # seems accurate but hands
cliques
=
community
.
best_partition
(
G2
)
print
(
"louvain communities"
,[
len
([
c
for
c
,
i
in
cliques
.
items
()
if
i
==
clique_id
])
for
clique_id
in
set
(
cliques
.
values
())])
if
len
(
cliques
)
==
1
:
return
# nothing to deconvolve here
for
node2
,
clique_id
in
cliques
.
items
():
G
.
add_node
(
node
+
"-MI%d"
%
clique_id
)
G
.
add_edge
(
node
+
"-MI%d"
%
clique_id
,
node2
,
contigs
=
(
G
[
node
][
node2
][
'contigs'
]
if
'contigs'
in
G
[
node
][
node2
]
else
""
))
# for debugging
G2
.
nodes
[
node2
][
'mi'
]
=
clique_id
debug
=
True
if
debug
:
nx
.
write_graphml
(
G2
,
"test.graphml"
)
#exit(0)
G
.
remove_node
(
node
)
g_nodes
=
list
(
G
.
nodes
())
for
node
in
g_nodes
:
deconvolve2
(
G
,
node
)
nx
.
write_graphml
(
G
,
sys
.
argv
[
1
]
+
".deconvolved.graphml"
)
# Extract neighbors from the graph
G_neighbors
=
nx
.
Graph
(
G
.
subgraph
(
neighbors
))
get_communities
(
G_neighbors
)
def
get_communities
(
G
):
# Half d-graphs are cliques. So compute max cliques
cliques
=
list
(
nx
.
find_cliques
(
G
))
print
(
cliques
,
"
\n
"
)
# d-graphs are composed of 2 cliques related by the current node.
# The overlapp between the 2 cliques determine the node order in the d-graph.
for
idx
,
clq1
in
enumerate
(
cliques
):
to_compare
=
cliques
[
idx
+
1
:]
for
clq2
in
to_compare
:
# TO REMOVE : Temporary only check for distinct cliques
jump
=
False
for
val
in
clq1
:
if
val
in
clq2
:
jump
=
True
break
if
jump
:
continue
# / TO REMOVE
# Check for d-graph
compute_d_graph
(
clq1
,
clq2
,
G
)
# print(clq1, clq2, is_d_graph(clq1, clq2, G))
# Extract communites from all the possible d-graphes in the neighborood.
def
compute_d_graph
(
clq1
,
clq2
,
G
):
# Compute the arities between the cliques
arities1
=
{
name
:
0
for
name
in
clq1
}
arities2
=
{
name
:
0
for
name
in
clq2
}
sum_edges
=
0
# Compute link arities
for
node1
in
clq1
:
neighbors
=
list
(
G
.
neighbors
(
node1
))
for
node2
in
clq2
:
if
node2
in
neighbors
:
# print(node1, "-", node2)
arities1
[
node1
]
+=
1
arities2
[
node2
]
+=
1
sum_edges
+=
1
# Reject if not enought edges
if
sum_edges
<
len
(
clq1
)
*
(
len
(
clq1
)
-
1
)
/
2
:
return
None
print
(
clq1
,
clq2
)
print
(
arities1
,
arities2
,
"
\n
"
)
return
None
def
main
():
# Parsing the input file
filename
=
sys
.
argv
[
1
]
G
=
None
if
filename
.
endswith
(
'.graphml'
):
G
=
nx
.
read_graphml
(
filename
)
elif
filename
.
endswith
(
'.gexf'
):
G
=
nx
.
read_gexf
(
filename
)
# Deconvolve
g_nodes
=
list
(
G
.
nodes
())
for
node
in
g_nodes
:
deconvolve
(
G
,
node
)
exit
()
# nx.write_graphml(G,sys.argv[1]+".deconvolved.graphml")
if
__name__
==
"__main__"
:
main
()
generate_fake_molecule_graph.py
View file @
80495d07
...
...
@@ -19,8 +19,8 @@
n
=
100
o
=
5
n
=
100
0
o
=
6
molecules_intervals
=
[]
for
i
in
range
(
n
):
...
...
@@ -51,4 +51,4 @@ for i,a in enumerate(molecules_intervals):
print
(
G
.
edges
())
nx
.
write_graphml
(
G
,
f
"data/simulated_molecules_
{
n
}
_
{
o
}
.graphml"
)
nx
.
write_graphml
(
G
,
f
"data/simulated_molecules_
{
n
}
_
{
o
-
1
}
.graphml"
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment