Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
15906856
Commit
15906856
authored
May 16, 2020
by
Yoann Dufresne
Browse files
barcode total ordering computing
parent
aa092fa2
Changes
3
Hide whitespace changes
Inline
Side-by-side
deconvolution/d2graph/d2_path.py
View file @
15906856
import
networkx
as
nx
from
collections
import
Counter
import
sys
""" Represent an udg path into a d2g graph
...
...
@@ -10,6 +12,10 @@ class Path(list):
self
.
d2g
=
d2g
self
.
covering_variables
=
{
x
:
0
for
x
in
self
.
d2g
.
barcode_edge_idxs
.
values
()}
self
.
covering_value
=
0
# A succession of Counters (multisets)
self
.
barcode_order
=
[]
self
.
lcp_per_multiset
=
[]
self
.
barcode_score
=
0
def
append
(
self
,
obj
)
->
None
:
lcp
=
self
.
d2g
.
get_lcp
(
obj
)
...
...
@@ -21,10 +27,51 @@ class Path(list):
self
.
covering_value
+=
1
self
.
covering_variables
[
edge_idx
]
+=
1
self
.
_append_barcodes
(
lcp
)
super
(
Path
,
self
).
append
(
lcp
)
def _append_barcodes(self, lcp):
    """Register the barcodes of ``lcp`` in the barcode ordering structures.

    ``self.barcode_order`` is a list of ``Counter`` multisets and
    ``self.lcp_per_multiset`` is a parallel list of sets holding the lcps
    attached to each multiset. The multisets are scanned from the last one
    towards the first one:

    * a multiset entirely contained in the remaining barcodes of ``lcp``
      receives ``lcp`` and the scan continues to the left;
    * a multiset only partially shared is split in two (unshared part on
      the left, shared part on the right, the latter receiving ``lcp``)
      and the scan stops;
    * a multiset sharing nothing stops the scan.

    Barcodes not absorbed by any existing multiset are appended as a new
    trailing multiset owned by ``lcp`` alone.

    Args:
        lcp: hashable object whose ``nodes`` attribute iterates over its
            barcodes.
    """
    remaining_barcodes = Counter(lcp.nodes)

    # Walk the multisets from right to left. The list is only modified
    # right before a ``break``, so the precomputed index range stays valid.
    for set_idx in range(len(self.barcode_order) - 1, -1, -1):
        current_multiset = self.barcode_order[set_idx]
        intersection = remaining_barcodes & current_multiset

        if intersection == current_multiset:
            # The whole multiset is covered by the lcp.
            remaining_barcodes -= intersection
            self.lcp_per_multiset[set_idx].add(lcp)
            self.barcode_score += 1
            continue

        if not intersection:
            # Nothing shared with this multiset: stop scanning.
            break

        # Split the multiset in two parts: the unshared barcodes stay at
        # ``set_idx`` and the shared ones move to ``set_idx + 1``.
        left = current_multiset - intersection
        right = intersection
        self.barcode_order[set_idx] = right
        self.barcode_order.insert(set_idx, left)

        # Split the lcp appearance: both halves keep the previous lcps,
        # only the shared (right) half also receives the new lcp.
        self.lcp_per_multiset.insert(
            set_idx, self.lcp_per_multiset[set_idx].copy()
        )
        self.lcp_per_multiset[set_idx + 1].add(lcp)
        self.barcode_score += len(self.lcp_per_multiset[set_idx + 1])

        # Update remaining barcodes
        remaining_barcodes -= intersection
        break

    if remaining_barcodes:
        # Barcodes never seen on the scanned side form a new last multiset.
        self.barcode_order.append(remaining_barcodes)
        self.lcp_per_multiset.append({lcp})
        self.barcode_score += 1
def
pop
(
self
,
index
=-
1
):
if
index
!=
-
1
:
print
(
"Warning: pop on other values than -1 have side effects here. The code will be adapted soon"
,
file
=
sys
.
stderr
)
exit
(
1
)
index
=
-
1
lcp
=
super
(
Path
,
self
).
pop
(
index
)
self
.
_pop_barcodes
(
lcp
)
# Update the covering variables
for
barcode_edge
in
lcp
.
edges
:
...
...
@@ -35,6 +82,24 @@ class Path(list):
return
lcp
def _pop_barcodes(self, lcp):
    """Remove ``lcp`` from the barcode ordering structures.

    The multisets are scanned from the last one towards the first one for
    as long as ``lcp`` is attached to them. ``lcp`` is detached from each
    of them, and a multiset left without any lcp is deleted together with
    its entry in ``self.barcode_order``.

    Once the scan stops, if the multiset it stopped on and its right
    neighbour carry exactly the same lcps, the two are merged back into a
    single multiset.

    Args:
        lcp: the lcp to detach; it must be hashable, as it is looked up in
            the sets stored in ``self.lcp_per_multiset``.
    """
    set_idx = len(self.barcode_order) - 1

    while set_idx >= 0 and lcp in self.lcp_per_multiset[set_idx]:
        attached_lcps = self.lcp_per_multiset[set_idx]
        attached_lcps.remove(lcp)
        self.barcode_score -= 1

        if not attached_lcps:
            # No lcp left on this multiset: drop it from both lists.
            self.lcp_per_multiset.pop(set_idx)
            self.barcode_order.pop(set_idx)

        set_idx -= 1

    # ``set_idx`` now designates the first multiset (from the right) that
    # does not contain the lcp, or -1 when every multiset was visited.
    has_right_neighbour = 0 <= set_idx < len(self.lcp_per_multiset) - 1
    if (
        has_right_neighbour
        and self.lcp_per_multiset[set_idx] == self.lcp_per_multiset[set_idx + 1]
    ):
        # Both neighbours carry the same lcps: merge them into one.
        rmv = self.lcp_per_multiset.pop(set_idx)
        self.barcode_score -= len(rmv)
        # The lcp list is already one entry shorter, but barcode_order is
        # not: fold the left barcodes into the right multiset, then drop
        # the left one.
        self.barcode_order[set_idx + 1] += self.barcode_order[set_idx]
        self.barcode_order.pop(set_idx)
def
copy
(
self
):
copy
=
Path
(
self
.
d2g
)
...
...
@@ -47,6 +112,13 @@ class Path(list):
copy
.
covering_variables
[
key
]
=
val
copy
.
covering_value
=
self
.
covering_value
# Copy barcode structures
for
lcp_list
in
self
.
lcp_per_multiset
:
copy
.
lcp_per_multiset
.
append
(
lcp_list
.
copy
())
for
barcodes
in
self
.
barcode_order
:
copy
.
barcode_order
.
append
(
barcodes
.
copy
())
copy
.
barcode_score
=
self
.
barcode_score
return
copy
def
add_path
(
self
,
path
):
...
...
deconvolution/d2graph/path_optimization.py
View file @
15906856
...
...
@@ -43,7 +43,7 @@ class Optimizer:
best_path
=
current_path
.
copy
()
last_increase_node
=
current_node
if
verbose
:
print
(
f
"New best:
{
len
(
best_path
)
}
{
best_path
.
covering_value
}
/
{
max_cov
}
"
)
print
(
f
"New best:
{
len
(
best_path
)
}
{
best_path
.
covering_value
}
/
{
max_cov
}
(
{
best_path
.
barcode_score
}
)
"
)
if
best_path
.
covering_value
==
max_cov
:
print
(
"Max coverage"
)
...
...
deconvolution/main/d2_to_path.py
View file @
15906856
...
...
@@ -45,6 +45,10 @@ def main():
optimizer
=
po
.
Optimizer
(
largest_component
)
path
=
optimizer
.
bb_solution
(
verbose
=
args
.
verbose
)
for
barcodes
in
path
.
barcode_order
:
print
(
len
(
barcodes
),
' '
,
end
=
""
)
print
()
print
()
print
(
f
"covering score:
{
path
.
covering_score
()
}
"
)
path
.
save_gexf
(
f
"
{
args
.
out_prefix
}
_path.gexf"
)
print
(
"Solution saved"
)
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment