Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
linked reads molecule ordering
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Incidents
Environments
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
97b187fc
Commit
97b187fc
authored
May 13, 2020
by
Yoann Dufresne
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
bugged version of barcode ordonancer
parent
7d2ef666
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
119 additions
and
21 deletions
+119
-21
deconvolution/barcodes/partialorder.py
deconvolution/barcodes/partialorder.py
+115
-15
deconvolution/main/d2_to_barcode_path.py
deconvolution/main/d2_to_barcode_path.py
+4
-6
No files found.
deconvolution/barcodes/partialorder.py
View file @
97b187fc
...
...
@@ -25,6 +25,24 @@ class PartialOrder:
# This score must be updated when the order is modified
self
.
score
=
0
self
.
debug_stack
=
[]
def copy(self):
    """Return an independent copy of this partial order.

    The barcode multisets and the per-set udg coverings are duplicated, so
    mutating the copy never alters this object; the udg objects themselves
    are shared between both orders.  The debug stack is duplicated as well:
    ``remove_right`` pops one debug entry per removed udg, so a copy with an
    empty stack could not be unwound.

    :return: A new order with the same content, counters and score.
    """
    # type(self)() builds the same class as the receiver (PartialOrder for
    # a PartialOrder) without depending on the module-level name.
    clone = type(self)()

    # Containers: one level of copy so both orders can evolve separately.
    clone.barcode_order.extend(ms.copy() for ms in self.barcode_order)
    clone.udg_order.extend(self.udg_order)
    clone.udg_per_set.extend(udg_set.copy() for udg_set in self.udg_per_set)

    # Each debug entry is a (udg, scores) pair; the scores list is mutable,
    # so it gets its own copy too.
    clone.debug_stack = [(udg, list(scores)) for udg, scores in self.debug_stack]

    # Plain counters.
    clone.len_barcodes = self.len_barcodes
    clone.len_sets = self.len_sets
    clone.len_udgs = self.len_udgs
    clone.score = self.score

    return clone
def
_get_right_overlaps
(
self
,
udg
):
""" Get the overlap of a udg with the right part of the multiset partial order.
:param udg: The udg to overlap
...
...
@@ -36,19 +54,25 @@ class PartialOrder:
# Will look for full overlaps from right to left
while
current_set_idx
>=
0
:
ms
=
self
.
barcode_order
[
current_set_idx
]
if
len
(
ms
-
remaining_barcodes
)
==
0
and
ms
!=
remaining_barcodes
:
current_set_idx
-=
1
remaining_barcodes
-=
ms
remaining_barcodes
=
remaining_barcodes
-
ms
elif
len
(
ms
&
remaining_barcodes
)
==
0
:
return
current_set_idx
+
1
,
Counter
(),
remaining_barcodes
else
:
# leftmost multiset , leftmost non overlapping, non overlapping barcodes
return
current_set_idx
,
ms
-
remaining_barcodes
,
remaining_barcodes
-
ms
return
-
1
,
Counter
(),
remaining_barcodes
return
0
,
Counter
(),
remaining_barcodes
def
add_right
(
self
,
udg
):
save
=
self
.
copy
()
self
.
udg_order
.
append
(
udg
)
self
.
len_udgs
+=
1
scores
=
[
0
,
0
,
0
]
# Step 1 - Determine overlapping multisets from right to left
leftmost_idx
,
left_non_overlap
,
new_multiset
=
self
.
_get_right_overlaps
(
udg
)
# Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2)
...
...
@@ -64,18 +88,63 @@ class PartialOrder:
self
.
udg_per_set
.
insert
(
leftmost_idx
,
self
.
udg_per_set
[
leftmost_idx
-
1
].
copy
())
self
.
len_sets
+=
1
self
.
score
+=
len
(
self
.
udg_per_set
[
leftmost_idx
])
scores
[
0
]
+=
len
(
self
.
udg_per_set
[
leftmost_idx
])
# Step 3 - Add the udg as covering the right multisets
for
idx
in
range
(
max
(
0
,
leftmost_idx
),
self
.
len_sets
):
self
.
udg_per_set
[
idx
].
add
(
udg
)
self
.
score
+=
1
scores
[
1
]
+=
1
# Step
3
- Add a new multiset on the right for the remaining barcodes
# Step
4
- Add a new multiset on the right for the remaining barcodes
if
len
(
new_multiset
)
>
0
:
self
.
barcode_order
.
append
(
new_multiset
)
self
.
udg_per_set
.
append
(
set
())
self
.
len_sets
+=
1
self
.
len_barcodes
+=
sum
(
new_multiset
.
values
())
# Step 4 - Add the udg as covering the right multisets
for
idx
in
range
(
max
(
0
,
leftmost_idx
),
self
.
len_sets
):
self
.
udg_per_set
[
idx
].
add
(
udg
)
self
.
len_sets
+=
1
self
.
score
+=
1
scores
[
2
]
+=
1
self
.
udg_per_set
.
append
({
udg
})
self
.
debug_stack
.
append
((
udg
,
scores
))
def add_right2(self, udg):
    """Work-in-progress alternative to ``add_right``.

    For now it only asks ``_get_right_overlaps2`` for the overlaps of *udg*
    and unpacks the three values it returns; this method itself neither
    modifies the order nor returns anything.

    :param udg: The udg to place on the right of the order.
    """
    overlaps = self._get_right_overlaps2(udg)
    # Unpacked but not used yet: left index, then the leftmost and the
    # rightmost overlaps.
    left_idx, leftmost_overlap, rightmost_overlap = overlaps
def remove_right(self):
    """Remove the rightmost udg of the order and undo its contribution.

    The steps mirror ``add_right`` in reverse: the udg is popped, the last
    multiset is dropped when that udg was its only cover, the udg is erased
    from every multiset it covered, and the two multisets that were split to
    fit it are merged back when their coverings are identical again.  The
    counters (``len_udgs``, ``len_sets``, ``len_barcodes``) and ``score`` are
    updated along the way.

    :return: The udg that was removed.
    :raises IndexError: If the order contains no udg (``list.pop`` on an
        empty ``udg_order``).
    """
    # Step 1 - Remove the udg, together with its debug entry
    last_udg = self.udg_order.pop()
    self.debug_stack.pop()
    self.len_udgs -= 1

    # Step 2 - Remove the last multiset if it is only covered by last_udg
    if len(self.udg_per_set[-1]) == 1:
        self.udg_per_set.pop()
        dropped = self.barcode_order.pop()
        self.len_barcodes -= sum(dropped.values())
        self.len_sets -= 1
        self.score -= 1

    # Step 3 - Remove last_udg from the coverings, from right to left.
    # The walk stops on the first multiset that last_udg does not cover.
    idx = len(self.barcode_order) - 1
    while idx >= 0 and last_udg in self.udg_per_set[idx]:
        self.udg_per_set[idx].remove(last_udg)
        self.score -= 1
        idx -= 1

    # Step 4 - Merge the two left sets of interest if they are identical.
    # idx is the first multiset not covered by last_udg; it can only be
    # merged when a right neighbour exists.
    if (0 <= idx < len(self.udg_per_set) - 1
            and self.udg_per_set[idx] == self.udg_per_set[idx + 1]):
        merged_cover = self.udg_per_set.pop(idx)
        self.score -= len(merged_cover)
        left_ms = self.barcode_order.pop(idx)
        # After the pop, position idx holds the former right neighbour.
        self.barcode_order[idx] = self.barcode_order[idx] + left_ms
        self.len_sets -= 1

    return last_udg
def
get_add_score
(
self
,
udg
):
score
=
0
...
...
@@ -92,11 +161,6 @@ class PartialOrder:
# covering number points for the new udg
score
+=
self
.
len_sets
-
leftmost_idx
# Negative points for redundant elements
# shift one left
remaining_size
=
sum
(
remaining_right
.
values
())
-
sum
(
left_non_overlap
.
values
())
leftmost_idx
-=
1
return
score
def
reverse_order
(
self
):
...
...
@@ -108,6 +172,7 @@ class PartialOrder:
return
self
.
len_barcodes
_predicted_score
=
0
_saved_neighbors
=
{}
def
_next_node
(
d2g
,
partial_order
,
node
,
used
):
node
=
str
(
node
)
...
...
@@ -134,6 +199,8 @@ def _next_node(d2g, partial_order, node, used):
return
None
else
:
neighbors
.
discard
(
max_neighbor_name
)
global
_predicted_score
_predicted_score
=
max_score
return
max_neighbor_name
...
...
@@ -161,9 +228,42 @@ def greedy_partial_order(d2g, node):
forward
=
False
po
.
reverse_order
()
current_node
=
str
(
po
.
udg_order
[
-
1
].
idx
)
print
(
po
.
score
,
"reverse"
)
else
:
reverse
=
False
return
po
def bb_partial_order(d2g, node):
    """Enumerate partial orders by a depth-first walk with backtracking.

    Starting from *node*, udgs are appended on the right of a partial order
    for as long as ``_next_node`` proposes an unused neighbor.  Each time the
    walk reaches a dead end, a copy of the current order is yielded and the
    walk steps back to the previous udg to try another neighbor.

    :param d2g: The d2 graph to explore; must provide ``nodes()`` and
        ``node_by_idx``.
    :param node: Name (or index) of the node the order starts from.
    :return: A generator of ``PartialOrder`` copies, one per dead end.
    """
    used_nodes = {str(n): False for n in d2g.nodes()}
    current_node_name = str(node)
    used_nodes[current_node_name] = True

    po = PartialOrder()
    po.add_right(d2g.node_by_idx[int(node)])

    while True:
        next_node_name = _next_node(d2g, po, current_node_name, used_nodes)

        # We found a new deeper solution
        if next_node_name is not None:
            po.add_right(d2g.node_by_idx[int(next_node_name)])
            used_nodes[next_node_name] = True
            current_node_name = next_node_name
            continue

        # All the possible solutions have been explored
        if len(po) == 0:
            return

        # We are in a dead end, must go back one step
        yield po.copy()
        po.remove_right()
        used_nodes[current_node_name] = False
        # Forget the neighbors cached for the abandoned node; tolerate a
        # missing entry instead of aborting the whole enumeration.
        _saved_neighbors.pop(current_node_name, None)

        # remove_right() returns the udg it just removed, so resuming from
        # its idx would keep the walk on the abandoned node.  Resume from
        # the udg that is now the rightmost one instead.
        if not po.udg_order:
            # The start node itself was removed: nothing left to explore.
            return
        current_node_name = str(po.udg_order[-1].idx)
deconvolution/main/d2_to_barcode_path.py
View file @
97b187fc
...
...
@@ -6,7 +6,7 @@ import sys
import
random
from
deconvolution.d2graph
import
d2_graph
as
d2
from
barcodes.partialorder
import
greedy_partial_order
from
barcodes.partialorder
import
greedy_partial_order
,
bb_partial_order
def
parse_arguments
():
...
...
@@ -44,11 +44,9 @@ def main():
all_nodes
=
list
(
largest_component
.
nodes
())
rnd_node
=
all_nodes
[
random
.
randint
(
0
,
len
(
all_nodes
)
-
1
)]
po
=
greedy_partial_order
(
largest_component
,
rnd_node
)
print
(
"barcodes"
,
len
(
po
))
print
(
"sets"
,
po
.
len_sets
)
print
(
"udgs"
,
po
.
len_udgs
)
print
(
"score"
,
po
.
score
)
# po = greedy_partial_order(largest_component, rnd_node)
for
po
in
bb_partial_order
(
largest_component
,
rnd_node
):
print
(
"barcodes"
,
len
(
po
),
"sets"
,
po
.
len_sets
,
"udgs"
,
po
.
len_udgs
,
"score"
,
po
.
score
)
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment