Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
7d2ef666
Commit
7d2ef666
authored
May 12, 2020
by
Yoann Dufresne
Browse files
partial order bad greedy ok
parent
83e8d871
Changes
2
Hide whitespace changes
Inline
Side-by-side
deconvolution/barcodes/partialorder.py
View file @
7d2ef666
...
...
@@ -45,17 +45,10 @@ class PartialOrder:
return
-
1
,
Counter
(),
remaining_barcodes
def
add_right
(
self
,
udg
):
self
.
udg_order
.
append
(
udg
)
self
.
len_udgs
+=
1
# Empty case
if
len
(
self
)
==
0
:
self
.
barcode_order
.
append
(
Counter
(
udg
.
nodes
))
self
.
udg_per_set
.
append
({
udg
})
return
# Step 1 - Determine overlapping multisets from right to left
leftmost_idx
,
left_non_overlap
,
new_multiset
=
self
.
_get_right_overlaps
(
udg
)
# Step 2 - Modify the leftmost overlapping multiset to match the new udg (Split it in 2)
...
...
@@ -70,6 +63,7 @@ class PartialOrder:
# Copy the previous overlapping udg set for the new multiset
self
.
udg_per_set
.
insert
(
leftmost_idx
,
self
.
udg_per_set
[
leftmost_idx
-
1
].
copy
())
self
.
len_sets
+=
1
self
.
score
+=
len
(
self
.
udg_per_set
[
leftmost_idx
])
# Step 3 - Add a new multiset on the right for the remaining barcodes
if
len
(
new_multiset
)
>
0
:
...
...
@@ -81,9 +75,7 @@ class PartialOrder:
# Step 4 - Add the udg as covering the right multisets
for
idx
in
range
(
max
(
0
,
leftmost_idx
),
self
.
len_sets
):
self
.
udg_per_set
[
idx
].
add
(
udg
)
# TODO: Step 5 - Modify score
self
.
score
+=
1
def
get_add_score
(
self
,
udg
):
score
=
0
...
...
@@ -105,22 +97,73 @@ class PartialOrder:
remaining_size
=
sum
(
remaining_right
.
values
())
-
sum
(
left_non_overlap
.
values
())
leftmost_idx
-=
1
# Search for non overlapped common barcodes
while
remaining_size
>
0
and
leftmost_idx
>=
0
:
ms
=
self
.
barcode_order
[
leftmost_idx
]
# Compute intersection
common
=
ms
&
remaining_right
candidate_negative
=
sum
(
common
.
values
())
score
-=
min
(
candidate_negative
,
remaining_size
)
# Update structures
remaining_right
-=
common
remaining_size
-=
sum
(
ms
.
values
())
leftmost_idx
-=
1
return
score
def
reverse_order
(
self
):
self
.
barcode_order
=
self
.
barcode_order
[::
-
1
]
self
.
udg_order
=
self
.
udg_order
[::
-
1
]
self
.
udg_per_set
=
self
.
udg_per_set
[::
-
1
]
def
__len__
(
self
):
return
self
.
len_barcodes
_saved_neighbors
=
{}
def
_next_node
(
d2g
,
partial_order
,
node
,
used
):
node
=
str
(
node
)
# create neighborhood on the first call
if
node
not
in
_saved_neighbors
:
_saved_neighbors
[
node
]
=
{
str
(
x
)
for
x
in
d2g
[
node
]
if
not
used
[
str
(
x
)]}
neighbors
=
_saved_neighbors
[
node
]
# Return None is no usable neighbor
if
len
(
neighbors
)
==
0
:
return
None
max_score
=
0
max_neighbor_name
=
None
for
neighbor_name
in
neighbors
:
neighbor_udg
=
d2g
.
node_by_idx
[
int
(
neighbor_name
)]
neighbor_score
=
partial_order
.
get_add_score
(
neighbor_udg
)
if
neighbor_score
>
max_score
:
max_score
=
neighbor_score
max_neighbor_name
=
neighbor_name
if
max_neighbor_name
is
None
:
return
None
else
:
neighbors
.
discard
(
max_neighbor_name
)
return
max_neighbor_name
def
greedy_partial_order
(
d2g
,
node
):
used_nodes
=
{
str
(
n
):
False
for
n
in
d2g
.
nodes
()}
used_nodes
[
str
(
node
)]
=
True
current_node
=
node
current_udg
=
d2g
.
node_by_idx
[
int
(
node
)]
po
=
PartialOrder
()
po
.
add_right
(
current_udg
)
forward
=
True
reverse
=
True
while
forward
or
reverse
:
next_node_name
=
_next_node
(
d2g
,
po
,
str
(
current_node
),
used_nodes
)
if
next_node_name
is
not
None
:
next_udg
=
d2g
.
node_by_idx
[
int
(
next_node_name
)]
po
.
add_right
(
next_udg
)
used_nodes
[
next_node_name
]
=
True
current_node
=
next_node_name
else
:
if
forward
:
forward
=
False
po
.
reverse_order
()
current_node
=
str
(
po
.
udg_order
[
-
1
].
idx
)
print
(
po
.
score
,
"reverse"
)
else
:
reverse
=
False
return
po
deconvolution/main/d2_to_barcode_path.py
0 → 100644
View file @
7d2ef666
#!/usr/bin/env python3
import
networkx
as
nx
import
argparse
import
sys
import
random
from
deconvolution.d2graph
import
d2_graph
as
d2
from
barcodes.partialorder
import
greedy_partial_order
def
parse_arguments
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Greedy construction of a path through the d2 graph.'
)
parser
.
add_argument
(
'barcode_graph'
,
help
=
'The barcode graph file. Must be a gefx formatted file.'
)
parser
.
add_argument
(
'd2_graph'
,
help
=
'd2 graph to reduce. Must be a gexf formatted file.'
)
parser
.
add_argument
(
'--out_prefix'
,
'-o'
,
default
=
""
,
help
=
"Output file prefix."
)
args
=
parser
.
parse_args
()
if
args
.
out_prefix
==
""
:
args
.
out_prefix
=
'.'
.
join
(
args
.
d2_graph
.
split
(
'.'
)[:
-
1
])
return
args
def
main
():
# Parsing the arguments and validate them
args
=
parse_arguments
()
barcode_file
=
args
.
barcode_graph
d2_file
=
args
.
d2_graph
if
(
not
barcode_file
.
endswith
(
'.gexf'
))
or
(
not
d2_file
.
endswith
(
".gexf"
)):
print
(
"Inputs file must be gexf formatted"
,
file
=
sys
.
stderr
)
exit
(
1
)
# Loading
G
=
nx
.
read_gexf
(
barcode_file
)
d2g
=
d2
.
D2Graph
(
G
)
d2g
.
load
(
d2_file
)
# Take the principal component
largest_component_nodes
=
max
(
nx
.
connected_components
(
d2g
),
key
=
len
)
largest_component
=
d2g
.
subgraph
(
largest_component_nodes
)
all_nodes
=
list
(
largest_component
.
nodes
())
rnd_node
=
all_nodes
[
random
.
randint
(
0
,
len
(
all_nodes
)
-
1
)]
po
=
greedy_partial_order
(
largest_component
,
rnd_node
)
print
(
"barcodes"
,
len
(
po
))
print
(
"sets"
,
po
.
len_sets
)
print
(
"udgs"
,
po
.
len_udgs
)
print
(
"score"
,
po
.
score
)
if
__name__
==
"__main__"
:
main
()
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment