Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Yoann DUFRESNE
linked reads molecule ordering
Commits
49dbee15
Commit
49dbee15
authored
May 14, 2020
by
Rayan CHIKHI
Browse files
Merge branch 'master' of gitlab.pasteur.fr:ydufresne/10x-deconvolve
parents
4cdd31ca
00aae418
Changes
4
Show whitespace changes
Inline
Side-by-side
Snakefile_d2
View file @
49dbee15
...
...
@@ -39,6 +39,8 @@ rule d2_simplification:
d2_raw="{barcode_path}_d2_raw_{method}.gexf"
output:
simplified_d2="{barcode_path}_d2_simplified_{method}.gexf"
wildcard_constraints:
method="[A-Za-z0-9]+"
shell:
"python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
...
...
@@ -49,6 +51,8 @@ rule d2_generation:
output:
d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf"
threads: workflow.cores
wildcard_constraints:
method="[A-Za-z0-9]+"
run:
shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -t {{threads}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
...
...
Snakefile_d2_eval
View file @
49dbee15
include: "Snakefile_data_simu"
include: "Snakefile_d2"
include: "Snakefile_d2_path"
WORKDIR = "snake_experiments" if "workdir" not in config else config["workdir"]
N = [
1
000]
N = [
5000, 10
000]
D = [10]
M = [2]
DEV = [0]
M = [2
, 3
]
DEV = [0
, 1
]
rule generate_compare:
input:
f"{WORKDIR}/eval_compare.tsv"
f"{WORKDIR}/eval_compare
_raw
.tsv"
rule comparable_tsv:
input:
expand(f"{WORKDIR}/simu_0_bar_n{{n}}_d{{d}}_m{{m}}-dev{{dev}}_d2_simplified_maxclq_eval.txt", m=M, d=D, n=N, dev=DEV)
d2_evals = expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_d2.txt", m=M, d=D, n=N, dev=DEV),
path_evals = expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_path.txt", m=M, d=D, n=N, dev=DEV)
output:
f"{WORKDIR}/eval_compare.tsv"
f"{WORKDIR}/eval_compare
_{{type}}
.tsv"
run:
with open(str(output), "w") as out:
for n in N:
for d in D:
for m in M:
for dev in DEV:
with open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_simplified_maxclq_eval.txt") as f:
longest_path = f.readlines()[8].strip().split(': ')[-1]
print(f"{n}\t{m}\t{dev}\t{longest_path}", file=out)
with open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_d2.txt") as f_d2,\
open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_path.txt") as d_path:
longest_path_d2 = f_d2.readlines()[8].strip().split(': ')[-1]
path_eval_lines = d_path.readlines()
greedy_path = path_eval_lines[-1].strip().split(' ')[-1]
splits = "/".join(path_eval_lines[-2].strip().split(': ')[-1].split(' - '))
print(f"{n}\t{m}\t{dev}\t{longest_path_d2}\t{greedy_path}\t{splits}", file=out)
rule eval:
rule eval
_d2
:
input:
"{file}_d2_
simplified
_maxclq.gexf"
"{file}_d2_
{type}
_maxclq.gexf"
output:
"{file}_d2_
simplified
_maxclq_eval.txt"
"{file}_d2_
{type}
_maxclq_eval
_d2
.txt"
shell:
"python3 deconvolution/main/evaluate.py -t d2 {input} > {output}"
# Runs evaluate.py in "path" mode on a max-clique d2 path graph, handing it the
# matching barcode graph through -b, and stores the script's stdout as the
# evaluation report.
rule eval_path:
    input:
        barcode="{barcode_file}.gexf",
        path="{barcode_file}_d2_{type}_maxclq_path.gexf"
    output:
        "{barcode_file}_d2_{type}_maxclq_eval_path.txt"
    shell:
        "python3 deconvolution/main/evaluate.py --type path {input.path} "
        "-l -b {input.barcode} > {output}"
Snakefile_d2_path
0 → 100644
View file @
49dbee15
# Maximum number of times d2_to_path.py is re-run for a single d2 graph.
number_try = 25
# An attempt scoring strictly above this value is accepted immediately and
# ends the retries.
threshold = 0.95


# Builds a path graph from a barcode graph and its d2 graph. The path search
# is repeated up to `number_try` times; the best-scoring attempt is kept as
# the rule output.
rule d2_path_generation:
    input:
        barcode="{path}.gexf",
        d2="{path}_d2_{type}_{method}.gexf"
    output:
        "{path}_d2_{type}_{method}_path.gexf"
    run:
        # Best score seen so far. None (rather than 0) guarantees that the
        # first attempt is always snapshotted, so the final move never fails
        # or picks up a stale `.best` file when every attempt scores 0.
        best = None
        for _ in range(number_try):
            # NOTE(review): the output path is not passed on the command line;
            # presumably d2_to_path.py derives it from its inputs and writes
            # {output} itself -- confirm against the script.
            shell("python3 deconvolution/main/d2_to_path.py {input.barcode} {input.d2} > {output}_tmp.out")
            # The score is read as the last space-separated token of the
            # second-to-last line printed by the script.
            with open(f"{output}_tmp.out") as attempt_log:
                score_line = attempt_log.readlines()[-2].strip()
            score = float(score_line.split(' ')[-1])
            print(score)
            if best is None or score > best:
                best = score
                # Snapshot this attempt: the next run overwrites {output}.
                shell("cp {output} {output}.best")
            if score > threshold:
                print("Score sufficient: quitting...")
                break
        # Clean up on every exit path, including the early break above.
        shell("rm -f {output}_tmp.out")
        # Promote the best snapshot and leave no `.best` side file behind.
        shell("mv {output}.best {output}")
deconvolution/main/to_d2_graph.py
View file @
49dbee15
...
...
@@ -13,6 +13,7 @@ def parse_arguments():
parser
.
add_argument
(
'--output_prefix'
,
'-o'
,
default
=
"d2_graph"
,
help
=
"Output file prefix."
)
parser
.
add_argument
(
'--threads'
,
'-t'
,
default
=
8
,
type
=
int
,
help
=
'Number of thread to use for dgraph computation'
)
parser
.
add_argument
(
'--debug'
,
'-d'
,
action
=
'store_true'
,
help
=
"Debug"
)
parser
.
add_argument
(
'--verbose'
,
'-v'
,
action
=
'store_true'
,
help
=
"Verbose"
)
parser
.
add_argument
(
'--edge_divergence_threshold'
,
'-dt'
,
default
=
0.25
,
type
=
float
,
help
=
'Divergence threshold value to link two udgs in the d2-graph'
)
parser
.
add_argument
(
'--maxclq'
,
'-c'
,
action
=
'store_true'
,
help
=
"Enable max clique community detection (default behaviour)"
)
parser
.
add_argument
(
'--louvain'
,
'-l'
,
action
=
'store_true'
,
help
=
"Enable Louvain community detection instead of all max-cliques"
)
...
...
@@ -31,6 +32,7 @@ def main():
def
dprint
(
s
):
from
datetime
import
datetime
t
=
datetime
.
now
().
strftime
(
'%Y-%m-%d %H:%M:%S'
)
print
(
t
,
s
)
dprint
(
"loading barcode graph"
)
if
filename
.
endswith
(
'.gexf'
):
...
...
@@ -65,7 +67,7 @@ def main():
d2g
=
d2
.
D2Graph
(
G
,
debug
=
debug
,
debug_path
=
debug_path
)
dprint
(
"D2 graph object created"
)
dprint
(
"constructing d2 graph from barcode graph"
)
d2g
.
construct_from_barcodes
(
neighbor_threshold
=
args
.
edge_divergence_threshold
,
clique_mode
=
clique_mode
,
threads
=
args
.
threads
)
d2g
.
construct_from_barcodes
(
neighbor_threshold
=
args
.
edge_divergence_threshold
,
clique_mode
=
clique_mode
,
threads
=
args
.
threads
,
verbose
=
args
.
verbose
)
dprint
(
"[debug] d2 graph constructed"
)
# d2g.save(f"{args.output_prefix}.tsv")
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment