Commit 49dbee15 authored by Rayan  CHIKHI's avatar Rayan CHIKHI
Browse files

Merge branch 'master' of gitlab.pasteur.fr:ydufresne/10x-deconvolve

parents 4cdd31ca 00aae418
...@@ -39,6 +39,8 @@ rule d2_simplification: ...@@ -39,6 +39,8 @@ rule d2_simplification:
d2_raw="{barcode_path}_d2_raw_{method}.gexf" d2_raw="{barcode_path}_d2_raw_{method}.gexf"
output: output:
simplified_d2="{barcode_path}_d2_simplified_{method}.gexf" simplified_d2="{barcode_path}_d2_simplified_{method}.gexf"
wildcard_constraints:
method="[A-Za-z0-9]+"
shell: shell:
"python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}" "python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
...@@ -49,6 +51,8 @@ rule d2_generation: ...@@ -49,6 +51,8 @@ rule d2_generation:
output: output:
d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf" d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf"
threads: workflow.cores threads: workflow.cores
wildcard_constraints:
method="[A-Za-z0-9]+"
run: run:
shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -t {{threads}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}") shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -t {{threads}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
......
include: "Snakefile_data_simu" include: "Snakefile_data_simu"
include: "Snakefile_d2" include: "Snakefile_d2"
include: "Snakefile_d2_path"
WORKDIR = "snake_experiments" if "workdir" not in config else config["workdir"] WORKDIR = "snake_experiments" if "workdir" not in config else config["workdir"]
N = [1000] N = [5000, 10000]
D = [10] D = [10]
M = [2] M = [2, 3]
DEV = [0] DEV = [0, 1]
rule generate_compare: rule generate_compare:
input: input:
f"{WORKDIR}/eval_compare.tsv" f"{WORKDIR}/eval_compare_raw.tsv"
rule comparable_tsv: rule comparable_tsv:
input: input:
expand(f"{WORKDIR}/simu_0_bar_n{{n}}_d{{d}}_m{{m}}-dev{{dev}}_d2_simplified_maxclq_eval.txt", m=M, d=D, n=N, dev=DEV) d2_evals = expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_d2.txt", m=M, d=D, n=N, dev=DEV),
path_evals = expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_path.txt", m=M, d=D, n=N, dev=DEV)
output: output:
f"{WORKDIR}/eval_compare.tsv" f"{WORKDIR}/eval_compare_{{type}}.tsv"
run: run:
with open(str(output), "w") as out: with open(str(output), "w") as out:
for n in N: for n in N:
for d in D: for d in D:
for m in M: for m in M:
for dev in DEV: for dev in DEV:
with open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_simplified_maxclq_eval.txt") as f: with open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_d2.txt") as f_d2,\
longest_path = f.readlines()[8].strip().split(': ')[-1] open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_path.txt") as d_path:
print(f"{n}\t{m}\t{dev}\t{longest_path}", file=out) longest_path_d2 = f_d2.readlines()[8].strip().split(': ')[-1]
path_eval_lines = d_path.readlines()
greedy_path = path_eval_lines[-1].strip().split(' ')[-1]
splits = "/".join(path_eval_lines[-2].strip().split(': ')[-1].split(' - '))
print(f"{n}\t{m}\t{dev}\t{longest_path_d2}\t{greedy_path}\t{splits}", file=out)
rule eval: rule eval_d2:
input: input:
"{file}_d2_simplified_maxclq.gexf" "{file}_d2_{type}_maxclq.gexf"
output: output:
"{file}_d2_simplified_maxclq_eval.txt" "{file}_d2_{type}_maxclq_eval_d2.txt"
shell: shell:
"python3 deconvolution/main/evaluate.py -t d2 {input} > {output}" "python3 deconvolution/main/evaluate.py -t d2 {input} > {output}"
# Evaluate a reconstructed path file against the barcode graph it was built from.
#
# Wildcards:
#   barcode_file  path prefix shared by the barcode graph and the path file
#   type          variant tag embedded in the d2 file name
#
# evaluate.py is run in "path" mode on the path graph, with the -l flag and
# the barcode graph passed through -b; everything it prints on stdout is
# captured as the evaluation report.
rule eval_path:
    input:
        barcode="{barcode_file}.gexf",
        path="{barcode_file}_d2_{type}_maxclq_path.gexf"
    output:
        "{barcode_file}_d2_{type}_maxclq_eval_path.txt"
    shell:
        "python3 deconvolution/main/evaluate.py --type path {input.path} -l -b {input.barcode} > {output}"
# Maximum number of d2_to_path.py attempts made for one target.
number_try = 25
# An attempt scoring strictly above this value ends the retry loop early.
threshold = 0.95


# Build a path graph from a barcode graph and a d2 graph, keeping the best of
# several attempts.
#
# d2_to_path.py is run up to `number_try` times. Its stdout is captured in a
# temporary log and the score is read from the last space-separated field of
# the second-to-last line of that log. The result of the best-scoring attempt
# is kept aside as "{output}.best" and moved into place at the end.
#
# NOTE(review): nothing in this rule tells d2_to_path.py where to write its
# result; the script is presumably deriving the "{output}" file name from its
# inputs -- confirm against d2_to_path.py.
rule d2_path_generation:
    input:
        barcode="{path}.gexf",
        d2="{path}_d2_{type}_{method}.gexf"
    output:
        "{path}_d2_{type}_{method}_path.gexf"
    run:
        # Start below any possible score so that the first attempt is always
        # saved; with an initial value of 0 a run where every attempt scored 0
        # never created "{output}.best" and the final step failed.
        best = float("-inf")
        for _ in range(number_try):
            shell("python3 deconvolution/main/d2_to_path.py {input.barcode} {input.d2} > {output}_tmp.out")

            with open(f"{output}_tmp.out") as out:
                score_line = out.readlines()[-2].strip()
            score = float(score_line.split(' ')[-1])
            print(score)

            # Drop the temporary log as soon as it has been parsed so that the
            # early exit below cannot leave it behind.
            shell("rm {output}_tmp.out")

            if score > best:
                best = score
                shell("cp {output} {output}.best")

            if score > threshold:
                print("Score sufficient: quitting...")
                break

        # Move (rather than copy) the best attempt into place so that no stray
        # ".best" file remains next to the declared output.
        shell("mv {output}.best {output}")
...@@ -13,6 +13,7 @@ def parse_arguments(): ...@@ -13,6 +13,7 @@ def parse_arguments():
parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.") parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.")
parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation') parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
parser.add_argument('--debug', '-d', action='store_true', help="Debug") parser.add_argument('--debug', '-d', action='store_true', help="Debug")
parser.add_argument('--verbose', '-v', action='store_true', help="Verbose")
parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph') parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph')
parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)") parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques") parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
...@@ -31,6 +32,7 @@ def main(): ...@@ -31,6 +32,7 @@ def main():
def dprint(s): def dprint(s):
from datetime import datetime from datetime import datetime
t = datetime.now().strftime('%Y-%m-%d %H:%M:%S') t = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(t, s)
dprint("loading barcode graph") dprint("loading barcode graph")
if filename.endswith('.gexf'): if filename.endswith('.gexf'):
...@@ -65,7 +67,7 @@ def main(): ...@@ -65,7 +67,7 @@ def main():
d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path) d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path)
dprint("D2 graph object created") dprint("D2 graph object created")
dprint("constructing d2 graph from barcode graph") dprint("constructing d2 graph from barcode graph")
d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads) d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads, verbose=args.verbose)
dprint("[debug] d2 graph constructed") dprint("[debug] d2 graph constructed")
# d2g.save(f"{args.output_prefix}.tsv") # d2g.save(f"{args.output_prefix}.tsv")
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment