Commit 49dbee15 authored by Rayan CHIKHI

Merge branch 'master' of gitlab.pasteur.fr:ydufresne/10x-deconvolve

parents 4cdd31ca 00aae418
......@@ -39,6 +39,8 @@ rule d2_simplification:
d2_raw="{barcode_path}_d2_raw_{method}.gexf"
output:
simplified_d2="{barcode_path}_d2_simplified_{method}.gexf"
wildcard_constraints:
method="[A-Za-z0-9]+"
shell:
"python3 deconvolution/main/d2_reduction.py -o {output.simplified_d2} {input.barcode_graph} {input.d2_raw}"
......@@ -49,6 +51,8 @@ rule d2_generation:
output:
d2_file=f"{WORKDIR}/{{file}}_d2_raw_{{method}}.gexf"
threads: workflow.cores
wildcard_constraints:
method="[A-Za-z0-9]+"
run:
shell(f"python3 deconvolution/main/to_d2_graph.py {{input.barcode_graph}} --{{wildcards.method}} -t {{threads}} -o {WORKDIR}/{{wildcards.file}}_d2_raw_{{wildcards.method}}")
......
# Sub-workflows pulled into this Snakefile.
# NOTE(review): these files are evaluated before WORKDIR is assigned below;
# confirm that any included rule using WORKDIR defines it on its own.
include: "Snakefile_data_simu"
include: "Snakefile_d2"
include: "Snakefile_d2_path"

# Directory that receives every generated file; taken from the `workdir`
# config entry when present, otherwise "snake_experiments".
WORKDIR = config.get("workdir", "snake_experiments")

# Parameter grid: each list fills the matching {n}/{d}/{m}/{dev} placeholder
# of the `simu_0_bar_n{n}_d{d}_m{m}-dev{dev}` file names used by the rules.
# Each name is assigned exactly once (the earlier, immediately overwritten
# values were dead stores).
N = [5000, 10000]
D = [10]
M = [2, 3]
DEV = [0, 1]
# Target rule: requests the comparison table for the `raw` type.
# Only one path is listed: two adjacent string literals without a comma would
# be implicitly concatenated into a single, non-existent file name.
rule generate_compare:
    input:
        f"{WORKDIR}/eval_compare_raw.tsv"
# Gather the per-simulation evaluation files of one {type} into a single TSV.
#
# Inputs : for every (n, d, m, dev) combination of the parameter grid, the
#          `_eval_d2.txt` and `_eval_path.txt` files of the requested {type}.
# Output : {WORKDIR}/eval_compare_{type}.tsv with one tab-separated row per
#          combination: n, m, dev, d2 value, greedy path value, splits.
rule comparable_tsv:
    input:
        # `{{type}}` survives expand() as the `{type}` wildcard of this rule.
        d2_evals=expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_d2.txt", m=M, d=D, n=N, dev=DEV),
        path_evals=expand(f"{WORKDIR}" + "/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{{type}}_maxclq_eval_path.txt", m=M, d=D, n=N, dev=DEV)
    output:
        f"{WORKDIR}/eval_compare_{{type}}.tsv"
    run:
        with open(str(output), "w") as out:
            for n in N:
                for d in D:
                    for m in M:
                        for dev in DEV:
                            with open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_d2.txt") as f_d2,\
                                 open(f"{WORKDIR}/simu_0_bar_n{n}_d{d}_m{m}-dev{dev}_d2_{wildcards.type}_maxclq_eval_path.txt") as d_path:
                                # Value after the last ': ' on line index 8 of the d2 report
                                # (presumably the longest-path figure - confirm against evaluate.py).
                                longest_path_d2 = f_d2.readlines()[8].strip().split(': ')[-1]
                                path_eval_lines = d_path.readlines()
                            # Last report line: keep its final space-separated field.
                            greedy_path = path_eval_lines[-1].strip().split(' ')[-1]
                            # Second-to-last line: turn "a - b" after the last ': ' into "a/b".
                            splits = "/".join(path_eval_lines[-2].strip().split(': ')[-1].split(' - '))
                            print(f"{n}\t{m}\t{dev}\t{longest_path_d2}\t{greedy_path}\t{splits}", file=out)
# Evaluate a max-clique d2 graph of the given {type} with evaluate.py in
# `d2` mode and store the script's stdout as the report file.
# A single input and a single output pattern are declared: adjacent string
# literals without a comma would be concatenated into one invalid pattern.
rule eval_d2:
    input:
        "{file}_d2_{type}_maxclq.gexf"
    output:
        "{file}_d2_{type}_maxclq_eval_d2.txt"
    shell:
        "python3 deconvolution/main/evaluate.py -t d2 {input} > {output}"
# Evaluate a path graph against its barcode graph.
#   barcode : "{barcode_file}.gexf", passed to evaluate.py through -b
#   path    : the "_path.gexf" file derived from the d2 graph of the given {type}
# The script's stdout is redirected into the "_eval_path.txt" report.
rule eval_path:
    input:
        barcode="{barcode_file}.gexf",
        path="{barcode_file}_d2_{type}_maxclq_path.gexf"
    output:
        "{barcode_file}_d2_{type}_maxclq_eval_path.txt"
    shell:
        "python3 deconvolution/main/evaluate.py --type path {input.path} -l -b {input.barcode} > {output}"
# Maximum number of d2_to_path.py runs attempted for one path file.
number_try = 25
# A run whose score exceeds this value is accepted immediately.
threshold = 0.95


# Build a path graph from a barcode graph and a d2 graph, keeping the
# best-scoring result out of up to `number_try` runs of d2_to_path.py.
rule d2_path_generation:
    input:
        barcode="{path}.gexf",
        d2="{path}_d2_{type}_{method}.gexf"
    output:
        "{path}_d2_{type}_{method}_path.gexf"
    run:
        # Start below any possible score so the first run is always kept;
        # with an initial 0 and a strict '>', runs that all score 0 would
        # never create the .best file and the final step would fail.
        best = float("-inf")
        for _ in range(number_try):
            shell("python3 deconvolution/main/d2_to_path.py {input.barcode} {input.d2} > {output}_tmp.out")
            # The score is the last space-separated field of the
            # second-to-last line printed by the script.
            with open(f"{output}_tmp.out") as out:
                score_line = out.readlines()[-2].strip()
            score = float(score_line.split(' ')[-1])
            print(score)
            if score > best:
                best = score
                # NOTE(review): assumes d2_to_path.py itself writes {output} - confirm.
                shell("cp {output} {output}.best")
            if score > threshold:
                print("Score sufficient: quitting...")
                break
        shell("rm {output}_tmp.out")
        # Move (not copy) so no undeclared .best file is left next to the output.
        shell("mv {output}.best {output}")
......@@ -13,6 +13,7 @@ def parse_arguments():
parser.add_argument('--output_prefix', '-o', default="d2_graph", help="Output file prefix.")
parser.add_argument('--threads', '-t', default=8, type=int, help='Number of thread to use for dgraph computation')
parser.add_argument('--debug', '-d', action='store_true', help="Debug")
parser.add_argument('--verbose', '-v', action='store_true', help="Verbose")
parser.add_argument('--edge_divergence_threshold', '-dt', default=0.25, type=float, help='Divergence threshold value to link two udgs in the d2-graph')
parser.add_argument('--maxclq', '-c', action='store_true', help="Enable max clique community detection (default behaviour)")
parser.add_argument('--louvain', '-l', action='store_true', help="Enable Louvain community detection instead of all max-cliques")
......@@ -31,6 +32,7 @@ def main():
def dprint(s):
    """Print *s* on stdout, prefixed with the current local time.

    The prefix uses the ``YYYY-MM-DD HH:MM:SS`` layout and is separated
    from *s* by a single space.
    """
    from datetime import datetime

    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    print(stamp, s)
dprint("loading barcode graph")
if filename.endswith('.gexf'):
......@@ -65,7 +67,7 @@ def main():
d2g = d2.D2Graph(G, debug=debug, debug_path=debug_path)
dprint("D2 graph object created")
dprint("constructing d2 graph from barcode graph")
d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads)
d2g.construct_from_barcodes(neighbor_threshold=args.edge_divergence_threshold, clique_mode=clique_mode, threads=args.threads, verbose=args.verbose)
dprint("[debug] d2 graph constructed")
# d2g.save(f"{args.output_prefix}.tsv")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment