Commit 55032d52 authored by Yoann Dufresne's avatar Yoann Dufresne
Browse files

update Snakefile for d2 generation. Update the readme to match the snakefile

parent 32eb0e52
......@@ -15,7 +15,7 @@ To know how to use it please use the -h option.
* use the snakefile "Snakemake_data_simu".
All the parameters can be an integer or a list of integers.
Each combination of parameter will generate a barcode graph
Each combination of parameters will generate a barcode graph.
Config parameters:
* n: the number of initial molecules
* m: average number of nodes merged in each barcode
......@@ -24,6 +24,13 @@ Config parameters:
### Data structures and algorithms
* Create a d2 graph from barcode graph: use the snakemake "Snakefile_d2"
The result will be generated as a compressed file in the workdir.
Config parameters:
* input: the input barcode graph (gexf format preferred).
* workdir: The working and output directory.
* to_d2_graph.py: Mount a barcode graph into memory and create a d2 graph from it.
* evaluate.py: take a d2 graph gexf file and analyse it. Look for an approximation of the longest correct path to reconstruct a molecule graph. Take as input a d2 graph where the truth is known in the node names (the format used to create fake data).
......
import glob
WORKDIR = "snake_tests"
DATA = "real_data"
SAMPLE = "ema_spades.sam"
#DATA = "real_data"
#SAMPLE = "ema_spades_minovl15k_cont2k.sam"
WORKDIR = "snake_exec" if "workdir" not in config else config["workdir"]
INPUT = "data/simulated_barcode_1000_5_2.gexf" if "input" not in config else config["input"]
SAMPLE_NAME = INPUT[INPUT.rfind('/')+1:INPUT.rfind('.')]
rule all:
input:
f"{WORKDIR}/{SAMPLE}.tar.gz"
f"{WORKDIR}/{SAMPLE_NAME}.tar.gz"
rule compress_data:
......@@ -46,25 +44,28 @@ rule d2_generation:
f"python3 deconvolution/to_d2_graph.py {{input.barcode_graph}} -o {WORKDIR}/{{wildcards.file}}_d2_raw"
def define_graph_input(wildcards):
lst = glob.glob(f"{DATA}/{wildcards.file}.*")
lst.append(None)
#def define_graph_input(wildcards):
# lst = glob.glob(f"{DATA}/{wildcards.file}.*")
# lst.append(None)
#
# return WORKDIR + lst[0][lst[0].rfind('/'):]
return WORKDIR + lst[0][lst[0].rfind('/'):]
rule convert_file:
input: define_graph_input
output:
f"{WORKDIR}/{{file}}.gexf"
shell:
"python3 deconvolution/gexf_converter.py {input}"
#rule convert_file:
# input:
# f"{WORKDIR}/{INPUT[INPUT.rfind('/')+1:]}"
# output:
# f"{WORKDIR}/{{file}}.gexf"
# shell:
# "python3 deconvolution/gexf_converter.py {input}"
rule setup_workdir:
input:
barcode_graph=f"{DATA}/{{file}}"
f"{INPUT}"
output:
f"{WORKDIR}/{{file}}"
shell:
f"if [ ! -d {WORKDIR} ]; then mkdir {WORKDIR}; fi;"
f"cp {{input.barcode_graph}} {WORKDIR}"
f"{WORKDIR}/{SAMPLE_NAME}.gexf"
run:
shell(f"if [ ! -d {WORKDIR} ]; then mkdir {WORKDIR}; fi;")
shell(f"cp {{input}} {WORKDIR}")
if input[-5:] != ".gexf":
shell("python3 deconvolution/gexf_converter.py {input}")
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment