Skip to content
Snippets Groups Projects
Commit 5704975c authored by Blaise Li
Browse files

Snakefile to prepare data submission.

It creates links in a directory hierarchy from a configuration file.
parent fc34a5ca
Branches
No related tags found
No related merge requests found
import os
import sys
from pathlib import Path

# The f-strings used further down need Python 3.6 or newer.
major, minor = sys.version_info[:2]
if (major, minor) < (3, 6):
    sys.exit("Need at least python 3.6\n")

# Short alias used when assembling paths.
OPJ = os.path.join

# `config` is not defined in this file; presumably it is the configuration
# dictionary that Snakemake injects into the Snakefile namespace.
ref_info = config["ref"]
# Directory under which the link / md5 hierarchy is created.
data_dir = ref_info["paper"]
# Nested mapping: library type -> analysis -> {"libraries": {name: raw file}}.
data_info = config["data"]
def determine_fqgz_and_md5file():
    """Yield the expected link and checksum paths for every configured library.

    Walks ``data_info`` (library type -> analysis -> ``"libraries"`` ->
    library name -> raw data path) and, for each library, yields a pair
    ``(fastq_gz_path, md5_path)`` located under
    ``data_dir/<lib_type>/<analysis>/``.

    Raises:
        FileNotFoundError: if the raw data file configured for a library
            does not exist.
    """
    for (lib_type, analyses) in data_info.items():
        for (analysis, analysis_info) in analyses.items():
            for (library, raw_data) in analysis_info["libraries"].items():
                # Explicit check rather than ``assert``: assertions are
                # skipped under ``python -O`` and would not say which file
                # is missing.
                if not Path(raw_data).exists():
                    raise FileNotFoundError(
                        f"Raw data file {raw_data!r} configured for library "
                        f"{library!r} ({lib_type}/{analysis}) does not exist.")
                fq_gz = OPJ(data_dir, lib_type, analysis, f"{library}.fastq.gz")
                # The checksum file sits next to the link, with ".md5" appended.
                yield (fq_gz, f"{fq_gz}.md5")
def lib2data(wildcards):
    """Return the raw data path configured for the library named by *wildcards*.

    Looks up ``data_info[lib_type][analysis]["libraries"][library]`` using
    the ``lib_type``, ``analysis`` and ``library`` attributes of *wildcards*.
    """
    analysis_info = data_info[wildcards.lib_type][wildcards.analysis]
    return analysis_info["libraries"][wildcards.library]
rule all:
    """Target rule requesting every fastq.gz link and its md5 file."""
    input:
        # Transpose the (link, md5) pairs into one tuple of links and one
        # tuple of md5 files.
        [*zip(*determine_fqgz_and_md5file())]
rule link_raw_data:
    """Install the raw data in a local directory by means of a symlink.
    The path of the original file comes from the configuration."""
    input:
        raw = lib2data,
    output:
        link = OPJ(data_dir, "{lib_type}/{analysis}/{library}.fastq.gz")
    message:
        "Making link {output.link} to raw data {input.raw}."
    run:
        # The link points at the absolute path of the raw file.
        raw_abspath = os.path.abspath(input.raw)
        os.symlink(raw_abspath, output.link)
rule compute_md5sum:
    """This rule computes the md5 checksum of a linked raw data file.
    The output of md5sum is written next to the link, in a file with the
    same name plus a ".md5" suffix."""
    input:
        link = rules.link_raw_data.output.link,
    output:
        md5 = os.path.join(data_dir, "{lib_type}/{analysis}/{library}.fastq.gz.md5")
    message:
        "Computing md5sum for {input.link}."
    shell:
        """
        md5sum {input.link} > {output.md5}
        """
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment