Skip to content
Snippets Groups Projects
Commit 65a81e94 authored by Blaise Li's avatar Blaise Li
Browse files

Take results info from results folder.

Also update submission pipeline to accommodate for possible absence of
libraries for an analysis (for instance, if they have been commented out
in the .yaml file because they have already been submitted).
parent ea95755f
No related branches found
No related tags found
No related merge requests found
...@@ -196,6 +196,7 @@ else: ...@@ -196,6 +196,7 @@ else:
bw2condrep = None bw2condrep = None
for (libtype, analyses) in data_info.items(): for (libtype, analyses) in data_info.items():
for (analysis, analysis_info) in analyses.items(): for (analysis, analysis_info) in analyses.items():
if analysis_info["libraries"]:
for (library, rawdat) in analysis_info["libraries"].items(): for (library, rawdat) in analysis_info["libraries"].items():
assert Path(rawdat).exists() assert Path(rawdat).exists()
# if os.path.islink(rawdat): # if os.path.islink(rawdat):
...@@ -216,6 +217,7 @@ def determine_links(): ...@@ -216,6 +217,7 @@ def determine_links():
norm_type = wc_dict.get("norm_type", wc_dict.get("norm", wc_dict.get("normalizer"))) norm_type = wc_dict.get("norm_type", wc_dict.get("norm", wc_dict.get("normalizer")))
aligned_type = wc_dict.get("read_type", "reads") aligned_type = wc_dict.get("read_type", "reads")
for (analysis, analysis_info) in analyses.items(): for (analysis, analysis_info) in analyses.items():
if analysis_info["libraries"]:
for (library, rawdat) in analysis_info["libraries"].items(): for (library, rawdat) in analysis_info["libraries"].items():
assert Path(rawdat).exists() assert Path(rawdat).exists()
link_in_lib_type = OPJ(f"{paper}_{libtype}", f"{library}.fastq.gz") link_in_lib_type = OPJ(f"{paper}_{libtype}", f"{library}.fastq.gz")
...@@ -329,6 +331,7 @@ def fq_info(fqgz): ...@@ -329,6 +331,7 @@ def fq_info(fqgz):
def lib_type2md5s(wildcards): def lib_type2md5s(wildcards):
"""Find the md5sum files corresponding to raw data defined by *wildcards*.""" """Find the md5sum files corresponding to raw data defined by *wildcards*."""
for (analysis, analysis_info) in data_info[wildcards.libtype].items(): for (analysis, analysis_info) in data_info[wildcards.libtype].items():
if analysis_info["libraries"]:
for library in analysis_info["libraries"]: for library in analysis_info["libraries"]:
yield OPJ( yield OPJ(
paper, paper,
...@@ -340,6 +343,7 @@ def lib_type2md5s(wildcards): ...@@ -340,6 +343,7 @@ def lib_type2md5s(wildcards):
def lib_type2read_lens(wildcards): def lib_type2read_lens(wildcards):
"""Find the read lengths (number of sequencing cycles) corresponding to raw data defined by *wildcards*.""" """Find the read lengths (number of sequencing cycles) corresponding to raw data defined by *wildcards*."""
for (analysis, analysis_info) in data_info[wildcards.libtype].items(): for (analysis, analysis_info) in data_info[wildcards.libtype].items():
if analysis_info["libraries"]:
# Loop to have as many read lengths as there are md5 files (from lib_type2md5s) # Loop to have as many read lengths as there are md5 files (from lib_type2md5s)
for _ in analysis_info["libraries"]: for _ in analysis_info["libraries"]:
yield analysis_info["read_len"] yield analysis_info["read_len"]
...@@ -378,7 +382,7 @@ def get_link_for_analysis(wildcards): ...@@ -378,7 +382,7 @@ def get_link_for_analysis(wildcards):
potential_links = set() potential_links = set()
source_links = set() source_links = set()
for (analysis, analysis_info) in data_info[wildcards.libtype].items(): for (analysis, analysis_info) in data_info[wildcards.libtype].items():
if wildcards.library in analysis_info["libraries"]: if analysis_info["libraries"] and wildcards.library in analysis_info["libraries"]:
source_link = OPJ( source_link = OPJ(
paper, paper,
f"{wildcards.libtype}", f"{wildcards.libtype}",
...@@ -433,7 +437,7 @@ def get_link_bw_for_analysis(wildcards): ...@@ -433,7 +437,7 @@ def get_link_bw_for_analysis(wildcards):
potential_links = set() potential_links = set()
source_links = set() source_links = set()
for (analysis, analysis_info) in data_info[wildcards.libtype].items(): for (analysis, analysis_info) in data_info[wildcards.libtype].items():
if wildcards.library in analysis_info["libraries"]: if analysis_info["libraries"] and wildcards.library in analysis_info["libraries"]:
source_link = OPJ( source_link = OPJ(
paper, paper,
f"{wildcards.libtype}", f"{wildcards.libtype}",
...@@ -465,6 +469,7 @@ def lib_type2bw_md5s(wildcards): ...@@ -465,6 +469,7 @@ def lib_type2bw_md5s(wildcards):
norm_type = wc_dict.get("norm_type", wc_dict.get("norm", wc_dict.get("normalizer"))) norm_type = wc_dict.get("norm_type", wc_dict.get("norm", wc_dict.get("normalizer")))
aligned_type = wc_dict.get("read_type", "reads") aligned_type = wc_dict.get("read_type", "reads")
for (analysis, analysis_info) in data_info[wildcards.libtype].items(): for (analysis, analysis_info) in data_info[wildcards.libtype].items():
if analysis_info["libraries"]:
for library in analysis_info["libraries"]: for library in analysis_info["libraries"]:
yield OPJ( yield OPJ(
paper, paper,
......
...@@ -27,6 +27,7 @@ def formatwarning(message, category, filename, lineno, line=None): ...@@ -27,6 +27,7 @@ def formatwarning(message, category, filename, lineno, line=None):
warnings.simplefilter('always', UserWarning) warnings.simplefilter('always', UserWarning)
warnings.formatwarning = formatwarning warnings.formatwarning = formatwarning
from glob import glob
from pickle import dump, HIGHEST_PROTOCOL from pickle import dump, HIGHEST_PROTOCOL
from pathlib import Path from pathlib import Path
from yaml import safe_load as yload from yaml import safe_load as yload
...@@ -188,17 +189,42 @@ def main(): ...@@ -188,17 +189,42 @@ def main():
vars_from_loading_sf = set() vars_from_loading_sf = set()
issues = defaultdict(list) issues = defaultdict(list)
for (libtype, analyses) in data_info.items(): for (libtype, analyses) in data_info.items():
# TODO: use snakefile copied in the analysis folder, if present,
# this one otherwise
# snakefile for this libtype # snakefile for this libtype
analysis_snakefile = libtype_info[libtype]["snakefile"] libtype_snakefile = libtype_info[libtype]["snakefile"]
# name of the rule that produces bigwig files # name of the rule that produces bigwig files
bw_rulename = libtype_info[libtype]["rule"] bw_rulename = libtype_info[libtype]["rule"]
for (analysis, analysis_info) in analyses.items(): for (analysis, analysis_info) in analyses.items():
# results folder
try:
results_folder = OPR(analysis_info["results_folder"])
except KeyError:
results_folder = OPR(analysis_info["from_folder"])
### ###
# determine pattern of the bigwig file path # determine pattern of the bigwig file path
### ###
from_folder = OPR(analysis_info["from_folder"]) # Can we unambiguously get the configuration file
# configuration for this analysis # that has been used for this analysis?
analysis_config = yload(open(analysis_info["config"], "r")) try:
[analysis_config_file] = glob(OPJ(results_folder, "*.yaml"))
except ValueError:
warnings.warn(
"Either no or more than one .yaml file "
f"found in {results_folder}\n"
"Looking for one explicitly set.\n")
analysis_config_file = analysis_info["config"]
analysis_config = yload(open(analysis_config_file, "r"))
# Can we unambiguously get the snakefile
# that has been used for this analysis?
try:
[analysis_snakefile] = glob(OPJ(results_folder, "*.snakefile"))
except ValueError:
warnings.warn(
"Either no or more than one .snakefile file "
f"found in {results_folder}\n"
f"Using default {libtype_snakefile}\n")
analysis_snakefile = libtype_snakefile
common_vars = set(analysis_config.keys()) & (set(dir()) - vars_from_analyses) common_vars = set(analysis_config.keys()) & (set(dir()) - vars_from_analyses)
assert not common_vars, f"Variable overwriting hazard!\n{common_vars}" assert not common_vars, f"Variable overwriting hazard!\n{common_vars}"
vars_from_analyses |= set(analysis_config.keys()) vars_from_analyses |= set(analysis_config.keys())
...@@ -285,7 +311,7 @@ def main(): ...@@ -285,7 +311,7 @@ def main():
"\n".join(map(str, condrep2rawdat[libtype][(library, cond, rep)])))) "\n".join(map(str, condrep2rawdat[libtype][(library, cond, rep)]))))
rawdat2condrep[libtype][rawdat].append((library, cond, rep)) rawdat2condrep[libtype][rawdat].append((library, cond, rep))
condrep2rawdat[libtype][(library, cond, rep)].append(rawdat) condrep2rawdat[libtype][(library, cond, rep)].append(rawdat)
bw = OPJ(from_folder, bw_pattern.format( bw = OPJ(results_folder, bw_pattern.format(
lib=cond, rep=rep, lib=cond, rep=rep,
**libtype_info[libtype]["default_wildcards"]).format( **libtype_info[libtype]["default_wildcards"]).format(
genome=genome)) genome=genome))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment