Commit 14b5f981 authored by Blaise Li
Browse files

Avoid crashing on empty DESeq2 results.

parent 800dd65b
......@@ -1751,8 +1751,11 @@ rule differential_expression:
counts_and_res = OPJ(
aligner, f"mapped_{genome}", "{counter}", "deseq2_{mapped_type}",
"{contrast}", "{orientation}_{biotype}", "{contrast}_counts_and_res.txt"),
log:
warnings = OPJ(log_dir, "differential_expression", "{counter}_{contrast}_{orientation}_{biotype}_{mapped_type}.warnings"),
threads: 4 # to limit memory usage, actually
run:
with warn_context(log.warnings) as warn:
counts_data = pd.read_table(input.counts_table, index_col="gene")
summaries = pd.read_table(input.summary_table, index_col=0)
# Running DESeq2
......@@ -1760,7 +1763,7 @@ rule differential_expression:
formula = Formula("~ lib")
(cond, ref) = CONTRAST2PAIR[wildcards.contrast]
if not any(counts_data[f"{ref}_{rep}"].any() for rep in REPS):
warnings.warn(
warn(
"Reference data is all zero.\nSkipping %s_%s_%s" % (
wildcards.contrast, wildcards.orientation, wildcards.biotype))
for outfile in output:
......@@ -1771,7 +1774,7 @@ rule differential_expression:
res, size_factors = do_deseq2(COND_NAMES, CONDITIONS, counts_data, formula=formula, contrast=contrast)
#except RRuntimeError as e:
except RuntimeError as e:
warnings.warn(
warn(
"Probably not enough usable data points to perform DESeq2 analyses:\n%s\nSkipping %s_%s_%s" % (
str(e), wildcards.contrast, wildcards.orientation, wildcards.biotype))
for outfile in output:
......@@ -1910,6 +1913,7 @@ def source_fold_data(wildcards):
OPJ(aligner, f"mapped_{genome}",
"{{counter}}", "deseq2_{{mapped_type}}", "{contrast}",
"{{orientation}}_{{biotype}}", "{contrast}_counts_and_res.txt"),
# OPJ("hisat2/mapped_on_C_elegans/feature_count/deseq2_on_C_elegans/{contrast}/rev_DNA_transposons_rmsk_families/{contrast}_counts_and_res.txt"),
contrast=contrasts_dict[wildcards.contrast_type])]
else:
return rules.differential_expression.output.counts_and_res
......@@ -1941,7 +1945,10 @@ rule make_contrast_lfc_boxplots:
boxplots = OPJ(aligner, f"mapped_{genome}", "{counter}", "fold_boxplots_{mapped_type}",
"{contrast_type}_{orientation}_{biotype}_{fold_type}_{id_list}_boxplots.pdf"),
log:
warnings = OPJ(log_dir, "make_contrast_lfc_boxplots", aligner, f"mapped_{genome}", "{counter}", "fold_boxplots_{mapped_type}", "{contrast_type}_{orientation}_{biotype}_{fold_type}_{id_list}.warnings"),
warnings = OPJ(
log_dir, "make_contrast_lfc_boxplots", aligner,
f"mapped_{genome}", "{counter}", "fold_boxplots_{mapped_type}",
"{contrast_type}_{orientation}_{biotype}_{fold_type}_{id_list}.warnings"),
params:
id_lists = set_id_lists,
run:
......@@ -1952,7 +1959,32 @@ rule make_contrast_lfc_boxplots:
# print("Reading fold changes from:", *input.data, sep="\n")
lfcs_dict = {}
for (contrast, filename) in zip(contrasts_dict[wildcards.contrast_type], input.data):
# Input files may come from a failed DESeq2 analysis
if test_na_file(filename):
warn(
"No %s results for %s_%s_%s_%s. Making dummy output." % (
wildcards.fold_type,
wildcards.mapped_type,
contrast,
wildcards.orientation,
wildcards.biotype))
# Should we make an empty lfc_data DataFrame instead?
# Make the file empty
open(output.boxplots, "w").close()
break
try:
    # Load the table for this contrast, indexed by its "gene" column.
    lfc_data = pd.read_table(filename, index_col="gene")
except TypeError as err:
    # if str(err) == "unsupported operand type(s) for -: 'str' and 'int'":
    #     warn(str(err))
    #     warn("Generating empty file.\n")
    #     # Make the file empty
    #     open(output.boxplots, "w").close()
    #     break
    # Report which file triggered the error before propagating it.
    # The message has to be an f-string: without the prefix, the
    # "{...}" placeholders would be emitted literally instead of the
    # error text and the file name.
    warn(
        f"Unexpected TypeError:\n{err}\n"
        f"This occurred while parsing {filename}.")
    raise
#print(type(lfc_data))
for (list_name, id_list) in params.id_lists.items():
try:
......@@ -1968,6 +2000,8 @@ rule make_contrast_lfc_boxplots:
#print("column has type:", type(selected_data))
#print(selected_data)
lfcs_dict[f"{contrast}_{list_name}"] = selected_data
else:
# No break encountered in the loop
lfcs = pd.DataFrame(lfcs_dict)
# lfcs = pd.DataFrame(
# {f"{contrast}_{list_name}" : pd.read_table(filename, index_col="gene").loc[
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment