From d5d6959f0a48f4ecf93ec1a9908812d8283cc982 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Fri, 1 Feb 2019 15:57:42 +0100 Subject: [PATCH] Better handle errors due to missing data. --- small_RNA-seq/small_RNA-seq.snakefile | 120 ++++++++++++++++---------- 1 file changed, 74 insertions(+), 46 deletions(-) diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile index 1c38395..e532bf4 100644 --- a/small_RNA-seq/small_RNA-seq.snakefile +++ b/small_RNA-seq/small_RNA-seq.snakefile @@ -2400,39 +2400,51 @@ rule make_fold_heatmap: all_folds = source_gathered_folds, output: fold_heatmap = OPJ(output_dir, "figures", "fold_heatmaps", "{small_type}_{fold_type}_heatmap.pdf"), + log: + warnings = OPJ(log_dir, "make_fold_heatmap", "{small_type}_{fold_type}.warnings"), benchmark: OPJ(log_dir, "make_fold_heatmap", "{small_type}_{fold_type}_benchmark.txt"), threads: 16 # to limit memory usage, actually resources: mem_mb=8150 run: - usetex = mpl.rcParams.get("text.usetex", False) - all_folds = pd.read_table(input.all_folds, index_col=["gene"]) - #gene_colours = all_folds.small_type.map(small_type2colour) - # Faster - gene_colours = all_folds.apply(small_type_colour_setter, axis=1) - if usetex: - gene_colours.name = texscape(f"{wildcards.small_type}_{wildcards.fold_type}") - else: - gene_colours.name = f"{wildcards.small_type}_{wildcards.fold_type}" - #all_folds.drop(["cosmid", "name", "small_type"], axis=1) - #all_folds[all_folds.columns.difference(["cosmid", "name", "small_type"])] - # https://github.com/mwaskom/seaborn/issues/1262 - #mpl.use("agg") - try: - #labels_dict = merge_with(tuple, small_type2colour, DefaultCounter(all_folds.small_type)) - # https://stackoverflow.com/a/47396625/1878788 - small_type_counts = Counter(small_type2colour.keys()) - small_type_counts.update(all_folds.small_type) - labels_dict = {small_type : (small_type2colour[small_type], count - 1) for (small_type, count) in small_type_counts.items()} - save_plot( - output.fold_heatmap, plot_fold_heatmap, - all_folds.drop(["cosmid", "name", "small_type"], axis=1), - gene_colours, labels_dict, tight=False, rasterize=True) - except ValueError as e: - print(labels_dict) - raise - #mpl.use("PDF") + with warn_context(log.warnings) as warn: + usetex = mpl.rcParams.get("text.usetex", False) + all_folds = pd.read_table(input.all_folds, index_col=["gene"]) + #gene_colours = all_folds.small_type.map(small_type2colour) + # Faster + gene_colours = all_folds.apply(small_type_colour_setter, axis=1) + if usetex: + gene_colours.name = texscape(f"{wildcards.small_type}_{wildcards.fold_type}") + else: + gene_colours.name = f"{wildcards.small_type}_{wildcards.fold_type}" + #all_folds.drop(["cosmid", "name", "small_type"], axis=1) + #all_folds[all_folds.columns.difference(["cosmid", "name", "small_type"])] + # https://github.com/mwaskom/seaborn/issues/1262 + #mpl.use("agg") + try: + #labels_dict = merge_with(tuple, small_type2colour, DefaultCounter(all_folds.small_type)) + # https://stackoverflow.com/a/47396625/1878788 + small_type_counts = Counter(small_type2colour.keys()) + small_type_counts.update(all_folds.small_type) + labels_dict = {small_type : (small_type2colour[small_type], count - 1) for (small_type, count) in small_type_counts.items()} + save_plot( + output.fold_heatmap, plot_fold_heatmap, + all_folds.drop(["cosmid", "name", "small_type"], axis=1), + gene_colours, labels_dict, tight=False, rasterize=True) + except ValueError as err: + if str(err) == "The number of observations cannot be determined on an empty distance matrix.": + warn("\n".join([ + "Got ValueError:", + f"{str(err)}", + f"No data to plot in {output.fold_heatmap}\n"])) + warn("Generating empty file.\n") + # Make the file empty + open(output.fold_heatmap, "w").close() + else: + print(labels_dict) + raise + #mpl.use("PDF") #def plot_norm_counts(counts_data, summaries): @@ -3822,30 +3834,46 @@ rule make_gene_list_lfc_boxplots: output: boxplots = OPJ(output_dir, "figures", "{contrast}", "{contrast}_{small_type}_{fold_type}_{gene_list}_boxplots.pdf") + log: + warnings = OPJ(log_dir, "make_gene_list_lfc_boxplots", "{contrast}_{small_type}_{fold_type}_{gene_list}.warnings"), params: id_lists = set_id_lists, run: - lfcs_dict = {} - lfc_data = pd.read_table(input.data, index_col="gene") - for (list_name, id_list) in params.id_lists.items(): + with warn_context(log.warnings) as warn: + lfcs_dict = {} + lfc_data = pd.read_table(input.data, index_col="gene") + for (list_name, id_list) in params.id_lists.items(): + try: + selected_rows = lfc_data.loc[lfc_data.index.intersection(id_list)] + except TypeError as err: + print(params.id_lists) + print(type(id_list)) + print(lfc_data.index.intersection(id_list)) + raise + selected_data = selected_rows[wildcards.fold_type] + lfcs_dict[list_name] = selected_data + lfcs = pd.DataFrame(lfcs_dict) + # lfcs = pd.DataFrame( + # {list_name : lfc_data.loc[set(id_list)][wildcards.fold_type] for ( + # list_name, id_list) in params.id_lists.items()}) + title = f"{wildcards.small_type} folds for {wildcards.contrast}" try: - selected_rows = lfc_data.loc[lfc_data.index.intersection(id_list)] + save_plot( + output.boxplots, + plot_boxplots, + lfcs, wildcards.fold_type, + title=title) except TypeError as err: - print(params.id_lists) - print(type(id_list)) - print(lfc_data.index.intersection(id_list)) - raise - selected_data = selected_rows[wildcards.fold_type] - lfcs_dict[list_name] = selected_data - lfcs = pd.DataFrame(lfcs_dict) - # lfcs = pd.DataFrame( - # {list_name : lfc_data.loc[set(id_list)][wildcards.fold_type] for ( - # list_name, id_list) in params.id_lists.items()}) - save_plot( - output.boxplots, - plot_boxplots, - lfcs, wildcards.fold_type, - title=f"{wildcards.small_type} folds for {wildcards.contrast}") + if str(err) == "Empty 'DataFrame': no numeric data to plot": + warn("\n".join([ + "Got TypeError:", + f"{str(err)}", + f"No data to plot for {title}\n"])) + warn("Generating empty file.\n") + # Make the file empty + open(output.boxplots, "w").close() + else: + raise rule make_contrast_lfc_boxplots: -- GitLab