diff --git a/PRO-seq/PRO-seq.snakefile b/PRO-seq/PRO-seq.snakefile index 36260c1d9f2dbc38014d3ba1aa8e44d78b20f13f..8c1c256f8952ac05d04d3a82f784960755c30eab 100644 --- a/PRO-seq/PRO-seq.snakefile +++ b/PRO-seq/PRO-seq.snakefile @@ -49,7 +49,7 @@ import husl from libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter, plot_lfc_distribution, plot_MA from libworkflows import get_chrom_sizes, column_converter from libworkflows import strip_split, last_lines, save_plot, test_na_file -from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS +from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS, warn_context from libworkflows import read_htseq_counts, sum_htseq_counts from libworkflows import read_feature_counts, sum_feature_counts @@ -969,20 +969,44 @@ rule merge_bigwig_reps: #expand(OPJ(output_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}_on_C_elegans_norm_{{orientation}}.bw"), rep=[rep for rep in REPS if frozenset((wildcards.lib, rep)) not in forbidden]), output: bw = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"), + log: + warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"), + threads: 2 # to limit memory usage, actually run: - bws = [pyBigWig.open(bw_filename) for bw_filename in input] - #for bw_filename in input: - # bws.append(pyBigWig.open(bw_filename)) - bw_out = pyBigWig.open(output.bw, "w") - bw_out.addHeader(list(bws[0].chroms().items())) - for (chrom, chrom_len) in bw_out.chroms().items(): - assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) - means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0) - # bin size is 10 - bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) - bw_out.close() - for bw in bws: - bw.close() + with warn_context(log.warnings) as warn: + try: + bws = [pyBigWig.open(bw_filename) for bw_filename in input] + #for bw_filename in input: + # bws.append(pyBigWig.open(bw_filename)) + except RuntimeError as e: + warn(str(e)) + warn("Generating empty file.\n") + # Make the file empty + open(output.bw, "w").close() + else: + bw_out = pyBigWig.open(output.bw, "w") + bw_out.addHeader(list(chrom_sizes.items())) + for (chrom, chrom_len) in bw_out.chroms().items(): + try: + assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) + except KeyError as e: + warn(str(e)) + warn(f"Chromosome {chrom} might be missing from one of the input files.\n") + for filename, bw in zip(input, bws): + msg = " ".join([f"{filename}:", *list(bw.chroms().keys())]) + warn(f"{msg}:\n") + #raise + warn(f"The bigwig files without {chrom} will be skipped.\n") + to_use = [bw for bw in bws if chrom in bw.chroms()] + if to_use: + means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0) + else: + means = np.zeros(chrom_len) + # bin size is 10 + bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) + bw_out.close() + for bw in bws: + bw.close() rule join_all_counts: diff --git a/RNA_Seq_Cecere/RNA-seq.snakefile b/RNA_Seq_Cecere/RNA-seq.snakefile index fd29436770cbc4238b4c6491e3539b08a7d2ba56..c8a263ee179cfef139d58f46bf5f8310afc55fd9 100644 --- a/RNA_Seq_Cecere/RNA-seq.snakefile +++ b/RNA_Seq_Cecere/RNA-seq.snakefile @@ -45,7 +45,7 @@ from rpy2.robjects import Formula, StrVector from libhts import do_deseq2, status_setter, plot_lfc_distribution, plot_MA from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, plot_norm_correlations, plot_counts_distribution from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter -from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS +from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS, warn_context from libworkflows import read_htseq_counts, sum_htseq_counts from libworkflows import read_intersect_counts, sum_intersect_counts from libworkflows import read_feature_counts, sum_feature_counts @@ -690,19 +690,44 @@ rule merge_bigwig_reps: expand(OPJ(mapping_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}", "{{lib}}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), rep=REPS), output: bw = OPJ(mapping_dir, aligner, "mapped_C_elegans", "{lib}_mean", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"), + log: + warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"), threads: 12 # to limit memory usage, actually run: - bws = [pyBigWig.open(bw_filename) for bw_filename in input] - bw_out = pyBigWig.open(output.bw, "w") - bw_out.addHeader(list(bws[0].chroms().items())) - for (chrom, chrom_len) in bw_out.chroms().items(): - assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) - means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0) - # bin size is 10 - bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) - bw_out.close() - for bw in bws: - bw.close() + with warn_context(log.warnings) as warn: + try: + bws = [pyBigWig.open(bw_filename) for bw_filename in input] + #for bw_filename in input: + # bws.append(pyBigWig.open(bw_filename)) + except RuntimeError as e: + warn(str(e)) + warn("Generating empty file.\n") + # Make the file empty + open(output.bw, "w").close() + else: + bw_out = pyBigWig.open(output.bw, "w") + bw_out.addHeader(list(chrom_sizes.items())) + for (chrom, chrom_len) in bw_out.chroms().items(): + try: + assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) + except KeyError as e: + warn(str(e)) + warn(f"Chromosome {chrom} might be missing from one of the input files.\n") + for filename, bw in zip(input, bws): + msg = " ".join([f"{filename}:", *list(bw.chroms().keys())]) + warn(f"{msg}:\n") + #raise + warn(f"The bigwig files without {chrom} will be skipped.\n") + to_use = [bw for bw in bws if chrom in bw.chroms()] + if to_use: + means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0) + else: + means = np.zeros(chrom_len) + # bin size is 10 + bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) + bw_out.close() + for bw in bws: + bw.close() rule join_all_counts: