Commit efb29419 authored by Blaise Li's avatar Blaise Li
Browse files

More robust merging of bigwig files.

Started to apply to RNA-seq and GRO-seq technique used in small RNA-seq.
parent 3c815d52
......@@ -49,7 +49,7 @@ import husl
from libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter, plot_lfc_distribution, plot_MA
from libworkflows import get_chrom_sizes, column_converter
from libworkflows import strip_split, last_lines, save_plot, test_na_file
from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS
from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS, warn_context
from libworkflows import read_htseq_counts, sum_htseq_counts
from libworkflows import read_feature_counts, sum_feature_counts
......@@ -969,20 +969,44 @@ rule merge_bigwig_reps:
#expand(OPJ(output_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}_on_C_elegans_norm_{{orientation}}.bw"), rep=[rep for rep in REPS if frozenset((wildcards.lib, rep)) not in forbidden]),
output:
bw = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"),
log:
warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"),
threads: 2 # to limit memory usage, actually
run:
bws = [pyBigWig.open(bw_filename) for bw_filename in input]
#for bw_filename in input:
# bws.append(pyBigWig.open(bw_filename))
bw_out = pyBigWig.open(output.bw, "w")
bw_out.addHeader(list(bws[0].chroms().items()))
for (chrom, chrom_len) in bw_out.chroms().items():
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
with warn_context(log.warnings) as warn:
try:
bws = [pyBigWig.open(bw_filename) for bw_filename in input]
#for bw_filename in input:
# bws.append(pyBigWig.open(bw_filename))
except RuntimeError as e:
warn(str(e))
warn("Generating empty file.\n")
# Make the file empty
open(output.bw, "w").close()
else:
bw_out = pyBigWig.open(output.bw, "w")
bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
try:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
except KeyError as e:
warn(str(e))
warn(f"Chromosome {chrom} might be missing from one of the input files.\n")
for filename, bw in zip(input, bws):
msg = " ".join([f"{filename}:", *list(bw.chroms().keys())])
warn(f"{msg}:\n")
#raise
warn(f"The bigwig files without {chrom} will be skipped.\n")
to_use = [bw for bw in bws if chrom in bw.chroms()]
if to_use:
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0)
else:
means = np.zeros(chrom_len)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
rule join_all_counts:
......
......@@ -45,7 +45,7 @@ from rpy2.robjects import Formula, StrVector
from libhts import do_deseq2, status_setter, plot_lfc_distribution, plot_MA
from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, plot_norm_correlations, plot_counts_distribution
from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter
from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS
from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS, warn_context
from libworkflows import read_htseq_counts, sum_htseq_counts
from libworkflows import read_intersect_counts, sum_intersect_counts
from libworkflows import read_feature_counts, sum_feature_counts
......@@ -690,19 +690,44 @@ rule merge_bigwig_reps:
expand(OPJ(mapping_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}", "{{lib}}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), rep=REPS),
output:
bw = OPJ(mapping_dir, aligner, "mapped_C_elegans", "{lib}_mean", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"),
log:
warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"),
threads: 12 # to limit memory usage, actually
run:
bws = [pyBigWig.open(bw_filename) for bw_filename in input]
bw_out = pyBigWig.open(output.bw, "w")
bw_out.addHeader(list(bws[0].chroms().items()))
for (chrom, chrom_len) in bw_out.chroms().items():
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
with warn_context(log.warnings) as warn:
try:
bws = [pyBigWig.open(bw_filename) for bw_filename in input]
#for bw_filename in input:
# bws.append(pyBigWig.open(bw_filename))
except RuntimeError as e:
warn(str(e))
warn("Generating empty file.\n")
# Make the file empty
open(output.bw, "w").close()
else:
bw_out = pyBigWig.open(output.bw, "w")
bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
try:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
except KeyError as e:
warn(str(e))
warn(f"Chromosome {chrom} might be missing from one of the input files.\n")
for filename, bw in zip(input, bws):
msg = " ".join([f"{filename}:", *list(bw.chroms().keys())])
warn(f"{msg}:\n")
#raise
warn(f"The bigwig files without {chrom} will be skipped.\n")
to_use = [bw for bw in bws if chrom in bw.chroms()]
if to_use:
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0)
else:
means = np.zeros(chrom_len)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
rule join_all_counts:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment