Skip to content
Snippets Groups Projects
Commit efb29419 authored by Blaise Li's avatar Blaise Li
Browse files

More robust merging of bigwig files.

Started to apply to RNA-seq and GRO-seq technique used in small RNA-seq.
parent 3c815d52
No related branches found
No related tags found
No related merge requests found
...@@ -49,7 +49,7 @@ import husl ...@@ -49,7 +49,7 @@ import husl
from libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter, plot_lfc_distribution, plot_MA from libhts import do_deseq2, median_ratio_to_pseudo_ref_size_factors, status_setter, plot_lfc_distribution, plot_MA
from libworkflows import get_chrom_sizes, column_converter from libworkflows import get_chrom_sizes, column_converter
from libworkflows import strip_split, last_lines, save_plot, test_na_file from libworkflows import strip_split, last_lines, save_plot, test_na_file
from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS from libworkflows import make_id_list_getter, filter_combinator, SHELL_FUNCTIONS, warn_context
from libworkflows import read_htseq_counts, sum_htseq_counts from libworkflows import read_htseq_counts, sum_htseq_counts
from libworkflows import read_feature_counts, sum_feature_counts from libworkflows import read_feature_counts, sum_feature_counts
...@@ -969,20 +969,44 @@ rule merge_bigwig_reps: ...@@ -969,20 +969,44 @@ rule merge_bigwig_reps:
#expand(OPJ(output_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}_on_C_elegans_norm_{{orientation}}.bw"), rep=[rep for rep in REPS if frozenset((wildcards.lib, rep)) not in forbidden]), #expand(OPJ(output_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}_on_C_elegans_norm_{{orientation}}.bw"), rep=[rep for rep in REPS if frozenset((wildcards.lib, rep)) not in forbidden]),
output: output:
bw = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"), bw = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"),
log:
warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"),
threads: 2 # to limit memory usage, actually
run: run:
bws = [pyBigWig.open(bw_filename) for bw_filename in input] with warn_context(log.warnings) as warn:
#for bw_filename in input: try:
# bws.append(pyBigWig.open(bw_filename)) bws = [pyBigWig.open(bw_filename) for bw_filename in input]
bw_out = pyBigWig.open(output.bw, "w") #for bw_filename in input:
bw_out.addHeader(list(bws[0].chroms().items())) # bws.append(pyBigWig.open(bw_filename))
for (chrom, chrom_len) in bw_out.chroms().items(): except RuntimeError as e:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) warn(str(e))
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0) warn("Generating empty file.\n")
# bin size is 10 # Make the file empty
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) open(output.bw, "w").close()
bw_out.close() else:
for bw in bws: bw_out = pyBigWig.open(output.bw, "w")
bw.close() bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
try:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
except KeyError as e:
warn(str(e))
warn(f"Chromosome {chrom} might be missing from one of the input files.\n")
for filename, bw in zip(input, bws):
msg = " ".join([f"{filename}:", *list(bw.chroms().keys())])
warn(f"{msg}:\n")
#raise
warn(f"The bigwig files without {chrom} will be skipped.\n")
to_use = [bw for bw in bws if chrom in bw.chroms()]
if to_use:
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0)
else:
means = np.zeros(chrom_len)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
rule join_all_counts: rule join_all_counts:
......
...@@ -45,7 +45,7 @@ from rpy2.robjects import Formula, StrVector ...@@ -45,7 +45,7 @@ from rpy2.robjects import Formula, StrVector
from libhts import do_deseq2, status_setter, plot_lfc_distribution, plot_MA from libhts import do_deseq2, status_setter, plot_lfc_distribution, plot_MA
from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, plot_norm_correlations, plot_counts_distribution from libhts import median_ratio_to_pseudo_ref_size_factors, size_factor_correlations, plot_norm_correlations, plot_counts_distribution
from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter from libworkflows import get_chrom_sizes, column_converter, make_id_list_getter
from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS from libworkflows import strip_split, save_plot, test_na_file, SHELL_FUNCTIONS, warn_context
from libworkflows import read_htseq_counts, sum_htseq_counts from libworkflows import read_htseq_counts, sum_htseq_counts
from libworkflows import read_intersect_counts, sum_intersect_counts from libworkflows import read_intersect_counts, sum_intersect_counts
from libworkflows import read_feature_counts, sum_feature_counts from libworkflows import read_feature_counts, sum_feature_counts
...@@ -690,19 +690,44 @@ rule merge_bigwig_reps: ...@@ -690,19 +690,44 @@ rule merge_bigwig_reps:
expand(OPJ(mapping_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}", "{{lib}}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), rep=REPS), expand(OPJ(mapping_dir, aligner, "mapped_C_elegans", "{{lib}}_{rep}", "{{lib}}_{rep}_on_C_elegans_by_{{norm_type}}_{{orientation}}.bw"), rep=REPS),
output: output:
bw = OPJ(mapping_dir, aligner, "mapped_C_elegans", "{lib}_mean", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"), bw = OPJ(mapping_dir, aligner, "mapped_C_elegans", "{lib}_mean", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.bw"),
log:
warnings = OPJ(log_dir, "merge_bigwig_reps", "{lib}_mean_on_C_elegans_by_{norm_type}_{orientation}.warnings"),
threads: 12 # to limit memory usage, actually threads: 12 # to limit memory usage, actually
run: run:
bws = [pyBigWig.open(bw_filename) for bw_filename in input] with warn_context(log.warnings) as warn:
bw_out = pyBigWig.open(output.bw, "w") try:
bw_out.addHeader(list(bws[0].chroms().items())) bws = [pyBigWig.open(bw_filename) for bw_filename in input]
for (chrom, chrom_len) in bw_out.chroms().items(): #for bw_filename in input:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws]) # bws.append(pyBigWig.open(bw_filename))
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in bws]), axis=0) except RuntimeError as e:
# bin size is 10 warn(str(e))
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10) warn("Generating empty file.\n")
bw_out.close() # Make the file empty
for bw in bws: open(output.bw, "w").close()
bw.close() else:
bw_out = pyBigWig.open(output.bw, "w")
bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
try:
assert all([bw.chroms()[chrom] == chrom_len for bw in bws])
except KeyError as e:
warn(str(e))
warn(f"Chromosome {chrom} might be missing from one of the input files.\n")
for filename, bw in zip(input, bws):
msg = " ".join([f"{filename}:", *list(bw.chroms().keys())])
warn(f"{msg}:\n")
#raise
warn(f"The bigwig files without {chrom} will be skipped.\n")
to_use = [bw for bw in bws if chrom in bw.chroms()]
if to_use:
means = np.nanmean(np.vstack([bw.values(chrom, 0, chrom_len, numpy=True) for bw in to_use]), axis=0)
else:
means = np.zeros(chrom_len)
# bin size is 10
bw_out.addEntries(chrom, 0, values=np.nan_to_num(means[0::10]), span=10, step=10)
bw_out.close()
for bw in bws:
bw.close()
rule join_all_counts: rule join_all_counts:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment