diff --git a/libhts/__init__.py b/libhts/__init__.py index 5a45ee3d20d74e0d790407fe56946f55080bea73..b1aab224f06b5b3d591665a04587c727ed0e5ca8 100644 --- a/libhts/__init__.py +++ b/libhts/__init__.py @@ -1,6 +1,7 @@ from .libhts import ( do_deseq2, gtf_2_genes_exon_lengths, make_empty_bigwig, + make_seeding_function, median_ratio_to_pseudo_ref_size_factors, plot_boxplots, plot_counts_distribution, plot_histo, plot_lfc_distribution, plot_MA, diff --git a/libhts/libhts.py b/libhts/libhts.py index b3775766fea5c565be3b82cc8a08cbb27e24f24f..b087970d62aad6d064fda60139f873864dc2ff51 100644 --- a/libhts/libhts.py +++ b/libhts/libhts.py @@ -1,4 +1,4 @@ -from math import floor, ceil +from math import floor, ceil, sqrt, log from functools import reduce from re import sub import warnings @@ -177,13 +177,67 @@ def spikein_gtf_2_lengths(spikein_gtf): def make_empty_bigwig(filename, chrom_sizes): + """Writes *filename* so that it is an empty bigwig file. + *chrom_sizes is a dictionary giving chromosome sizes* given + chomosome names. + """ bw_out = pyBigWig.open(filename, "w") bw_out.addHeader(list(chrom_sizes.items())) for (chrom, chrom_len) in bw_out.chroms().items(): - bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10) + bw_out.addEntries( + chrom, 0, + values=np.nan_to_num(np.zeros(chrom_len)[0::10]), + span=10, step=10) bw_out.close() +def zero(value): + return 0 + + +def identity(value): + return value + + +bowtie2_function_selector = { + "C": zero, + "L": identity, + "S": sqrt, + "G": log} + + +def make_seeding_function(seeding_string): + """Generates a function that computes the seeding pattern given a + string representing bowtie2 seeding settings (-L and -i options). + + >>> make_seeding_function("-L 6 -i S,1,0.8")(18) + [[0, 6], [4, 10], [8, 14], [12, 18]] + """ + [opt1, val1, opt2, val2] = seeding_string.split() + if opt1 == "-L": + assert opt2 == "-i" + seed_len = int(val1) + interval_string = val2 + else: + assert opt2 == "-L" + seed_len = int(val2) + interval_string = val1 + [func_type, constant, coeff] = interval_string.split(",") + constant = float(constant) + coeff = float(coeff) + func_type = bowtie2_function_selector[func_type] + def seeding_function(read_len): + interval = floor(constant + (coeff * func_type(read_len))) + seeds = [] + seed = [0, seed_len] + while seed[1] <= read_len: + seeds.append(seed) + next_seed_start = seed[0] + interval + seed = [next_seed_start, next_seed_start + seed_len] + return seeds + return seeding_function + + def do_deseq2(cond_names, conditions, counts_data, formula=None, contrast=None, deseq2_args=None): """Runs a DESeq2 differential expression analysis."""