Commit a927a057 authored by Blaise Li's avatar Blaise Li
Browse files

Prepare for testing different alignment settings.

Added function to compute seed disposition from bowtie2 settings.
parent 00f3e419
from .libhts import (
do_deseq2, gtf_2_genes_exon_lengths,
make_empty_bigwig,
make_seeding_function,
median_ratio_to_pseudo_ref_size_factors,
plot_boxplots, plot_counts_distribution, plot_histo,
plot_lfc_distribution, plot_MA,
......
from math import floor, ceil
from math import floor, ceil, sqrt, log
from functools import reduce
from re import sub
import warnings
......@@ -177,13 +177,67 @@ def spikein_gtf_2_lengths(spikein_gtf):
def make_empty_bigwig(filename, chrom_sizes):
"""Writes *filename* so that it is an empty bigwig file.
*chrom_sizes is a dictionary giving chromosome sizes* given
chomosome names.
"""
bw_out = pyBigWig.open(filename, "w")
bw_out.addHeader(list(chrom_sizes.items()))
for (chrom, chrom_len) in bw_out.chroms().items():
bw_out.addEntries(chrom, 0, values=np.nan_to_num(np.zeros(chrom_len)[0::10]), span=10, step=10)
bw_out.addEntries(
chrom, 0,
values=np.nan_to_num(np.zeros(chrom_len)[0::10]),
span=10, step=10)
bw_out.close()
def zero(value):
return 0
def identity(value):
return value
bowtie2_function_selector = {
"C": zero,
"L": identity,
"S": sqrt,
"G": log}
def make_seeding_function(seeding_string):
"""Generates a function that computes the seeding pattern given a
string representing bowtie2 seeding settings (-L and -i options).
>>> make_seeding_function("-L 6 -i S,1,0.8")(18)
[[0, 6], [4, 10], [8, 14], [12, 18]]
"""
[opt1, val1, opt2, val2] = seeding_string.split()
if opt1 == "-L":
assert opt2 == "-i"
seed_len = int(val1)
interval_string = val2
else:
assert opt2 == "-L"
seed_len = int(val2)
interval_string = val1
[func_type, constant, coeff] = interval_string.split(",")
constant = float(constant)
coeff = float(coeff)
func_type = bowtie2_function_selector[func_type]
def seeding_function(read_len):
interval = floor(constant + (coeff * func_type(read_len)))
seeds = []
seed = [0, seed_len]
while seed[1] <= read_len:
seeds.append(seed)
next_seed_start = seed[0] + interval
seed = [next_seed_start, next_seed_start + seed_len]
return seeds
return seeding_function
def do_deseq2(cond_names, conditions, counts_data,
formula=None, contrast=None, deseq2_args=None):
"""Runs a DESeq2 differential expression analysis."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment