Commit 973a02ec authored by Blaise Li's avatar Blaise Li
Browse files

Add spike-ins in "exon" length computation.

Also noticed a problem involving MultiIndex
(https://stackoverflow.com/q/48345640/1878788).
parent 785b0089
from .libhts import do_deseq2, gtf_2_genes_exon_lengths, median_ratio_to_pseudo_ref_size_factors, plot_boxplots, plot_counts_distribution, plot_lfc_distribution, plot_MA, plot_norm_correlations, plot_scatter, repeat_bed_2_lengths, size_factor_correlations, status_setter
from .libhts import do_deseq2, gtf_2_genes_exon_lengths, median_ratio_to_pseudo_ref_size_factors, plot_boxplots, plot_counts_distribution, plot_lfc_distribution, plot_MA, plot_norm_correlations, plot_scatter, repeat_bed_2_lengths, size_factor_correlations, spikein_gtf_2_lengths, status_setter
......@@ -117,7 +117,7 @@ def gtf_2_genes_exon_lengths(gtf_filename):
gene.set_union_exon_length()
return pd.DataFrame(pd.Series(
{gene.gene_id : gene.union_exon_length for gene in genes.values()},
name=("union_exon_len",)).rename_axis("gene"))
name=("union_exon_len")).rename_axis("gene"))
def repeat_bed_2_lengths(repeat_bed):
......@@ -144,6 +144,20 @@ def repeat_bed_2_lengths(repeat_bed):
return pd.DataFrame(lens).assign(gene=repeat_families).groupby("gene").sum()
def spikein_gtf_2_lengths(spikein_gtf):
"""Computes the lengths of spike-ins, grouped by families.
Returns a DataFrame associating the summed lengths to the spike-in namse.
"""
spikein_ends = {}
with open(spikein_gtf) as gtf_file:
for line in gtf_file:
fields = line.strip().split("\t")
spikein_ends[fields[0]] = int(fields[4])
return pd.DataFrame(pd.Series(
spikein_ends,
name=("union_exon_len")).rename_axis("gene"))
def do_deseq2(cond_names, conditions, counts_data,
formula=None, contrast=None, deseq2_args=None):
"""Runs a DESeq2 differential expression analysis."""
......@@ -151,6 +165,7 @@ def do_deseq2(cond_names, conditions, counts_data,
formula = Formula("~ lib")
if contrast is None:
# FIXME: MUT and REF are not defined
# Maybe just make (formula and) contrast mandatory
contrast = StrVector(["lib", MUT, REF])
if deseq2_args is None:
deseq2_args = {"betaPrior" : True, "addMLE" : True, "independentFiltering" : True}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment