Skip to content
Snippets Groups Projects
Commit d0eee539 authored by Blaise Li's avatar Blaise Li
Browse files

Can also collapse pybedtools Interval and BedTool.

parent edab310e
No related branches found
No related tags found
No related merge requests found
from .bam25prime import (
collapse_ali,
collapse_and_sort)
collapse_bed,
collapse_and_sort,
collapse_and_sort_bedtool,
)
......@@ -8,7 +8,8 @@ When executed, this module collapses reads in a bam file to their 5-prime end
import argparse
from pybedtools import BedTool, Interval
from pysam import AlignmentFile
from .libbam25prime import collapse_ali
from .libcollapsesam import collapse_ali
from .libcollapsebed import collapse_bed
# pybedtools.Interval | pysam.AlignedSegment
# chrom | reference_name
......@@ -114,7 +115,7 @@ from .libbam25prime import collapse_ali
def collapse_and_sort(alis):
"""
Collapse elements from *ali* to their 5 prime end.
Collapse elements from *alis* to their 5 prime end.
The resulting :class:`pybedtool.BedTool` is sorted.
......@@ -131,6 +132,23 @@ def collapse_and_sort(alis):
from_string=True).sort(stream=True)
def collapse_and_sort_bedtool(bedtool):
"""
Collapse elements from *bedtool* to their 5 prime end.
The resulting :class:`pybedtool.BedTool` is sorted.
:param bedtool: A :class:`pybedtool.BedTool`
:rtype: :class:`pybedtool.BedTool`
:class:`pybedtool.BedTool` is an iterable:
It can be iterated over in a for loop, or produce an iterator
using :func:`iter` on it
(see https://stackoverflow.com/a/28353158/1878788).
"""
return bedtool.each(collapse_bed).sort(stream=True)
def main():
"""Main function of the program."""
parser = argparse.ArgumentParser(
......
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
# cython: language_level=3
"""
This library contains a cythonized version of a function to collapse
:class:`pybedtools.Interval`s to their 5' end.
"""
# http://stackoverflow.com/a/23575771/1878788
# Also works by setting language_level as comment
# at the top of the file.
#from __future__ import print_function
from pybedtools.cbedtools cimport Interval
cdef ccollapse_bed(Interval bed):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
if bed.strand == "-":
bed.start = bed.stop - 1
else:
bed.stop = bed.start + 1
return bed
def collapse_bed(bed):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
return ccollapse_bed(bed)
def main():
"""Example use case."""
print("No examples to show here.")
return 0
if __name__ == "__main__":
import sys
sys.exit(main())
......@@ -12,9 +12,8 @@ segments to their 5' end.
#from __future__ import print_function
#from pybedtools import Interval
from pysam.libchtslib cimport BAM_FREVERSE
from pysam.libcalignedsegment cimport AlignedSegment
#cdef int BAM_FREVERSE = 16
from pysam.libcalignedsegment cimport AlignedSegment
cdef object ccollapse_ali(AlignedSegment ali):
......@@ -50,94 +49,6 @@ def collapse_ali(ali):
return ccollapse_ali(ali)
## These should have problems mapping because the T-tail is not
## in the genome sequence
#cdef bint chas_csr1_endosi_signature(str seq, int read_len):
# # if read_len <= 22, t_tail will be the empty string
# cdef str t_tail
# t_tail = (read_len - SI_MIN) * "T"
# if t_tail:
# return (seq[0] == "G") and (seq[SI_MIN:] == t_tail)
# else:
# return False
#
#
#def has_csr1_endosi_signature(str seq, int read_len):
# return chas_csr1_endosi_signature(seq, read_len)
#
#
#cdef bint chas_allsi_signature(str seq):
# cdef unsigned int read_len = len(seq)
# return (seq[0] == "G" and (SI_MIN <= read_len <= SI_MAX)) or chas_csr1_endosi_signature(seq, read_len)
#
#
#cdef class Composition:
# cdef readonly int a, c, g, t
# def __init__(self, int a=0, int c=0, int g=0, int t=0):
# self.a = a
# self.c = c
# self.g = g
# self.t = t
# def __add__(self, other):
# return Composition(
# self.a + other.a,
# self.c + other.c,
# self.g + other.g,
# self.t + other.t)
# def __reduce__(self):
# d = {}
# d["a"] = self.a
# d["c"] = self.c
# d["g"] = self.g
# d["t"] = self.t
# return (Composition, (), d)
# def __setstate__(self, d):
# self.a = d["a"]
# self.c = d["c"]
# self.g = d["g"]
# self.t = d["t"]
#
#
#def count_nucl(int read_len, str nucl):
# if nucl == "A":
# return {read_len: Composition(a=1)}
# elif nucl == "C":
# return {read_len: Composition(c=1)}
# elif nucl == "G":
# return {read_len: Composition(g=1)}
# elif nucl == "T":
# return {read_len: Composition(t=1)}
# else:
# return {read_len: Composition()}
#
#
#def collapse_ali(ali):
# """
# Collapse a :class:`pysam.AlignedSegment` to its 5' position.
#
# The returned object is a :class:`pybedtools.Interval` of length 1,
# with minimal location and orientation information.
#
# :param ali: The aligned segment to be collapsed.
# :type ali: :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtools.Interval`
# """
# if ali.is_reverse:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_end - 1,
# ali.reference_end,
# strand="-")
# else:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_start,
# ali.reference_start + 1,
# strand="+")
def main():
"""Example use case."""
print("No examples to show here.")
......
from setuptools import setup, find_packages
from glob import glob
# If you have .pyx things to cythonize
from distutils.extension import Extension
from Cython.Build import cythonize
......@@ -10,8 +11,12 @@ __version__ = "0.1"
# https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build
extensions = [
Extension(
"bam25prime.libbam25prime", ["bam25prime/libbam25prime.pyx"],
"bam25prime.libcollapsesam", ["bam25prime/libcollapsesam.pyx"],
include_dirs=pysam_get_include()),
Extension(
"bam25prime.libcollapsebed", ["bam25prime/libcollapsebed.pyx"],
include_dirs=glob("/home/bli/src/bedtools2-2.27.1/src/utils/*"),
language="c++"),
]
setup(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment