diff --git a/bam25prime/Python/bam25prime/__init__.py b/bam25prime/Python/bam25prime/__init__.py index f3ac9f63620cc5930d7bece50349a7b966975639..9c979ee4d31bca39c8c2c27598fb5f7c94e39ddf 100644 --- a/bam25prime/Python/bam25prime/__init__.py +++ b/bam25prime/Python/bam25prime/__init__.py @@ -1,3 +1,6 @@ from .bam25prime import ( collapse_ali, - collapse_and_sort) + collapse_bed, + collapse_and_sort, + collapse_and_sort_bedtool, + ) diff --git a/bam25prime/Python/bam25prime/bam25prime.py b/bam25prime/Python/bam25prime/bam25prime.py index 60017112f21a3be36f241207f425a80e842fa2eb..0c9e84879ace2da5a5db8509ea39bc3b278c9c97 100755 --- a/bam25prime/Python/bam25prime/bam25prime.py +++ b/bam25prime/Python/bam25prime/bam25prime.py @@ -8,7 +8,8 @@ When executed, this module collapses reads in a bam file to their 5-prime end import argparse from pybedtools import BedTool, Interval from pysam import AlignmentFile -from .libbam25prime import collapse_ali +from .libcollapsesam import collapse_ali +from .libcollapsebed import collapse_bed # pybedtools.Interval | pysam.AlignedSegment # chrom | reference_name @@ -114,7 +115,7 @@ from .libbam25prime import collapse_ali def collapse_and_sort(alis): """ - Collapse elements from *ali* to their 5 prime end. + Collapse elements from *alis* to their 5 prime end. The resulting :class:`pybedtool.BedTool` is sorted. @@ -131,6 +132,23 @@ def collapse_and_sort(alis): from_string=True).sort(stream=True) +def collapse_and_sort_bedtool(bedtool): + """ + Collapse elements from *bedtool* to their 5 prime end. + + The resulting :class:`pybedtool.BedTool` is sorted. + + :param bedtool: A :class:`pybedtool.BedTool` + :rtype: :class:`pybedtool.BedTool` + + :class:`pybedtool.BedTool` is an iterable: + It can be iterated over in a for loop, or produce an iterator + using :func:`iter` on it + (see https://stackoverflow.com/a/28353158/1878788). + """ + return bedtool.each(collapse_bed).sort(stream=True) + + def main(): """Main function of the program.""" parser = argparse.ArgumentParser( diff --git a/bam25prime/Python/bam25prime/libbam25prime.pyx b/bam25prime/Python/bam25prime/libbam25prime.pyx deleted file mode 100755 index d91e4435564485b84c4d9aa585fb997c858bdef3..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bam25prime/libbam25prime.pyx +++ /dev/null @@ -1,149 +0,0 @@ -#!/usr/bin/env python3 -# vim: set fileencoding=<utf-8> : -# cython: language_level=3 -""" -This library contains a cythonized version of a function to collapse aligned -segments to their 5' end. -""" - -# http://stackoverflow.com/a/23575771/1878788 -# Also works by setting language_level as comment -# at the top of the file. -#from __future__ import print_function -#from pybedtools import Interval -from pysam.libchtslib cimport BAM_FREVERSE -from pysam.libcalignedsegment cimport AlignedSegment - -#cdef int BAM_FREVERSE = 16 - - -cdef object ccollapse_ali(AlignedSegment ali): - """ - Return the bed information corresponding to the 5' collapse - of AlignedSegment *ali*. - """ - if (ali.flag & BAM_FREVERSE) != 0: - return "\t".join([ - ali.reference_name, - # five prime position - str(ali.reference_end - 1), - str(ali.reference_end), - ".", - ".", - "-"]) - else: - return "\t".join([ - ali.reference_name, - # five prime position - str(ali.reference_start), - str(ali.reference_start + 1), - ".", - ".", - "+"]) - - -def collapse_ali(ali): - """ - Return the bed information corresponding to the 5' collapse - of AlignedSegment *ali*. - """ - return ccollapse_ali(ali) - - -## These should have problems mapping because the T-tail is not -## in the genome sequence -#cdef bint chas_csr1_endosi_signature(str seq, int read_len): -# # if read_len <= 22, t_tail will be the empty string -# cdef str t_tail -# t_tail = (read_len - SI_MIN) * "T" -# if t_tail: -# return (seq[0] == "G") and (seq[SI_MIN:] == t_tail) -# else: -# return False -# -# -#def has_csr1_endosi_signature(str seq, int read_len): -# return chas_csr1_endosi_signature(seq, read_len) -# -# -#cdef bint chas_allsi_signature(str seq): -# cdef unsigned int read_len = len(seq) -# return (seq[0] == "G" and (SI_MIN <= read_len <= SI_MAX)) or chas_csr1_endosi_signature(seq, read_len) -# -# -#cdef class Composition: -# cdef readonly int a, c, g, t -# def __init__(self, int a=0, int c=0, int g=0, int t=0): -# self.a = a -# self.c = c -# self.g = g -# self.t = t -# def __add__(self, other): -# return Composition( -# self.a + other.a, -# self.c + other.c, -# self.g + other.g, -# self.t + other.t) -# def __reduce__(self): -# d = {} -# d["a"] = self.a -# d["c"] = self.c -# d["g"] = self.g -# d["t"] = self.t -# return (Composition, (), d) -# def __setstate__(self, d): -# self.a = d["a"] -# self.c = d["c"] -# self.g = d["g"] -# self.t = d["t"] -# -# -#def count_nucl(int read_len, str nucl): -# if nucl == "A": -# return {read_len: Composition(a=1)} -# elif nucl == "C": -# return {read_len: Composition(c=1)} -# elif nucl == "G": -# return {read_len: Composition(g=1)} -# elif nucl == "T": -# return {read_len: Composition(t=1)} -# else: -# return {read_len: Composition()} -# -# -#def collapse_ali(ali): -# """ -# Collapse a :class:`pysam.AlignedSegment` to its 5' position. -# -# The returned object is a :class:`pybedtools.Interval` of length 1, -# with minimal location and orientation information. -# -# :param ali: The aligned segment to be collapsed. -# :type ali: :class:`pysam.AlignedSegment` -# :rtype: :class:`pybedtools.Interval` -# """ -# if ali.is_reverse: -# return Interval( -# ali.reference_name, -# # five prime position -# ali.reference_end - 1, -# ali.reference_end, -# strand="-") -# else: -# return Interval( -# ali.reference_name, -# # five prime position -# ali.reference_start, -# ali.reference_start + 1, -# strand="+") - - -def main(): - """Example use case.""" - print("No examples to show here.") - return 0 - - -if __name__ == "__main__": - import sys - sys.exit(main()) diff --git a/bam25prime/Python/bam25prime/libcollapsebed.pyx b/bam25prime/Python/bam25prime/libcollapsebed.pyx new file mode 100644 index 0000000000000000000000000000000000000000..ba2aa8d42db4f6880ffa2c03a020961d90790fcf --- /dev/null +++ b/bam25prime/Python/bam25prime/libcollapsebed.pyx @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# vim: set fileencoding=<utf-8> : +# cython: language_level=3 +""" +This library contains a cythonized version of a function to collapse +:class:`pybedtools.Interval`s to their 5' end. +""" + +# http://stackoverflow.com/a/23575771/1878788 +# Also works by setting language_level as comment +# at the top of the file. +#from __future__ import print_function +from pybedtools.cbedtools cimport Interval + + +cdef ccollapse_bed(Interval bed): + """ + Return the Interval corresponding to the 5' collapse of Interval *bed*. + """ + if bed.strand == "-": + bed.start = bed.stop - 1 + else: + bed.stop = bed.start + 1 + return bed + + +def collapse_bed(bed): + """ + Return the Interval corresponding to the 5' collapse of Interval *bed*. + """ + return ccollapse_bed(bed) + + +def main(): + """Example use case.""" + print("No examples to show here.") + return 0 + + +if __name__ == "__main__": + import sys + sys.exit(main()) diff --git a/bam25prime/Python/bam25prime/libcollapsesam.pyx b/bam25prime/Python/bam25prime/libcollapsesam.pyx new file mode 100755 index 0000000000000000000000000000000000000000..4d7879a480f5875860cf3bb0e6ebe916c0ce24cc --- /dev/null +++ b/bam25prime/Python/bam25prime/libcollapsesam.pyx @@ -0,0 +1,60 @@ +#!/usr/bin/env python3 +# vim: set fileencoding=<utf-8> : +# cython: language_level=3 +""" +This library contains a cythonized version of a function to collapse aligned +segments to their 5' end. +""" + +# http://stackoverflow.com/a/23575771/1878788 +# Also works by setting language_level as comment +# at the top of the file. +#from __future__ import print_function +#from pybedtools import Interval +from pysam.libchtslib cimport BAM_FREVERSE +#cdef int BAM_FREVERSE = 16 +from pysam.libcalignedsegment cimport AlignedSegment + + +cdef object ccollapse_ali(AlignedSegment ali): + """ + Return the bed information corresponding to the 5' collapse + of AlignedSegment *ali*. + """ + if (ali.flag & BAM_FREVERSE) != 0: + return "\t".join([ + ali.reference_name, + # five prime position + str(ali.reference_end - 1), + str(ali.reference_end), + ".", + ".", + "-"]) + else: + return "\t".join([ + ali.reference_name, + # five prime position + str(ali.reference_start), + str(ali.reference_start + 1), + ".", + ".", + "+"]) + + +def collapse_ali(ali): + """ + Return the bed information corresponding to the 5' collapse + of AlignedSegment *ali*. + """ + return ccollapse_ali(ali) + + +def main(): + """Example use case.""" + print("No examples to show here.") + return 0 + + +if __name__ == "__main__": + import sys + sys.exit(main()) diff --git a/bam25prime/Python/setup.py b/bam25prime/Python/setup.py index e89db1d7cdc2bd46b17fdca58cfaecb7f215de6a..0f716255bffc30a79efd45beaeba2162d61bf9d4 100644 --- a/bam25prime/Python/setup.py +++ b/bam25prime/Python/setup.py @@ -1,4 +1,5 @@ from setuptools import setup, find_packages +from glob import glob # If you have .pyx things to cythonize from distutils.extension import Extension from Cython.Build import cythonize @@ -10,8 +11,12 @@ __version__ = "0.1" # https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build extensions = [ Extension( - "bam25prime.libbam25prime", ["bam25prime/libbam25prime.pyx"], + "bam25prime.libcollapsesam", ["bam25prime/libcollapsesam.pyx"], include_dirs=pysam_get_include()), + Extension( + "bam25prime.libcollapsebed", ["bam25prime/libcollapsebed.pyx"], + include_dirs=glob("/home/bli/src/bedtools2-2.27.1/src/utils/*"), + language="c++"), ] setup(