Commit 9126f7df authored by Blaise Li's avatar Blaise Li
Browse files

Removing bam25prime to make it a submodule.

parent 259967e5
from .bam25prime import (
collapse_ali,
collapse_bed,
collapse_and_sort,
collapse_and_sort_bedtool,
)
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
"""
When executed, this module collapses reads in a bam file to their 5-prime end
(reducing them to length 1) and writes the result in bed format.
"""
import argparse
from pybedtools import BedTool, Interval
from pysam import AlignmentFile
from .libcollapsesam import collapse_ali
from .libcollapsebed import collapse_bed
# pybedtools.Interval | pysam.AlignedSegment
# chrom | reference_name
# start | reference_start
# end | reference_end
# bool(int(strand)) | is_reverse
#
# Interval(reference_name, reference_start, reference_end, strand=strand)
# filename = "../RNA_Seq_analyses/results_44hph_vs_38hph/hisat2/mapped_C_elegans/WT_44hph_4_polyU_on_C_elegans_sorted.bam"
# bamfile = pysam.AlignmentFile(filename, "rb")
# beds_in = pybedtools.BedTool(filename)
#
# In [99]: %timeit [int(bed.strand) for bed in beds_in]
# 2.03 s ± 25.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#
# In [100]: %timeit [ali.is_reverse for ali in bamfile.fetch()]
# 67 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
#
# In [101]: beds_in = pybedtools.BedTool(filename)
#
# In [102]: %timeit [bed.strand for bed in beds_in]
# 2.05 s ± 5.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#
# In [103]: beds_in = pybedtools.BedTool(filename)
#
# In [104]: %timeit [bed.strand == "16" for bed in beds_in]
# 2 s ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# TODO: Test if replacing by cythonized version is more efficient
# def collapse_ali(ali):
# """
# Collapse a :class:`pysam.AlignedSegment` to its 5' position.
#
# The returned object is a :class:`pybedtools.Interval` of length 1,
# with minimal location and orientation information.
#
# :param ali: The aligned segment to be collapsed.
# :type ali: :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtools.Interval`
# """
# if ali.is_reverse:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_end - 1,
# ali.reference_end,
# strand="-")
# else:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_start,
# ali.reference_start + 1,
# strand="+")
#
#
# def collapse_and_sort(alis):
# """
# Collapse elements from *ali* to their 5 prime end.
#
# The resulting :class:`pybedtool.BedTool` is sorted.
#
# :param alis: An iterable of :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtool.BedTool`
#
# :class:`pybedtool.BedTool` is an iterable:
# It can be iterated over in a for loop, or produce an iterator
# using :func:`iter` on it.
# """
# return BedTool(map(collapse_ali, alis)).sort()
#def collapse_ali(ali):
# """
# Collapse a :class:`pysam.AlignedSegment` to its 5' position.
#
# The returned object is a :class:`pybedtools.Interval` of length 1,
# with minimal location and orientation information.
#
# :param ali: The aligned segment to be collapsed.
# :type ali: :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtools.Interval`
# """
# if ali.is_reverse:
# return "\t".join([
# ali.reference_name,
# # five prime position
# str(ali.reference_end - 1),
# str(ali.reference_end),
# ".",
# ".",
# "-"])
# else:
# return "\t".join([
# ali.reference_name,
# # five prime position
# str(ali.reference_start),
# str(ali.reference_start + 1),
# ".",
# ".",
# "+"])
def collapse_and_sort(alis):
"""
Collapse elements from *alis* to their 5 prime end.
The resulting :class:`pybedtools.BedTool` is sorted.
:param alis: An iterable of :class:`pysam.AlignedSegment`
:rtype: :class:`pybedtools.BedTool`
:class:`pybedtools.BedTool` is an iterable:
It can be iterated over in a for loop, or produce an iterator
using :func:`iter` on it
(see https://stackoverflow.com/a/28353158/1878788).
"""
return BedTool(
"\n".join(map(collapse_ali, alis)),
from_string=True).sort(stream=True)
def collapse_and_sort_bedtool(bedtool):
"""
Collapse elements from *bedtool* to their 5 prime end.
The resulting :class:`pybedtool.BedTool` is sorted.
:param bedtool: A :class:`pybedtool.BedTool`
:rtype: :class:`pybedtool.BedTool`
:class:`pybedtool.BedTool` is an iterable:
It can be iterated over in a for loop, or produce an iterator
using :func:`iter` on it
(see https://stackoverflow.com/a/28353158/1878788).
"""
return bedtool.each(collapse_bed).sort(stream=True)
def main():
"""Main function of the program."""
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument(
"-b", "--bamfile",
required=True,
help="Sorted and indexed bam file containing the aligned reads.")
parser.add_argument(
"-o", "--out_bed",
required=True,
help="Bed file in which the results should be written.")
args = parser.parse_args()
with AlignmentFile(args.bamfile, "rb") as bamfile:
collapse_and_sort(bamfile.fetch()).saveas(args.out_bed)
return 0
if __name__ == "__main__":
exit(main())
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
# cython: language_level=3
"""
This library contains a cythonized version of a function to collapse
:class:`pybedtools.Interval`s to their 5' end.
"""
# http://stackoverflow.com/a/23575771/1878788
# Also works by setting language_level as comment
# at the top of the file.
#from __future__ import print_function
from pybedtools.cbedtools cimport Interval
cdef ccollapse_bed(Interval bed):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
if bed.strand == "-":
bed.start = bed.stop - 1
else:
bed.stop = bed.start + 1
return bed
def collapse_bed(bed):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
return ccollapse_bed(bed)
def main():
"""Example use case."""
print("No examples to show here.")
return 0
if __name__ == "__main__":
import sys
sys.exit(main())
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
# cython: language_level=3
"""
This library contains a cythonized version of a function to collapse aligned
segments to their 5' end.
"""
# http://stackoverflow.com/a/23575771/1878788
# Also works by setting language_level as comment
# at the top of the file.
#from __future__ import print_function
#from pybedtools import Interval
from pysam.libchtslib cimport BAM_FREVERSE
#cdef int BAM_FREVERSE = 16
from pysam.libcalignedsegment cimport AlignedSegment
cdef object ccollapse_ali(AlignedSegment ali):
"""
Return the bed information corresponding to the 5' collapse
of AlignedSegment *ali*.
"""
if (ali.flag & BAM_FREVERSE) != 0:
return "\t".join([
ali.reference_name,
# five prime position
str(ali.reference_end - 1),
str(ali.reference_end),
".",
".",
"-"])
else:
return "\t".join([
ali.reference_name,
# five prime position
str(ali.reference_start),
str(ali.reference_start + 1),
".",
".",
"+"])
def collapse_ali(ali):
"""
Return the bed information corresponding to the 5' collapse
of AlignedSegment *ali*.
"""
return ccollapse_ali(ali)
def main():
"""Example use case."""
print("No examples to show here.")
return 0
if __name__ == "__main__":
import sys
sys.exit(main())
../bam25prime/bam25prime.py
\ No newline at end of file
#!/bin/sh
python3.6 setup.py build_ext
# .egg-link does not work with PYTHONPATH ?
python3.6 -m pip install -e .
python3.6 -m pip install --no-deps --ignore-installed .
from setuptools import setup, find_packages
from glob import glob
# If you have .pyx things to cythonize
from distutils.extension import Extension
from Cython.Build import cythonize
from pybedtools.helpers import get_includes as pybedtools_get_includes
from pysam import get_include as pysam_get_include
name = "bam25prime"
__version__ = "0.1"
# https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build
extensions = [
Extension(
"bam25prime.libcollapsesam", ["bam25prime/libcollapsesam.pyx"],
include_dirs=pysam_get_include()),
Extension(
"bam25prime.libcollapsebed", ["bam25prime/libcollapsebed.pyx"],
include_dirs=glob("/home/bli/src/bedtools2-2.27.1/src/utils/*"),
#include_dirs=pybedtools_get_includes(),
language="c++"),
]
setup(
name=name,
version=__version__,
description="Library providing utilities to collapse aligned segments"
"in a bam file to their 5-prime end.",
author="Blaise Li",
author_email="blaise.li@normalesup.org",
license="MIT",
packages=find_packages(),
scripts=["bin/%s" % name],
ext_modules = cythonize(extensions),
#ext_modules = cythonize("bam25prime/libbam25prime.pyx", include_path=pysam_get_include()),
install_requires=["pysam", "pybedtools"])
# If you have .pyx things to cythonize
#ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
#install_requires=["cytoolz"],
#zip_safe=False)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment