diff --git a/bam25prime/Python/bam25prime/__init__.py b/bam25prime/Python/bam25prime/__init__.py deleted file mode 100644 index 9c979ee4d31bca39c8c2c27598fb5f7c94e39ddf..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bam25prime/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .bam25prime import ( - collapse_ali, - collapse_bed, - collapse_and_sort, - collapse_and_sort_bedtool, - ) diff --git a/bam25prime/Python/bam25prime/bam25prime.py b/bam25prime/Python/bam25prime/bam25prime.py deleted file mode 100755 index 3ff2d6ee0edf6336864fd251919a568ad0b0eabe..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bam25prime/bam25prime.py +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env python3 -# vim: set fileencoding=<utf-8> : -""" -When executed, this module collapses reads in a bam file to their 5-prime end -(reducing them to length 1) and writes the result in bed format. -""" - -import argparse -from pybedtools import BedTool, Interval -from pysam import AlignmentFile -from .libcollapsesam import collapse_ali -from .libcollapsebed import collapse_bed - -# pybedtools.Interval | pysam.AlignedSegment -# chrom | reference_name -# start | reference_start -# end | reference_end -# bool(int(strand)) | is_reverse -# -# Interval(reference_name, reference_start, reference_end, strand=strand) - -# filename = "../RNA_Seq_analyses/results_44hph_vs_38hph/hisat2/mapped_C_elegans/WT_44hph_4_polyU_on_C_elegans_sorted.bam" -# bamfile = pysam.AlignmentFile(filename, "rb") -# beds_in = pybedtools.BedTool(filename) -# -# In [99]: %timeit [int(bed.strand) for bed in beds_in] -# 2.03 s ± 25.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) -# -# In [100]: %timeit [ali.is_reverse for ali in bamfile.fetch()] -# 67 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) -# -# In [101]: beds_in = pybedtools.BedTool(filename) -# -# In [102]: %timeit [bed.strand for bed in beds_in] -# 2.05 s ± 5.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) -# -# In [103]: beds_in = pybedtools.BedTool(filename) -# -# In [104]: %timeit [bed.strand == "16" for bed in beds_in] -# 2 s ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) - - -# TODO: Test if replacing by cythonized version is more efficient -# def collapse_ali(ali): -# """ -# Collapse a :class:`pysam.AlignedSegment` to its 5' position. -# -# The returned object is a :class:`pybedtools.Interval` of length 1, -# with minimal location and orientation information. -# -# :param ali: The aligned segment to be collapsed. -# :type ali: :class:`pysam.AlignedSegment` -# :rtype: :class:`pybedtools.Interval` -# """ -# if ali.is_reverse: -# return Interval( -# ali.reference_name, -# # five prime position -# ali.reference_end - 1, -# ali.reference_end, -# strand="-") -# else: -# return Interval( -# ali.reference_name, -# # five prime position -# ali.reference_start, -# ali.reference_start + 1, -# strand="+") -# -# -# def collapse_and_sort(alis): -# """ -# Collapse elements from *ali* to their 5 prime end. -# -# The resulting :class:`pybedtool.BedTool` is sorted. -# -# :param alis: An iterable of :class:`pysam.AlignedSegment` -# :rtype: :class:`pybedtool.BedTool` -# -# :class:`pybedtool.BedTool` is an iterable: -# It can be iterated over in a for loop, or produce an iterator -# using :func:`iter` on it. -# """ -# return BedTool(map(collapse_ali, alis)).sort() -#def collapse_ali(ali): -# """ -# Collapse a :class:`pysam.AlignedSegment` to its 5' position. -# -# The returned object is a :class:`pybedtools.Interval` of length 1, -# with minimal location and orientation information. -# -# :param ali: The aligned segment to be collapsed. -# :type ali: :class:`pysam.AlignedSegment` -# :rtype: :class:`pybedtools.Interval` -# """ -# if ali.is_reverse: -# return "\t".join([ -# ali.reference_name, -# # five prime position -# str(ali.reference_end - 1), -# str(ali.reference_end), -# ".", -# ".", -# "-"]) -# else: -# return "\t".join([ -# ali.reference_name, -# # five prime position -# str(ali.reference_start), -# str(ali.reference_start + 1), -# ".", -# ".", -# "+"]) - - -def collapse_and_sort(alis): - """ - Collapse elements from *alis* to their 5 prime end. - - The resulting :class:`pybedtools.BedTool` is sorted. - - :param alis: An iterable of :class:`pysam.AlignedSegment` - :rtype: :class:`pybedtools.BedTool` - - :class:`pybedtools.BedTool` is an iterable: - It can be iterated over in a for loop, or produce an iterator - using :func:`iter` on it - (see https://stackoverflow.com/a/28353158/1878788). - """ - return BedTool( - "\n".join(map(collapse_ali, alis)), - from_string=True).sort(stream=True) - - -def collapse_and_sort_bedtool(bedtool): - """ - Collapse elements from *bedtool* to their 5 prime end. - - The resulting :class:`pybedtool.BedTool` is sorted. - - :param bedtool: A :class:`pybedtool.BedTool` - :rtype: :class:`pybedtool.BedTool` - - :class:`pybedtool.BedTool` is an iterable: - It can be iterated over in a for loop, or produce an iterator - using :func:`iter` on it - (see https://stackoverflow.com/a/28353158/1878788). - """ - return bedtool.each(collapse_bed).sort(stream=True) - - -def main(): - """Main function of the program.""" - parser = argparse.ArgumentParser( - description=__doc__, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - "-b", "--bamfile", - required=True, - help="Sorted and indexed bam file containing the aligned reads.") - parser.add_argument( - "-o", "--out_bed", - required=True, - help="Bed file in which the results should be written.") - args = parser.parse_args() - with AlignmentFile(args.bamfile, "rb") as bamfile: - collapse_and_sort(bamfile.fetch()).saveas(args.out_bed) - return 0 - - -if __name__ == "__main__": - exit(main()) diff --git a/bam25prime/Python/bam25prime/libcollapsebed.pyx b/bam25prime/Python/bam25prime/libcollapsebed.pyx deleted file mode 100644 index ba2aa8d42db4f6880ffa2c03a020961d90790fcf..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bam25prime/libcollapsebed.pyx +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 -# vim: set fileencoding=<utf-8> : -# cython: language_level=3 -""" -This library contains a cythonized version of a function to collapse -:class:`pybedtools.Interval`s to their 5' end. -""" - -# http://stackoverflow.com/a/23575771/1878788 -# Also works by setting language_level as comment -# at the top of the file. -#from __future__ import print_function -from pybedtools.cbedtools cimport Interval - - -cdef ccollapse_bed(Interval bed): - """ - Return the Interval corresponding to the 5' collapse of Interval *bed*. - """ - if bed.strand == "-": - bed.start = bed.stop - 1 - else: - bed.stop = bed.start + 1 - return bed - - -def collapse_bed(bed): - """ - Return the Interval corresponding to the 5' collapse of Interval *bed*. - """ - return ccollapse_bed(bed) - - -def main(): - """Example use case.""" - print("No examples to show here.") - return 0 - - -if __name__ == "__main__": - import sys - sys.exit(main()) diff --git a/bam25prime/Python/bam25prime/libcollapsesam.pyx b/bam25prime/Python/bam25prime/libcollapsesam.pyx deleted file mode 100755 index 4d7879a480f5875860cf3bb0e6ebe916c0ce24cc..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bam25prime/libcollapsesam.pyx +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/env python3 -# vim: set fileencoding=<utf-8> : -# cython: language_level=3 -""" -This library contains a cythonized version of a function to collapse aligned -segments to their 5' end. -""" - -# http://stackoverflow.com/a/23575771/1878788 -# Also works by setting language_level as comment -# at the top of the file. -#from __future__ import print_function -#from pybedtools import Interval -from pysam.libchtslib cimport BAM_FREVERSE -#cdef int BAM_FREVERSE = 16 -from pysam.libcalignedsegment cimport AlignedSegment - - -cdef object ccollapse_ali(AlignedSegment ali): - """ - Return the bed information corresponding to the 5' collapse - of AlignedSegment *ali*. - """ - if (ali.flag & BAM_FREVERSE) != 0: - return "\t".join([ - ali.reference_name, - # five prime position - str(ali.reference_end - 1), - str(ali.reference_end), - ".", - ".", - "-"]) - else: - return "\t".join([ - ali.reference_name, - # five prime position - str(ali.reference_start), - str(ali.reference_start + 1), - ".", - ".", - "+"]) - - -def collapse_ali(ali): - """ - Return the bed information corresponding to the 5' collapse - of AlignedSegment *ali*. - """ - return ccollapse_ali(ali) - - -def main(): - """Example use case.""" - print("No examples to show here.") - return 0 - - -if __name__ == "__main__": - import sys - sys.exit(main()) diff --git a/bam25prime/Python/bin/bam25prime b/bam25prime/Python/bin/bam25prime deleted file mode 120000 index 2929fb4ac91a82dd63ebbb0b9474ed1b10bc11f0..0000000000000000000000000000000000000000 --- a/bam25prime/Python/bin/bam25prime +++ /dev/null @@ -1 +0,0 @@ -../bam25prime/bam25prime.py \ No newline at end of file diff --git a/bam25prime/Python/install.sh b/bam25prime/Python/install.sh deleted file mode 100755 index 5ddc41a80e7fe6839786c8be3efa902015bc1b2a..0000000000000000000000000000000000000000 --- a/bam25prime/Python/install.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -python3.6 setup.py build_ext -# .egg-link does not work with PYTHONPATH ? -python3.6 -m pip install -e . -python3.6 -m pip install --no-deps --ignore-installed . diff --git a/bam25prime/Python/setup.py b/bam25prime/Python/setup.py deleted file mode 100644 index fb09e7f1c1227cf1cbb2b6e9a4cb5f7b33cdc170..0000000000000000000000000000000000000000 --- a/bam25prime/Python/setup.py +++ /dev/null @@ -1,40 +0,0 @@ -from setuptools import setup, find_packages -from glob import glob -# If you have .pyx things to cythonize -from distutils.extension import Extension -from Cython.Build import cythonize -from pybedtools.helpers import get_includes as pybedtools_get_includes -from pysam import get_include as pysam_get_include - -name = "bam25prime" -__version__ = "0.1" - -# https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build -extensions = [ - Extension( - "bam25prime.libcollapsesam", ["bam25prime/libcollapsesam.pyx"], - include_dirs=pysam_get_include()), - Extension( - "bam25prime.libcollapsebed", ["bam25prime/libcollapsebed.pyx"], - include_dirs=glob("/home/bli/src/bedtools2-2.27.1/src/utils/*"), - #include_dirs=pybedtools_get_includes(), - language="c++"), - ] - -setup( - name=name, - version=__version__, - description="Library providing utilities to collapse aligned segments" - "in a bam file to their 5-prime end.", - author="Blaise Li", - author_email="blaise.li@normalesup.org", - license="MIT", - packages=find_packages(), - scripts=["bin/%s" % name], - ext_modules = cythonize(extensions), - #ext_modules = cythonize("bam25prime/libbam25prime.pyx", include_path=pysam_get_include()), - install_requires=["pysam", "pybedtools"]) -# If you have .pyx things to cythonize - #ext_modules = cythonize("libsmallrna/libsmallrna.pyx"), - #install_requires=["cytoolz"], - #zip_safe=False)