Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
bioinfo_utils
Commits
9126f7df
Commit
9126f7df
authored
Jul 17, 2019
by
Blaise Li
Browse files
Removing bam25prime to make it a submodule.
parent
259967e5
Changes
7
Hide whitespace changes
Inline
Side-by-side
bam25prime/Python/bam25prime/__init__.py
deleted
100644 → 0
View file @
259967e5
from
.bam25prime
import
(
collapse_ali
,
collapse_bed
,
collapse_and_sort
,
collapse_and_sort_bedtool
,
)
bam25prime/Python/bam25prime/bam25prime.py
deleted
100755 → 0
View file @
259967e5
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
"""
When executed, this module collapses reads in a bam file to their 5-prime end
(reducing them to length 1) and writes the result in bed format.
"""
import
argparse
from
pybedtools
import
BedTool
,
Interval
from
pysam
import
AlignmentFile
from
.libcollapsesam
import
collapse_ali
from
.libcollapsebed
import
collapse_bed
# pybedtools.Interval | pysam.AlignedSegment
# chrom | reference_name
# start | reference_start
# end | reference_end
# bool(int(strand)) | is_reverse
#
# Interval(reference_name, reference_start, reference_end, strand=strand)
# filename = "../RNA_Seq_analyses/results_44hph_vs_38hph/hisat2/mapped_C_elegans/WT_44hph_4_polyU_on_C_elegans_sorted.bam"
# bamfile = pysam.AlignmentFile(filename, "rb")
# beds_in = pybedtools.BedTool(filename)
#
# In [99]: %timeit [int(bed.strand) for bed in beds_in]
# 2.03 s ± 25.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#
# In [100]: %timeit [ali.is_reverse for ali in bamfile.fetch()]
# 67 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
#
# In [101]: beds_in = pybedtools.BedTool(filename)
#
# In [102]: %timeit [bed.strand for bed in beds_in]
# 2.05 s ± 5.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
#
# In [103]: beds_in = pybedtools.BedTool(filename)
#
# In [104]: %timeit [bed.strand == "16" for bed in beds_in]
# 2 s ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
# TODO: Test if replacing by cythonized version is more efficient
# def collapse_ali(ali):
# """
# Collapse a :class:`pysam.AlignedSegment` to its 5' position.
#
# The returned object is a :class:`pybedtools.Interval` of length 1,
# with minimal location and orientation information.
#
# :param ali: The aligned segment to be collapsed.
# :type ali: :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtools.Interval`
# """
# if ali.is_reverse:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_end - 1,
# ali.reference_end,
# strand="-")
# else:
# return Interval(
# ali.reference_name,
# # five prime position
# ali.reference_start,
# ali.reference_start + 1,
# strand="+")
#
#
# def collapse_and_sort(alis):
# """
# Collapse elements from *ali* to their 5 prime end.
#
# The resulting :class:`pybedtool.BedTool` is sorted.
#
# :param alis: An iterable of :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtool.BedTool`
#
# :class:`pybedtool.BedTool` is an iterable:
# It can be iterated over in a for loop, or produce an iterator
# using :func:`iter` on it.
# """
# return BedTool(map(collapse_ali, alis)).sort()
#def collapse_ali(ali):
# """
# Collapse a :class:`pysam.AlignedSegment` to its 5' position.
#
# The returned object is a :class:`pybedtools.Interval` of length 1,
# with minimal location and orientation information.
#
# :param ali: The aligned segment to be collapsed.
# :type ali: :class:`pysam.AlignedSegment`
# :rtype: :class:`pybedtools.Interval`
# """
# if ali.is_reverse:
# return "\t".join([
# ali.reference_name,
# # five prime position
# str(ali.reference_end - 1),
# str(ali.reference_end),
# ".",
# ".",
# "-"])
# else:
# return "\t".join([
# ali.reference_name,
# # five prime position
# str(ali.reference_start),
# str(ali.reference_start + 1),
# ".",
# ".",
# "+"])
def
collapse_and_sort
(
alis
):
"""
Collapse elements from *alis* to their 5 prime end.
The resulting :class:`pybedtools.BedTool` is sorted.
:param alis: An iterable of :class:`pysam.AlignedSegment`
:rtype: :class:`pybedtools.BedTool`
:class:`pybedtools.BedTool` is an iterable:
It can be iterated over in a for loop, or produce an iterator
using :func:`iter` on it
(see https://stackoverflow.com/a/28353158/1878788).
"""
return
BedTool
(
"
\n
"
.
join
(
map
(
collapse_ali
,
alis
)),
from_string
=
True
).
sort
(
stream
=
True
)
def
collapse_and_sort_bedtool
(
bedtool
):
"""
Collapse elements from *bedtool* to their 5 prime end.
The resulting :class:`pybedtool.BedTool` is sorted.
:param bedtool: A :class:`pybedtool.BedTool`
:rtype: :class:`pybedtool.BedTool`
:class:`pybedtool.BedTool` is an iterable:
It can be iterated over in a for loop, or produce an iterator
using :func:`iter` on it
(see https://stackoverflow.com/a/28353158/1878788).
"""
return
bedtool
.
each
(
collapse_bed
).
sort
(
stream
=
True
)
def
main
():
"""Main function of the program."""
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
)
parser
.
add_argument
(
"-b"
,
"--bamfile"
,
required
=
True
,
help
=
"Sorted and indexed bam file containing the aligned reads."
)
parser
.
add_argument
(
"-o"
,
"--out_bed"
,
required
=
True
,
help
=
"Bed file in which the results should be written."
)
args
=
parser
.
parse_args
()
with
AlignmentFile
(
args
.
bamfile
,
"rb"
)
as
bamfile
:
collapse_and_sort
(
bamfile
.
fetch
()).
saveas
(
args
.
out_bed
)
return
0
if
__name__
==
"__main__"
:
exit
(
main
())
bam25prime/Python/bam25prime/libcollapsebed.pyx
deleted
100644 → 0
View file @
259967e5
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
# cython: language_level=3
"""
This library contains a cythonized version of a function to collapse
:class:`pybedtools.Interval`s to their 5' end.
"""
# http://stackoverflow.com/a/23575771/1878788
# Also works by setting language_level as comment
# at the top of the file.
#from __future__ import print_function
from
pybedtools.cbedtools
cimport
Interval
cdef
ccollapse_bed
(
Interval
bed
):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
if
bed
.
strand
==
"-"
:
bed
.
start
=
bed
.
stop
-
1
else
:
bed
.
stop
=
bed
.
start
+
1
return
bed
def
collapse_bed
(
bed
):
"""
Return the Interval corresponding to the 5' collapse of Interval *bed*.
"""
return
ccollapse_bed
(
bed
)
def
main
():
"""Example use case."""
print
(
"No examples to show here."
)
return
0
if
__name__
==
"__main__"
:
import
sys
sys
.
exit
(
main
())
bam25prime/Python/bam25prime/libcollapsesam.pyx
deleted
100755 → 0
View file @
259967e5
#!/usr/bin/env python3
# vim: set fileencoding=<utf-8> :
# cython: language_level=3
"""
This library contains a cythonized version of a function to collapse aligned
segments to their 5' end.
"""
# http://stackoverflow.com/a/23575771/1878788
# Also works by setting language_level as comment
# at the top of the file.
#from __future__ import print_function
#from pybedtools import Interval
from
pysam.libchtslib
cimport
BAM_FREVERSE
#cdef int BAM_FREVERSE = 16
from
pysam.libcalignedsegment
cimport
AlignedSegment
cdef
object
ccollapse_ali
(
AlignedSegment
ali
):
"""
Return the bed information corresponding to the 5' collapse
of AlignedSegment *ali*.
"""
if
(
ali
.
flag
&
BAM_FREVERSE
)
!=
0
:
return
"
\t
"
.
join
([
ali
.
reference_name
,
# five prime position
str
(
ali
.
reference_end
-
1
),
str
(
ali
.
reference_end
),
"."
,
"."
,
"-"
])
else
:
return
"
\t
"
.
join
([
ali
.
reference_name
,
# five prime position
str
(
ali
.
reference_start
),
str
(
ali
.
reference_start
+
1
),
"."
,
"."
,
"+"
])
def
collapse_ali
(
ali
):
"""
Return the bed information corresponding to the 5' collapse
of AlignedSegment *ali*.
"""
return
ccollapse_ali
(
ali
)
def
main
():
"""Example use case."""
print
(
"No examples to show here."
)
return
0
if
__name__
==
"__main__"
:
import
sys
sys
.
exit
(
main
())
bam25prime/Python/bin/bam25prime
deleted
120000 → 0
View file @
259967e5
../bam25prime/bam25prime.py
\ No newline at end of file
bam25prime/Python/install.sh
deleted
100755 → 0
View file @
259967e5
#!/bin/sh
python3.6 setup.py build_ext
# .egg-link does not work with PYTHONPATH ?
python3.6
-m
pip
install
-e
.
python3.6
-m
pip
install
--no-deps
--ignore-installed
.
bam25prime/Python/setup.py
deleted
100644 → 0
View file @
259967e5
from
setuptools
import
setup
,
find_packages
from
glob
import
glob
# If you have .pyx things to cythonize
from
distutils.extension
import
Extension
from
Cython.Build
import
cythonize
from
pybedtools.helpers
import
get_includes
as
pybedtools_get_includes
from
pysam
import
get_include
as
pysam_get_include
name
=
"bam25prime"
__version__
=
"0.1"
# https://github.com/cython/cython/blob/master/docs/src/reference/compilation.rst#configuring-the-c-build
extensions
=
[
Extension
(
"bam25prime.libcollapsesam"
,
[
"bam25prime/libcollapsesam.pyx"
],
include_dirs
=
pysam_get_include
()),
Extension
(
"bam25prime.libcollapsebed"
,
[
"bam25prime/libcollapsebed.pyx"
],
include_dirs
=
glob
(
"/home/bli/src/bedtools2-2.27.1/src/utils/*"
),
#include_dirs=pybedtools_get_includes(),
language
=
"c++"
),
]
setup
(
name
=
name
,
version
=
__version__
,
description
=
"Library providing utilities to collapse aligned segments"
"in a bam file to their 5-prime end."
,
author
=
"Blaise Li"
,
author_email
=
"blaise.li@normalesup.org"
,
license
=
"MIT"
,
packages
=
find_packages
(),
scripts
=
[
"bin/%s"
%
name
],
ext_modules
=
cythonize
(
extensions
),
#ext_modules = cythonize("bam25prime/libbam25prime.pyx", include_path=pysam_get_include()),
install_requires
=
[
"pysam"
,
"pybedtools"
])
# If you have .pyx things to cythonize
#ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
#install_requires=["cytoolz"],
#zip_safe=False)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment