Skip to content
Snippets Groups Projects
Commit 92433c83 authored by Blaise Li's avatar Blaise Li
Browse files

Strandedness fix depends on bedtools version.

Also updated dependencies
parent ac846d93
No related branches found
No related tags found
No related merge requests found
__copyright__ = "Copyright (C) 2020 Blaise Li"
__licence__ = "GNU GPLv3"
__version__ = 0.7
__version__ = 0.8
from .libbamutils import (
BamFile, BedFile,
filter_read_size,
......
......@@ -21,7 +21,7 @@ from itertools import dropwhile, takewhile
from operator import attrgetter
from cytoolz import merge_with
# To get coordinates of genes
from libhts import id_list_gtf2bed
from libhts import BEDTOOLS_VERSION, id_list_gtf2bed
# To collapse aligned reads to their 5' ends
from bam25prime import (
collapse_and_sort,
......@@ -60,6 +60,20 @@ def filter_read_size(alis, min_len=None, max_len=None):
yield ali
# To correctly preserve strand information after merging records
# See https://bioinformatics.stackexchange.com/q/15834/292
[major, minor, patch] = BEDTOOLS_VERSION[1:].split("."))
if (major, minor) >= (2, 27):
# s=True: Force strandedness.
# That is, only merge features that are on the same strand
# Otherwise, strand information is lost
# With 2.27 and above, it seeùs this is not enough.
# Column 7 in gtf contains strand information.
merge_kwds = {"s": True, c: 7, o: "distinct"}
else:
merge_kwds = {"s": True}
def get_gene_intervals(gene_list_filename, annot_filename, id_kwd=None):
"""
Obtain the bed coordinates of the genomic regions covered by
......@@ -79,10 +93,8 @@ def get_gene_intervals(gene_list_filename, annot_filename, id_kwd=None):
return id_list_gtf2bed(
gene_identifiers, annot_filename,
id_kwd=id_kwd).sort().merge(
# s=True: Force strandedness.
# That is, only merge features that are on the same strand
# Otherwise, strand information is lost
s=True, stream=True)
**merge_kwds,
stream=True)
# TODO: allow different upstream and downstream maximum distances
......
......@@ -20,6 +20,8 @@ setup(
packages=find_packages(),
install_requires=[
"bam25prime @ git+https://gitlab.pasteur.fr/bli/bam25prime.git",
"cytoolz",
"libhts @ git+https://gitlab.pasteur.fr/bli/libhts.git",
"libreads @ git+https://gitlab.pasteur.fr/bli/libreads.git"])
#ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
#install_requires=["cytoolz"],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment