From 777c3d7f9ed05c4fd037eb3342bd12e89128ad68 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Tue, 20 Apr 2021 10:33:18 +0200
Subject: [PATCH] Pre-filter intervals before shifting.

---
 bam25prime/__init__.py        |  1 +
 bam25prime/bam25prime.py      | 10 ++++++---
 bam25prime/libcollapsebed.pyx | 39 +++++++++++++++++++++++++++++++++++
 setup.py                      |  2 +-
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/bam25prime/__init__.py b/bam25prime/__init__.py
index 9ec70ee..f486607 100644
--- a/bam25prime/__init__.py
+++ b/bam25prime/__init__.py
@@ -9,4 +9,5 @@ from .bam25prime import (
     collapse_and_sort_bedtool,
     filter_feature_size,
     make_bed_shifter,
+    make_bed_shift_checker,
     )
diff --git a/bam25prime/bam25prime.py b/bam25prime/bam25prime.py
index d57a2fa..c39bacf 100755
--- a/bam25prime/bam25prime.py
+++ b/bam25prime/bam25prime.py
@@ -25,7 +25,9 @@ from pybedtools import BedTool
 from pybedtools.featurefuncs import greater_than, less_than
 from pysam import AlignmentFile
 from .libcollapsesam import collapse_ali
-from .libcollapsebed import collapse_bed, make_bed_shifter
+from .libcollapsebed import (
+    collapse_bed,
+    make_bed_shifter, make_bed_shift_checker)
 
 # pybedtools.Interval | pysam.AlignedSegment
 # chrom               | reference_name
@@ -94,9 +96,10 @@ def collapse_and_sort(alis, shift=0):
             "\n".join(map(collapse_ali, alis)),
             from_string=True).sort(stream=True)
     shift_bed = make_bed_shifter(shift)
+    canshift_bed = make_bed_shift_checker(shift)
     return BedTool(
         "\n".join(map(collapse_ali, alis)),
-        from_string=True).each(
+        from_string=True).filter(canshift_bed).each(
             shift_bed).remove_invalid().sort(stream=True)
 
 
@@ -120,7 +123,8 @@ def collapse_and_sort_bedtool(bedtool, shift=0):
     if shift == 0:
         return bedtool.each(collapse_bed).sort(stream=True)
     shift_bed = make_bed_shifter(shift)
-    return bedtool.each(collapse_bed).each(
+    canshift_bed = make_bed_shift_checker(shift)
+    return bedtool.each(collapse_bed).filter(canshift_bed).each(
         shift_bed).remove_invalid().sort(stream=True)
 
 
diff --git a/bam25prime/libcollapsebed.pyx b/bam25prime/libcollapsebed.pyx
index 5266d6c..45a4aaf 100644
--- a/bam25prime/libcollapsebed.pyx
+++ b/bam25prime/libcollapsebed.pyx
@@ -27,6 +27,45 @@ This library contains a cythonized version of a function to collapse
 from pybedtools.cbedtools cimport Interval
 
 
+cdef ccanshift_bed(Interval bed, int shift):
+    """
+    Return True if the *bed* Interval can be shifted by *shift*
+    positions without its start coordinate becoming negative.
+
+    This should be used as a filter before attempting shifts,
+    in order to avoid invalid intervals.
+    """
+    if bed.strand == "-":
+        # Minus strand: the shift is subtracted from the coordinates,
+        # so bed.start - shift must not be negative.
+        if shift > bed.start:
+            return False
+    else:
+        # Any other strand: the shift is added to the coordinates,
+        # so bed.start + shift must not be negative.
+        if (-shift) > bed.start:
+            return False
+    return True
+
+
+def make_bed_shift_checker(shift):
+    """
+    Make a function that checks whether a bed interval can be
+    shifted by *shift* positions (with respect to the feature's
+    orientation). The check itself is done by ccanshift_bed.
+    """
+    def canshift_bed(bed):
+        """
+        Return whether the *bed* Interval could be shifted by
+        *shift* positions without having negative coordinates.
+
+        This should be used as a filter before attempting shifts,
+        in order to avoid invalid intervals.
+        """
+        return ccanshift_bed(bed, shift)
+    return canshift_bed
+
+
 cdef cshift_bed(Interval bed, int shift):
     """
     Return the Interval corresponding to the shift
diff --git a/setup.py b/setup.py
index 7bfa7d9..648e71d 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ from pybedtools.helpers import get_includes as pybedtools_get_includes
 from pysam import get_include as pysam_get_include
 
 name = "bam25prime"
-__version__ = "0.2"
+__version__ = "0.3"
 
 
 # https://stackoverflow.com/a/54138355/1878788
-- 
GitLab