diff --git a/bam25prime/__init__.py b/bam25prime/__init__.py
index 9ec70ee2161d3c1de24be98018b64fffb82e8dba..f486607f5cff965189c015392d177585983dce5c 100644
--- a/bam25prime/__init__.py
+++ b/bam25prime/__init__.py
@@ -9,4 +9,5 @@ from .bam25prime import (
     collapse_and_sort_bedtool,
     filter_feature_size,
     make_bed_shifter,
+    make_bed_shift_checker,
 )
diff --git a/bam25prime/bam25prime.py b/bam25prime/bam25prime.py
index d57a2faabffaed3023460bda7bacea914ea1f549..c39bacf10ef29caea08121c66f5e47667c555940 100755
--- a/bam25prime/bam25prime.py
+++ b/bam25prime/bam25prime.py
@@ -25,7 +25,9 @@ from pybedtools import BedTool
 from pybedtools.featurefuncs import greater_than, less_than
 from pysam import AlignmentFile
 from .libcollapsesam import collapse_ali
-from .libcollapsebed import collapse_bed, make_bed_shifter
+from .libcollapsebed import (
+    collapse_bed,
+    make_bed_shifter, make_bed_shift_checker)
 
 # pybedtools.Interval | pysam.AlignedSegment
 # chrom | reference_name
@@ -94,9 +96,10 @@ def collapse_and_sort(alis, shift=0):
             "\n".join(map(collapse_ali, alis)),
             from_string=True).sort(stream=True)
     shift_bed = make_bed_shifter(shift)
+    canshift_bed = make_bed_shift_checker(shift)
     return BedTool(
         "\n".join(map(collapse_ali, alis)),
-        from_string=True).each(
+        from_string=True).filter(canshift_bed).each(
             shift_bed).remove_invalid().sort(stream=True)
 
 
@@ -120,7 +123,8 @@ def collapse_and_sort_bedtool(bedtool, shift=0):
     if shift == 0:
         return bedtool.each(collapse_bed).sort(stream=True)
     shift_bed = make_bed_shifter(shift)
-    return bedtool.each(collapse_bed).each(
+    canshift_bed = make_bed_shift_checker(shift)
+    return bedtool.each(collapse_bed).filter(canshift_bed).each(
         shift_bed).remove_invalid().sort(stream=True)
 
 
diff --git a/bam25prime/libcollapsebed.pyx b/bam25prime/libcollapsebed.pyx
index 5266d6c5bfb6443fd11d55978dadfaecd0c477a0..45a4aafc5ccead6b371c6c7fec4f6173f5564c2f 100644
--- a/bam25prime/libcollapsebed.pyx
+++ b/bam25prime/libcollapsebed.pyx
@@ -27,6 +27,45 @@ This library contains a cythonized version of a function to collapse
 from pybedtools.cbedtools cimport Interval
 
 
+cdef ccanshift_bed(Interval bed, int shift):
+    """
+    Check whether the *bed* Interval could be shifted by
+    *shift* positions without having negative coordinates.
+
+    This should be used as filter before attempting shifts,
+    in order to avoid invalid intervals.
+
+    Return False if the shifted start would be negative, True otherwise.
+    """
+    if bed.strand == "-":
+        # A "-" strand shift would set bed.start to bed.start - shift.
+        if shift > bed.start:
+            return False
+    else:
+        # Otherwise the shift would set bed.start to bed.start + shift.
+        if (-shift) > bed.start:
+            return False
+    return True
+
+
+def make_bed_shift_checker(shift):
+    """
+    Make a function that checks whether bed intervals
+    can be shifted by *shift* positions (with respect to
+    the feature's orientation).
+    """
+    def canshift_bed(bed):
+        """
+        Check whether the *bed* Interval could be shifted by
+        *shift* positions without having negative coordinates.
+
+        This should be used as filter before attempting shifts,
+        in order to avoid invalid intervals.
+        """
+        return ccanshift_bed(bed, shift)
+    return canshift_bed
+
+
 cdef cshift_bed(Interval bed, int shift):
     """
     Return the Interval corresponding to the shift
diff --git a/setup.py b/setup.py
index 7bfa7d9cc412af3b6ee577ec7dd5069e758b070e..648e71d89b486a66bb05fdfb3e5d79e2f3ae6e4d 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ from pybedtools.helpers import get_includes as pybedtools_get_includes
 from pysam import get_include as pysam_get_include
 
 name = "bam25prime"
-__version__ = "0.2"
+__version__ = "0.3"
 
 # https://stackoverflow.com/a/54138355/1878788