Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
libbamutils
Manage
Activity
Members
Labels
Plan
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Blaise LI
libbamutils
Commits
a0966db5
Commit
a0966db5
authored
4 years ago
by
Blaise Li
Browse files
Options
Downloads
Patches
Plain Diff
Fix dependencies, distance can use bed.
parent
4be6fea6
No related branches found
No related tags found
No related merge requests found
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
libbamutils/libbamutils.py
+42
-32
42 additions, 32 deletions
libbamutils/libbamutils.py
pyproject.toml
+1
-1
1 addition, 1 deletion
pyproject.toml
setup.py
+2
-2
2 additions, 2 deletions
setup.py
with
45 additions
and
35 deletions
libbamutils/libbamutils.py
+
42
−
32
View file @
a0966db5
...
@@ -20,11 +20,13 @@ from collections import Counter, deque
...
@@ -20,11 +20,13 @@ from collections import Counter, deque
from
itertools
import
dropwhile
,
takewhile
from
itertools
import
dropwhile
,
takewhile
from
operator
import
attrgetter
from
operator
import
attrgetter
from
cytoolz
import
merge_with
from
cytoolz
import
merge_with
from
pysam
import
AlignmentFile
# To get coordinates of genes
# To get coordinates of genes
from
libhts
import
id_list_gtf2bed
from
libhts
import
id_list_gtf2bed
# To collapse aligned reads to their 5' ends
# To collapse aligned reads to their 5' ends
from
bam25prime
import
collapse_and_sort
,
filter_feature_size
from
bam25prime
import
collapse_and_sort
,
filter_feature_size
# bam25prime re-exports those classes from pysam and pybedtools
# to enable simpler dependency management
from
bam25prime
import
AlignmentFile
,
BedTool
from
libreads
import
bam2fastq
from
libreads
import
bam2fastq
...
@@ -372,6 +374,14 @@ class BedFile(GenposFile):
...
@@ -372,6 +374,14 @@ class BedFile(GenposFile):
def
__init__
(
self
,
bedpath
):
def
__init__
(
self
,
bedpath
):
self
.
bed
=
BedTool
(
bedpath
)
self
.
bed
=
BedTool
(
bedpath
)
# To be usable with context manager "with" syntax
# for compatibility with BamFile (not sure this really makes sense)
def
__enter__
(
self
):
return
self
def
__exit__
(
self
,
exception_type
,
exception_value
,
traceback
):
pass
def
to_5prime
(
self
,
intervals
=
None
,
min_len
=
None
,
max_len
=
None
):
def
to_5prime
(
self
,
intervals
=
None
,
min_len
=
None
,
max_len
=
None
):
"""
"""
Iterate over the *self.bed* and yield only those features
Iterate over the *self.bed* and yield only those features
...
@@ -572,25 +582,25 @@ class BamFile(GenposFile):
...
@@ -572,25 +582,25 @@ class BamFile(GenposFile):
# TODO: allow replacing bam files with bed files,
# TODO: allow replacing bam files with bed files,
# using pybedtools.BedTool and collapse_and_sort_bedtool
# using pybedtools.BedTool and collapse_and_sort_bedtool
def
read_distances
(
def
read_distances
(
bampath_1
,
bampath
_2
,
genposf_1
,
genposf
_2
,
intervals
=
None
,
intervals
=
None
,
min_len_1
=
None
,
max_len_1
=
None
,
min_len_1
=
None
,
max_len_1
=
None
,
min_len_2
=
None
,
max_len_2
=
None
,
min_len_2
=
None
,
max_len_2
=
None
,
max_dists
=
None
,
antisense
=
True
):
max_dists
=
None
,
antisense
=
True
):
"""
"""
Compute the distribution of distances of
reads in Bam formatted f
ile
Compute the distribution of distances of
genomic positions in GenposF
ile
*bampath
_2* with respect to
reads in Bam formatted file *bampath
_1*.
object *genposf
_2* with respect to
those in other GenposFile *genposf
_1*.
If *intervals* is provided, only
read
s from file *
bampath
_1* belonging to
If *intervals* is provided, only
position
s from file *
genposf
_1* belonging to
these genomic intervals will be considered.
these genomic intervals will be considered.
If *min_len_1* and / or *max_len_1* are provided,
read
s from
file
If *min_len_1* and / or *max_len_1* are provided,
position
s from
*
bampath
_1* that fall outside those boundaries will not be considered.
*
genposf
_1* that fall outside those boundaries will not be considered.
If *min_len_2* and / or *max_len_2* are provided,
read
s from
file
If *min_len_2* and / or *max_len_2* are provided,
position
s from
*bampath_2* that fall outside those boundaries will not be considered.
*bampath_2* that fall outside those boundaries will not be considered.
The distances are those between 5
'
ends of the
read
s.
The distances are those between 5
'
ends of the
position
s.
If *max_dists* is provided, it should be a dictionary
If *max_dists* is provided, it should be a dictionary
of pairs (dist_before, dist_after) keyed by strand
of pairs (dist_before, dist_after) keyed by strand
...
@@ -598,28 +608,26 @@ def read_distances(
...
@@ -598,28 +608,26 @@ def read_distances(
By default, it is the following:
By default, it is the following:
max_dists = {
"
+
"
: (120, 120),
"
-
"
: (120, 120)}
max_dists = {
"
+
"
: (120, 120),
"
-
"
: (120, 120)}
This means that for a given
read from *bampath
_1*,
This means that for a given
position from *genposf
_1*,
read
s from *
bampath
_2* whose 5
'
end falls outside a [-120, 120] range
position
s from *
genposf
_2* whose 5
'
end falls outside a [-120, 120] range
around the 5
'
end of the
read
of interest will be ignored.
around the 5
'
end of the
position
of interest will be ignored.
By default, distances will only take into account
alignment
s
By default, distances will only take into account
position
s
on opposite strands.
on opposite strands.
If *antisense* is set to False, then distances will only consider
If *antisense* is set to False, then distances will only consider
alignment
s on the same strand.
position
s on the same strand.
"""
"""
if
max_dists
is
None
:
if
max_dists
is
None
:
max_dists
=
{
"
+
"
:
(
120
,
120
),
"
-
"
:
(
120
,
120
)}
max_dists
=
{
"
+
"
:
(
120
,
120
),
"
-
"
:
(
120
,
120
)}
with
BamFile
(
bampath_1
)
as
refbam
,
\
BamFile
(
bampath_2
)
as
smallbam
:
# combine the counters
# combine the counters
return
merge_with
(
return
merge_with
(
sum
,
sum
,
# count distances to neighbouring
antisense small read
s
# count distances to neighbouring
genposf_2 position
s
# for each
refbam read
# for each
genposf_1 positions
(
dists_to_ali
(
ali
,
neighbours
)
for
(
ali
,
neighbours
)
in
zip_alis
(
(
dists_to_ali
(
ali
,
neighbours
)
for
(
ali
,
neighbours
)
in
zip_alis
(
smallbam
.
to_5prime
(
genposf_2
.
to_5prime
(
min_len
=
min_len_2
,
max_len
=
max_len_2
),
min_len
=
min_len_2
,
max_len
=
max_len_2
),
refbam
.
to_5prime
(
genposf_1
.
to_5prime
(
intervals
=
intervals
,
intervals
=
intervals
,
min_len
=
min_len_1
,
max_len
=
max_len_1
),
min_len
=
min_len_1
,
max_len
=
max_len_1
),
max_dists
=
max_dists
,
antisense
=
antisense
)))
max_dists
=
max_dists
,
antisense
=
antisense
)))
...
@@ -654,7 +662,9 @@ def compute_read_distances(
...
@@ -654,7 +662,9 @@ def compute_read_distances(
gene_list
,
annotations
,
id_kwd
)
gene_list
,
annotations
,
id_kwd
)
else
:
else
:
intervals
=
None
intervals
=
None
with
BamFile
(
bampath_1
)
as
refbam
,
\
BamFile
(
bampath_2
)
as
smallbam
:
return
read_distances
(
return
read_distances
(
bampath_1
,
bampath_2
,
refbam
,
smallbam
,
intervals
=
intervals
,
intervals
=
intervals
,
an
s
isense
=
antisense
)
an
t
isense
=
antisense
)
This diff is collapsed.
Click to expand it.
pyproject.toml
+
1
−
1
View file @
a0966db5
[build-system]
[build-system]
requires
=
[
requires
=
[
"setuptools"
,
"setuptools"
,
"bam25prime @ git+https://gitlab.pasteur.fr/bli/bam25prime.git"
,
"libreads @ git+https://gitlab.pasteur.fr/bli/libreads.git"
,
"libreads @ git+https://gitlab.pasteur.fr/bli/libreads.git"
,
"pysam"
]
]
build-backend
=
"setuptools.build_meta"
build-backend
=
"setuptools.build_meta"
This diff is collapsed.
Click to expand it.
setup.py
+
2
−
2
View file @
a0966db5
...
@@ -19,8 +19,8 @@ setup(
...
@@ -19,8 +19,8 @@ setup(
license
=
"
MIT
"
,
license
=
"
MIT
"
,
packages
=
find_packages
(),
packages
=
find_packages
(),
install_requires
=
[
install_requires
=
[
"
libreads
@ git+https://gitlab.pasteur.fr/bli/
libreads
.git
"
,
"
bam25prime
@ git+https://gitlab.pasteur.fr/bli/
bam25prime
.git
"
])
"
pysam
"
])
"
libreads @ git+https://gitlab.pasteur.fr/bli/libreads.git
"
])
#ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
#ext_modules = cythonize("libsmallrna/libsmallrna.pyx"),
#install_requires=["cytoolz"],
#install_requires=["cytoolz"],
#zip_safe=False
#zip_safe=False
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment