Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
libhts
Commits
7c79f77a
Commit
7c79f77a
authored
Apr 29, 2020
by
Blaise Li
Browse files
Add script to copy and paste bigwig regions.
parent
cde7fc33
Changes
7
Hide whitespace changes
Inline
Side-by-side
.gitattributes
0 → 100644
View file @
7c79f77a
*.bw filter=lfs diff=lfs merge=lfs -text
libhts/__init__.py
View file @
7c79f77a
__copyright__
=
"Copyright (C) 2020 Blaise Li"
__copyright__
=
"Copyright (C) 2020 Blaise Li"
__licence__
=
"GNU GPLv3"
__licence__
=
"GNU GPLv3"
__version__
=
0.
2
__version__
=
0.
3
from
.libhts
import
(
from
.libhts
import
(
aligner2min_mapq
,
aligner2min_mapq
,
gtf_2_genes_exon_lengths
,
gtf_2_genes_exon_lengths
,
...
@@ -8,6 +8,7 @@ from .libhts import (
...
@@ -8,6 +8,7 @@ from .libhts import (
make_empty_bigwig
,
make_empty_bigwig
,
make_seeding_function
,
make_seeding_function
,
median_ratio_to_pseudo_ref_size_factors
,
median_ratio_to_pseudo_ref_size_factors
,
paste_bigwig_region
,
plot_boxplots
,
plot_counts_distribution
,
plot_histo
,
plot_boxplots
,
plot_counts_distribution
,
plot_histo
,
plot_lfc_distribution
,
plot_MA
,
plot_lfc_distribution
,
plot_MA
,
plot_norm_correlations
,
plot_paired_scatters
,
plot_scatter
,
plot_norm_correlations
,
plot_paired_scatters
,
plot_scatter
,
...
...
libhts/data/klp7co_RPF.bw
0 → 100644
LFS
View file @
7c79f77a
File added
libhts/libhts.py
View file @
7c79f77a
...
@@ -18,6 +18,8 @@ from functools import reduce
...
@@ -18,6 +18,8 @@ from functools import reduce
import
warnings
import
warnings
import
numpy
as
np
import
numpy
as
np
import
pandas
as
pd
import
pandas
as
pd
# To compute bins in bigwig data
from
scipy.stats
import
binned_statistic
# To compute correlation coefficient, and compute linear regression
# To compute correlation coefficient, and compute linear regression
from
scipy.stats.stats
import
pearsonr
,
linregress
from
scipy.stats.stats
import
pearsonr
,
linregress
# To compute geometric mean
# To compute geometric mean
...
@@ -249,6 +251,69 @@ def make_empty_bigwig(filename, chrom_sizes):
...
@@ -249,6 +251,69 @@ def make_empty_bigwig(filename, chrom_sizes):
bw_out
.
close
()
bw_out
.
close
()
# Possible improvements: use an itearble of (from_region, to_region) pairs,
# or two zippable iterables
def
paste_bigwig_region
(
from_fname
,
to_fname
,
from_region
,
to_region
,
dest_fname
,
nanmean
=
False
):
"""
Take values from a region *from_region* in a bigwig file *from_fname* and
paste them into a region *to_region* in another bigwig file *to_fname*, and
write the results in a third bigwig file *dest_fname*.
A region should be specified as a (chrom, start, stop) triplet, where start
is zero-based and stop is 1-based (like BED coordinates or Python slices).
*dest_fname* will have the same chromosomes as *to_fname*.
The values in *to_region* are substituted.
Current limitation: The regions should have the same length.
Option *nanmean* is likely not working correctly.
"""
(
from_chrom
,
from_start
,
from_stop
)
=
from_region
from_len
=
from_stop
-
from_start
(
to_chrom
,
to_start
,
to_stop
)
=
to_region
assert
to_stop
-
to_start
==
from_len
,
(
"Regions should have the same lengths."
)
from_bw
=
pyBigWig
.
open
(
from_fname
)
to_bw
=
pyBigWig
.
open
(
to_fname
)
chrom_sizes
=
list
(
to_bw
.
chroms
().
items
())
dest_bw
=
pyBigWig
.
open
(
dest_fname
,
"w"
)
dest_bw
.
addHeader
(
chrom_sizes
)
for
(
chrom
,
chrom_len
)
in
chrom_sizes
:
nb_bins
=
ceil
(
chrom_len
/
10
)
# values = to_bw.values(chrom, 0, chrom_len)
# Original values, plus some nans for binned_statistics to properly set the bin boundaries
values
=
np
.
pad
(
to_bw
.
values
(
chrom
,
0
,
chrom_len
),
# pad zero on the left, and what is needed to complete the bin on the right
(
0
,
10
*
nb_bins
-
chrom_len
),
constant_values
=
np
.
nan
)
if
chrom
==
to_chrom
:
# Replace the values in the desired region
values
[
to_start
:
to_stop
]
=
from_bw
.
values
(
from_chrom
,
from_start
,
from_stop
)
if
nanmean
:
bin_means
=
binned_statistic
(
range
(
0
,
10
*
nb_bins
),
values
,
statistic
=
np
.
nanmean
,
bins
=
nb_bins
).
statistic
else
:
bin_means
=
binned_statistic
(
# range(0, chrom_len), np.nan_to_num(values),
range
(
0
,
10
*
nb_bins
),
values
,
statistic
=
"mean"
,
bins
=
nb_bins
).
statistic
dest_bw
.
addEntries
(
chrom
,
0
,
# Mean for each bin of size 10
values
=
bin_means
,
# values=np.nan_to_num(np.zeros(chrom_len)[0::10]),
span
=
10
,
step
=
10
)
dest_bw
.
close
()
to_bw
.
close
()
from_bw
.
close
()
#################
#################
# Bowtie2 stuff #
# Bowtie2 stuff #
#################
#################
...
...
libhts/test_libhts.py
0 → 100644
View file @
7c79f77a
import
os
import
shutil
import
tempfile
import
unittest
import
libhts
import
pyBigWig
import
numpy
as
np
class
TestPasteBigWig
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
bw_in_path
=
"data/klp7co_RPF.bw"
self
.
from_region
=
(
"klp7co"
,
0
,
2459
)
self
.
to_region
=
(
"III"
,
(
10808346
-
20
),
(
10808346
-
20
)
+
2459
)
self
.
tmpdir
=
tempfile
.
mkdtemp
()
self
.
bw_out_path
=
os
.
path
.
join
(
self
.
tmpdir
,
"pasted.bw"
)
libhts
.
paste_bigwig_region
(
self
.
bw_in_path
,
self
.
bw_in_path
,
self
.
from_region
,
self
.
to_region
,
self
.
bw_out_path
)
def
tearDown
(
self
):
shutil
.
rmtree
(
self
.
tmpdir
)
def
test_preserved_region_num_bins
(
self
):
bw_in
=
pyBigWig
.
open
(
self
.
bw_in_path
)
bw_out
=
pyBigWig
.
open
(
self
.
bw_out_path
)
bins_in
=
{
(
start
,
stop
):
val
for
(
start
,
stop
,
val
)
in
bw_in
.
intervals
(
*
self
.
from_region
)
if
(
not
np
.
isnan
(
val
))}
# if (not np.isnan(val)) and val > 0}
# contains extra (start, stop, nan),
# that's where there was no bin
bins_out
=
{
(
start
,
stop
):
val
for
(
start
,
stop
,
val
)
in
bw_out
.
intervals
(
*
self
.
from_region
)
if
(
not
np
.
isnan
(
val
))}
self
.
assertTrue
(
len
(
bins_in
)
==
len
(
bins_out
))
def
test_preserved_region_bin_contents
(
self
):
bw_in
=
pyBigWig
.
open
(
self
.
bw_in_path
)
bw_out
=
pyBigWig
.
open
(
self
.
bw_out_path
)
bins_in
=
[
(
start
,
stop
,
val
)
for
(
start
,
stop
,
val
)
in
bw_in
.
intervals
(
*
self
.
from_region
)
if
(
not
np
.
isnan
(
val
))]
bins_out
=
[
(
start
,
stop
,
val
)
for
(
start
,
stop
,
val
)
in
bw_out
.
intervals
(
*
self
.
from_region
)
if
(
not
np
.
isnan
(
val
))]
for
(
i
,
(
bin_in
,
bin_out
))
in
enumerate
(
zip
(
bins_in
,
bins_out
)):
self
.
assertTrue
(
bin_in
==
bin_out
)
# There are differences, maybe due to phase differences with respect to bins
# def test_pasted_region_num_bins(self):
# bw_in = pyBigWig.open(self.bw_in_path)
# bw_out = pyBigWig.open(self.bw_out_path)
# bins_in = {
# (start, stop): val
# for (start, stop, val)
# in bw_in.intervals(*self.from_region)
# if (not np.isnan(val))}
# # if (not np.isnan(val)) and val > 0}
# # contains extra (start, stop, nan),
# # that's where there was no bin
# bins_out = {
# (start, stop): val
# for (start, stop, val)
# in bw_out.intervals(*self.to_region)
# if (not np.isnan(val))}
# self.assertTrue(len(bins_in) == len(bins_out))
# def test_pasted_region_bin_contents(self):
# bw_in = pyBigWig.open(self.bw_in_path)
# bw_out = pyBigWig.open(self.bw_out_path)
# bins_in = [
# (start, stop, val)
# for (start, stop, val)
# in bw_in.intervals(*self.from_region)
# if (not np.isnan(val))]
# bins_out = [
# (start, stop, val)
# for (start, stop, val)
# in bw_out.intervals(*self.to_region)
# if (not np.isnan(val))]
# for (i, (bin_in, bin_out)) in enumerate(zip(bins_in, bins_out)):
# self.assertTrue(bin_in == bin_out)
if
__name__
==
'__main__'
:
unittest
.
main
()
scripts/copypaste_bigwig_regions.py
0 → 100755
View file @
7c79f77a
#!/usr/bin/env python3
# Copyright (C) 2020 Blaise Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""Copy bigwig data from a region in a bigwig file into another region.
The other region can be in the same or in another bigwig file.
The result will be written in another bigwig file.
"""
from
argparse
import
(
ArgumentParser
,
ArgumentDefaultsHelpFormatter
,
RawTextHelpFormatter
)
import
sys
from
operator
import
itemgetter
from
yaml
import
safe_load
as
yload
from
libhts
import
paste_bigwig_region
CONFIG_EXAMPLE
=
"""
# bigwig file containing the region to copy
from_bw: "data/klp7co_RPF.bw"
# bigwig file containing the region to replace
to_bw: "data/klp7co_RPF.bw"
# bigwig file in which to write the result
dest_bw: "/tmp/pasted.bw"
# region from which to copy (BED-like coordinates)
from_region:
chrom: "klp7co"
# zero-based coordinate
start: 0
# 1-based coordinate
end: 2459
# region in which to copy (BED-like coordinates)
to_region:
chrom: "III"
# zero-based coordinate
start: 10808326
# 1-based coordinate
end: 10810785
"""
class
Formatter
(
ArgumentDefaultsHelpFormatter
,
RawTextHelpFormatter
):
"""
Hybrid help formatter.
See comments of <https://stackoverflow.com/a/3853776/1878788>
"""
def
main
():
"""Run the command-line script."""
# parser = ArgumentParser(
# description=__doc__,
# formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser
=
ArgumentParser
(
description
=
f
"
{
__doc__
}
\n\n
Example configuration:
\n
{
CONFIG_EXAMPLE
}
"
,
formatter_class
=
Formatter
)
parser
.
add_argument
(
"-c"
,
"--configfile"
,
help
=
"""YAML-formatted file containing information about:
- input bigwig files
- output bigwig file
- region from which to copy
- region where to paste
"""
)
parser
.
add_argument
(
"-e"
,
"--example_config"
,
action
=
"store_true"
,
help
=
"display an example configuration file"
)
args
=
parser
.
parse_args
()
if
args
.
example_config
:
print
(
CONFIG_EXAMPLE
)
return
0
if
not
args
.
configfile
:
print
(
"You must provide a configuration file using option -c. "
"See option -e for an example."
,
file
=
sys
.
stderr
)
return
1
with
open
(
args
.
configfile
)
as
config_fh
:
config
=
yload
(
config_fh
)
region_extractor
=
itemgetter
(
"chrom"
,
"start"
,
"end"
)
from_region
=
region_extractor
(
config
[
"from_region"
])
to_region
=
region_extractor
(
config
[
"to_region"
])
paste_bigwig_region
(
config
[
"from_bw"
],
config
[
"to_bw"
],
from_region
,
to_region
,
config
[
"dest_bw"
])
return
0
if
__name__
==
"__main__"
:
sys
.
exit
(
main
())
setup.py
View file @
7c79f77a
...
@@ -32,7 +32,9 @@ setup(
...
@@ -32,7 +32,9 @@ setup(
author_email
=
"blaise.li@normalesup.org"
,
author_email
=
"blaise.li@normalesup.org"
,
license
=
"GNU GPLv3"
,
license
=
"GNU GPLv3"
,
packages
=
find_packages
(),
packages
=
find_packages
(),
scripts
=
[
"scripts/bam2bigwig.sh"
,
"scripts/sam2indexedbam.sh"
],
scripts
=
[
"scripts/bam2bigwig.sh"
,
"scripts/sam2indexedbam.sh"
,
"scripts/copypaste_bigwig_regions.py"
],
install_requires
=
[
install_requires
=
[
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment