Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Blaise LI
libhts
Commits
53b52ea7
Commit
53b52ea7
authored
Jun 07, 2019
by
Blaise Li
Browse files
Added bam2bigwig.sh to scripts.
The script is taken from bioinfo_utils.
parent
c74efa0f
Changes
2
Hide whitespace changes
Inline
Side-by-side
scripts/bam2bigwig.sh
0 → 100755
View file @
53b52ea7
#!/bin/dash
# Usage: bam2bigwig.sh <sorted.bam> <bin_file> <library_name> <orientation> <strandedness> [<normalization>] <bigwig>
# <bin_file> should be a bed file representing 10 bp bins along the genome
# <orientation> must be "all", "fwd" or "rev"
# <strandedness> must be "F" or "R"
# It can be "U" (see http://sailfish.readthedocs.io/en/master/library_type.html),
# but anything else than "R" will be treated as "F".
# <normalization> can be either a file or a number
# If it is a file, it should contain at least two columns,
# the first one with library names and the second with size factors.
# If it is a value, it shoud be the size factor.
# Requires:
# * bedops (for bedmap)
# * python3 -> module load Python/3.6.0
# * samtools -> module load samtools/1.9
# * bedtools -> module load bedtools/2.25.0
# * mawk -> use alias if absent
# * parallel (for niceload) -> module load parallel/20170122
# * UCSC-tools (for bedGraphToBigWig) -> module load UCSC-tools/v373
workdir
=
$(
mktemp
-d
)
cleanup
()
{
rm
-rf
${
workdir
}
}
# http://linuxcommand.org/wss0150.php
PROGNAME
=
$(
basename
$0
)
error_exit
()
{
# ----------------------------------------------------------------
# Function for exit due to fatal program error
# Accepts 1 argument:
# string containing descriptive error message
# ----------------------------------------------------------------
cleanup
echo
"
${
PROGNAME
}
:
${
1
:-
"Unknown Error"
}
"
1>&2
exit
1
}
mawk
--version
1> /dev/null 2> /dev/null
||
alias
mawk
=
'awk'
bam
=
${
1
}
bin_file
=
${
2
}
# Example of how to generate the bin_file from iGenome data:
# cd /Genomes/C_elegans
# mawk '$1 == "@SQ" {print $2"\t0\t"$3}' Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.dict \
# | sed 's/SN://g' | sed 's/LN://' \
# | bedops --chop 10 - \
# > Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/genome_binned_10.bed
lib_name
=
${
3
}
orient
=
${
4
}
# "F" is library type is normally stranded, "R" if it is reverse-stranded
strandedness
=
${
5
}
if
[
$#
=
7
]
then
norm
=
${
6
}
bigwig
=
${
7
}
else
norm
=
""
bigwig
=
${
6
}
fi
bam_name
=
$(
basename
${
bam
}
)
mapping_name
=
${
bam_name
%_sorted.bam
}
mapping_dir
=
$(
dirname
${
bam
}
)
case
${
orient
}
in
"all"
)
orient_filter
=
""
;;
"fwd"
)
if
[
"
${
strandedness
}
"
=
"R"
]
then
orient_filter
=
"-strand -"
else
orient_filter
=
"-strand +"
fi
;;
"rev"
)
if
[
"
${
strandedness
}
"
=
"R"
]
then
orient_filter
=
"-strand +"
else
orient_filter
=
"-strand -"
fi
;;
*
)
error_exit
"unrecognised orientation
${
orient
}
"
;;
esac
bigwig_dir
=
$(
dirname
${
bigwig
}
)
echo
"working in
${
workdir
}
"
mkdir
-p
${
bigwig_dir
}
if
[
"
${
norm
}
"
]
then
if
[
-f
${
norm
}
]
then
echo
"reading scaling factor for
${
lib_name
}
in
${
norm
}
"
size
=
$(
grep
-w
"^
${
lib_name
}
"
${
norm
}
|
cut
-f2
)
if
[
!
${
size
}
]
then
error_exit
"size could not be found on column 2, line
${
lib_name
}
of
${
norm
}
"
#cleanup && error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
else
echo
"found
${
size
}
for
${
lib_name
}
"
fi
else
# Not a file
echo
"assuming
${
norm
}
is the size factor"
size
=
"
${
norm
}
"
fi
scale
=
$(
python3
-c
"print(1.0 /
${
size
}
)"
)
scaling
=
"-scale
${
scale
}
"
bedgraph
=
"
${
workdir
}
/
${
mapping_name
}
_norm_
${
orient
}
_genomecov.bg"
else
scaling
=
""
bedgraph
=
"
${
workdir
}
/
${
mapping_name
}
_
${
orient
}
_genomecov.bg"
fi
echo
"getting chromosome bounds"
genome_file
=
"
${
workdir
}
/chrom_sizes.txt"
samtools view
-H
${
bam
}
\
| mawk
'$1 == "@SQ" {print $2"\t"$3}'
\
|
sed
's/SN://g'
|
sed
's/LN://'
\
>
${
genome_file
}
||
error_exit
"making
${
genome_file
}
failed"
#> ${genome_file} || cleanup && error_exit "making ${genome_file} failed"
compute_coverage
()
{
cmd
=
"niceload --noswap bedtools genomecov -bg -split
${
orient_filter
}
${
scaling
}
-ibam
${
bam
}
"
eval
${
cmd
}
\
| mawk
'{ print $1"\t"$2"\t"$3"\tid-"NR"\t"$4; }'
| sort-bed -
\
||
error_exit
"compute_coverage failed"
#|| cleanup && error_exit "compute_coverage failed"
}
# TODO: find how to run this with niceload (there may be an issue with the quoting)
make_bins
()
{
cmd
=
"bedmap --faster --echo --mean --delim
\"\t\"
--skip-unmapped
${
bin_file
}
-"
eval
${
cmd
}
||
error_exit
"make_bins failed"
#eval ${cmd} || cleanup && error_exit "make_bins failed"
}
compute_coverage | make_bins
\
>
${
bedgraph
}
||
error_exit
"generating bedgraph failed"
#> ${bedgraph} || cleanup && error_exit "generating bedgraph failed"
echo
"making bigwig"
niceload
--noswap
bedGraphToBigWig
${
bedgraph
}
${
genome_file
}
${
bigwig
}
||
error_exit
"bedGraphToBigWig failed"
#bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || cleanup && error_exit "bedGraphToBigWig failed"
echo
"removing
${
bedgraph
}
"
rm
-f
${
bedgraph
}
echo
"removing
${
genome_file
}
"
rm
-f
${
genome_file
}
cleanup
# rm -rf ${workdir}
exit
0
setup.py
View file @
53b52ea7
...
...
@@ -18,7 +18,7 @@ setup(
author_email
=
"blaise.li@normalesup.org"
,
license
=
"MIT"
,
packages
=
find_packages
(),
scripts
=
[
"scripts/sam2indexedbam.sh"
],
scripts
=
[
"scripts/bam2bigwig.sh"
,
"scripts/sam2indexedbam.sh"
],
install_requires
=
[
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment