Commit 53b52ea7 authored by Blaise Li's avatar Blaise Li
Browse files

Added bam2bigwig.sh to scripts.

The script is taken from bioinfo_utils.
parent c74efa0f
#!/bin/dash
# Usage: bam2bigwig.sh <sorted.bam> <bin_file> <library_name> <orientation> <strandedness> [<normalization>] <bigwig>
# <bin_file> should be a bed file representing 10 bp bins along the genome
# <orientation> must be "all", "fwd" or "rev"
# <strandedness> must be "F" or "R"
# It can be "U" (see http://sailfish.readthedocs.io/en/master/library_type.html),
# but anything else than "R" will be treated as "F".
# <normalization> can be either a file or a number
# If it is a file, it should contain at least two columns,
# the first one with library names and the second with size factors.
# If it is a value, it shoud be the size factor.
# Requires:
# * bedops (for bedmap)
# * python3 -> module load Python/3.6.0
# * samtools -> module load samtools/1.9
# * bedtools -> module load bedtools/2.25.0
# * mawk -> use alias if absent
# * parallel (for niceload) -> module load parallel/20170122
# * UCSC-tools (for bedGraphToBigWig) -> module load UCSC-tools/v373
workdir=$(mktemp -d)
cleanup()
{
rm -rf ${workdir}
}
# http://linuxcommand.org/wss0150.php
PROGNAME=$(basename $0)
error_exit ()
{
# ----------------------------------------------------------------
# Function for exit due to fatal program error
# Accepts 1 argument:
# string containing descriptive error message
# ----------------------------------------------------------------
cleanup
echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
exit 1
}
mawk --version 1> /dev/null 2> /dev/null || alias mawk='awk'
bam=${1}
bin_file=${2}
# Example of how to generate the bin_file from iGenome data:
# cd /Genomes/C_elegans
# mawk '$1 == "@SQ" {print $2"\t0\t"$3}' Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.dict \
# | sed 's/SN://g' | sed 's/LN://' \
# | bedops --chop 10 - \
# > Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/genome_binned_10.bed
lib_name=${3}
orient=${4}
# "F" is library type is normally stranded, "R" if it is reverse-stranded
strandedness=${5}
if [ $# = 7 ]
then
norm=${6}
bigwig=${7}
else
norm=""
bigwig=${6}
fi
bam_name=$(basename ${bam})
mapping_name=${bam_name%_sorted.bam}
mapping_dir=$(dirname ${bam})
case ${orient} in
"all")
orient_filter=""
;;
"fwd")
if [ "${strandedness}" = "R" ]
then
orient_filter="-strand -"
else
orient_filter="-strand +"
fi
;;
"rev")
if [ "${strandedness}" = "R" ]
then
orient_filter="-strand +"
else
orient_filter="-strand -"
fi
;;
*)
error_exit "unrecognised orientation ${orient}"
;;
esac
bigwig_dir=$(dirname ${bigwig})
echo "working in ${workdir}"
mkdir -p ${bigwig_dir}
if [ "${norm}" ]
then
if [ -f ${norm} ]
then
echo "reading scaling factor for ${lib_name} in ${norm}"
size=$(grep -w "^${lib_name}" ${norm} | cut -f2)
if [ ! ${size} ]
then
error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
#cleanup && error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
else
echo "found ${size} for ${lib_name}"
fi
else
# Not a file
echo "assuming ${norm} is the size factor"
size="${norm}"
fi
scale=$(python3 -c "print(1.0 / ${size})")
scaling="-scale ${scale}"
bedgraph="${workdir}/${mapping_name}_norm_${orient}_genomecov.bg"
else
scaling=""
bedgraph="${workdir}/${mapping_name}_${orient}_genomecov.bg"
fi
echo "getting chromosome bounds"
genome_file="${workdir}/chrom_sizes.txt"
samtools view -H ${bam} \
| mawk '$1 == "@SQ" {print $2"\t"$3}' \
| sed 's/SN://g' | sed 's/LN://' \
> ${genome_file} || error_exit "making ${genome_file} failed"
#> ${genome_file} || cleanup && error_exit "making ${genome_file} failed"
compute_coverage()
{
cmd="niceload --noswap bedtools genomecov -bg -split ${orient_filter} ${scaling} -ibam ${bam}"
eval ${cmd} \
| mawk '{ print $1"\t"$2"\t"$3"\tid-"NR"\t"$4; }' | sort-bed - \
|| error_exit "compute_coverage failed"
#|| cleanup && error_exit "compute_coverage failed"
}
# TODO: find how to run this with niceload (there may be an issue with the quoting)
make_bins()
{
cmd="bedmap --faster --echo --mean --delim \"\t\" --skip-unmapped ${bin_file} -"
eval ${cmd} || error_exit "make_bins failed"
#eval ${cmd} || cleanup && error_exit "make_bins failed"
}
compute_coverage | make_bins \
> ${bedgraph} || error_exit "generating bedgraph failed"
#> ${bedgraph} || cleanup && error_exit "generating bedgraph failed"
echo "making bigwig"
niceload --noswap bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || error_exit "bedGraphToBigWig failed"
#bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || cleanup && error_exit "bedGraphToBigWig failed"
echo "removing ${bedgraph}"
rm -f ${bedgraph}
echo "removing ${genome_file}"
rm -f ${genome_file}
cleanup
# rm -rf ${workdir}
exit 0
......@@ -18,7 +18,7 @@ setup(
author_email="blaise.li@normalesup.org",
license="MIT",
packages=find_packages(),
scripts=["scripts/sam2indexedbam.sh"],
scripts=["scripts/bam2bigwig.sh", "scripts/sam2indexedbam.sh"],
install_requires=[
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1",
#"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment