diff --git a/scripts/sam2indexedbam.sh b/scripts/sam2indexedbam.sh new file mode 100755 index 0000000000000000000000000000000000000000..f0a37c3b62d9e3518410b135fb7e351174e8710d --- /dev/null +++ b/scripts/sam2indexedbam.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# Used to get mapping data suitable for making pileups or viewing with IGV +# Usage: sam2indexedbam.sh <mapping.sam> [paired] +# Add the option "paired" to only keep reads mapped in proper pair +# Works if either <mapping.sam>, <mapping.sam.gz> or <mapping.sam.bz2> exists. +# Generates two files: <mapping_sorted.bam> and <mapping_sorted.bam.bai>. + +# http://linuxcommand.org/wss0150.php +PROGNAME=$(basename $0) + +tmpdir=$(mktemp -dt "${PROGNAME}.XXXXXXXXXX") + +cleanup() +{ + rm -rf ${tmpdir} +} + + +function error_exit +{ +# ---------------------------------------------------------------- +# Function for exit due to fatal program error +# Accepts 1 argument: +# string containing descriptive error message +# ---------------------------------------------------------------- + cleanup + echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2 + exit 1 +} + +samtools --version + +SAM=${1} + +case ${SAM} in + *.sam) + ext="sam" + ;; + *.bam) + ext="bam" + ;; + *) + error_exit "unrecognised file extension" + ;; +esac + +# -F 4: skip reads that did not map +if [[ ${2} ]] +then + filter="-F 4 -f 2" +else + filter="-F 4" +fi + +# Test environment variable THREADS +if [[ ${SAMTOOLS_THREADS} ]] +then + threads_option="--threads ${SAMTOOLS_THREADS}" +else + threads_option="" +fi + +if [ -e ${SAM} ] +then + # -u: output uncompressed bam, to save time when piping to samtools sort + samtools view ${threads_option} -u ${filter} ${SAM} | niceload --mem 500M samtools sort ${threads_option} -T ${tmpdir} -o ${SAM%.${ext}}_sorted.bam - || error_exit "samtools sort failed" + #cat ${SAM} | samtools view ${threads_option} -u ${filter} - | samtools sort ${threads_option} -o ${SAM%.${ext}}_sorted.bam - || error_exit "samtools sort failed" +elif [ -e ${SAM}.gz ] +then + zcat ${SAM}.gz | samtools view ${threads_option} -u ${filter} - | niceload --mem 500M samtools sort ${threads_option} -T ${tmpdir} -o ${SAM%.${ext}}_sorted.bam - || error_exit "samtools sort failed" +elif [ -e ${SAM}.bz2 ] +then + bzcat ${SAM}.bz2 | samtools view ${threads_option} -u ${filter} - | niceload --mem 500M samtools sort ${threads_option} -T ${tmpdir} -o ${SAM%.${ext}}_sorted.bam - || error_exit "samtools sort failed" +else + echo "${SAM} not found, neither .gz or .bz2 versions." + echo "Aborting." + exit 1 +fi +# There are random segmentation errors with samtools index +#samtools index ${SAM%.sam}_sorted.bam || error_exit "samtools index failed" +indexed="" +while [ ! ${indexed} ] +do + samtools index ${SAM%.${ext}}_sorted.bam && indexed="OK" + if [ ! ${indexed} ] + then + rm -f ${SAM%.${ext}}_sorted.bam.bai + echo "Indexing failed. Retrying" 1>&2 + fi +done || error_exit "samtools index failed" + +cleanup + +exit 0 diff --git a/setup.py b/setup.py index a82b55481b8b9fc0e949f9e3036afad20515366a..d66bee8b5da977260d7cbc6a73dc1882bf608fd5 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,7 @@ setup( author_email="blaise.li@normalesup.org", license="MIT", packages=find_packages(), + scripts=["scripts/sam2indexedbam.sh"], install_requires=[ #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git@744dd79b579577cb6e131653260d7990946be3ad#egg=libworkflows-0.1", #"libworkflows @ git+https://gitlab+deploy-token-31:isEzpsgbNf2sJMdUDy2g@gitlab.pasteur.fr/bli/libworkflows.git#egg=libworkflows-0.1",