Commit 0f0690f0 authored by Blaise Li

Singularity definition and wrappers.

parent 3f5db2cd
(new pipeline alias symlinks, each pointing to run_pipeline.sh; no newline at end of file)
Bootstrap: docker
From: debian:buster-slim
Stage: build-haskell
%post
apt-get update -y
apt-get install -y curl libtinfo-dev
mkdir -p /root/.local/bin
export PATH="/root/.local/bin:${PATH}"
export PATH="/usr/local/bin:${PATH}"
curl -sSL https://get.haskellstack.org/ | sh
stack upgrade
mkdir -p /usr/local/src
cd /usr/local/src
# Getting the workflows
cd /usr/local/src
git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
cd bioinfo_utils
git submodule update --init --remote --merge
# Note that currently only remove-duplicates-from-sorted-fastq
# and trim-t-tail-from-fastq are copied to the next stage
has_haskell_install=$(find . -name install.sh -print | xargs dirname | grep "Haskell")
for dir in ${has_haskell_install}
do
(cd ${dir} && grep "local" install.sh && ./install.sh)
done
Bootstrap: docker
From: python:3.7-buster
%files from build-haskell
/usr/local/bin/remove-duplicates-from-sorted-fastq
/usr/local/bin/trim-t-tail-from-fastq
%post
apt-get -y update
# run_pipeline.sh (and maybe other tools) needs bc
apt-get -y install bc rsync
apt-get -y install bedops bedtools bowtie2 cutadapt subread hisat2 parallel samtools
# Some programs are not provided by debian
# mkdir -p /usr/local/src
#####################
# Installing bioawk #
#####################
cd /usr/local/src
git clone https://github.com/lh3/bioawk.git
cd bioawk
apt-get install -y byacc
make
ln -s /usr/local/src/bioawk/bioawk /usr/local/bin/.
##########################
# Installing fastq-tools #
##########################
cd /usr/local/src
wget --continue http://homes.cs.washington.edu/~dcjones/fastq-tools/fastq-tools-0.8.tar.gz
tar -xzf fastq-tools-0.8.tar.gz
cd fastq-tools-0.8
./configure
make
make install
#########################
# Installing kent utils #
#########################
cd /usr/local/src
#mkdir UCSC-tools
#cd UCSC-tools
rsync -azvP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/.
# It would be possible to first test whether the R installation is already OK
deb_source="deb http://cran.irsn.fr/bin/linux/debian buster-cran35/"
echo ${deb_source} >> /etc/apt/sources.list.d/cran.list
apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF'
apt-get -y update
apt-get -y install r-base-core r-base-dev
R -e 'install.packages("BiocManager")'
R -e 'BiocManager::install(ask=FALSE)'
R -e 'BiocManager::install(c("docopt", "DESeq2"), ask=FALSE)'
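# Optional sanity check (not part of the original recipe; uncomment to use):
# loading the freshly installed packages fails loudly if the R setup is broken.
# R -e 'suppressMessages(library(docopt)); suppressMessages(library(DESeq2))'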
# To use the "local" python, not the system one.
export PATH="/usr/local/bin":$PATH
# To avoid using python things installed in the HOME of root
# (that will be mounted during singularity build)
export PYTHONNOUSERSITE=1
# Needed to manually cythonize custom pybedtools before installing it
#/usr/bin/env python3 -m pip install Cython
#/usr/bin/env python3 -m pip install --global-option="cythonize" git+https://github.com/blaiseli/pybedtools.git@fix_missing_headers
# Getting custom pybedtools that includes *.h headers
#git clone https://github.com/blaiseli/pybedtools.git
#cd pybedtools
#git checkout fix_missing_headers
#/usr/bin/env python3 setup.py cythonize
#/usr/bin/env python3 -m pip install .
# Getting the workflows
cd /usr/local/src
#git clone --recurse-submodules https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
cd bioinfo_utils
git submodule update --init --remote --merge
has_requirements=$(find . -name requirements.txt -print | xargs dirname)
for dir in ${has_requirements}
do
(cd ${dir} && pip install -r requirements.txt)
done
has_install=$(find . -name install.sh -print | xargs dirname | grep -v "Nim" | grep -v "Haskell")
for dir in ${has_install}
do
(cd ${dir} && ./install.sh)
done
%environment
export LC_ALL=C
# To use the "local" python, not the system one.
export PATH="/usr/local/bin":$PATH
# To avoid using python things installed in the HOME of the user
# (that will be mounted during container execution)
export PYTHONNOUSERSITE=1
export PATH=/usr/local/src/bioinfo_utils:"${PATH}"
%runscript
cmd="${1}"
shift
exec /usr/local/src/bioinfo_utils/${cmd} "$@"
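# Example use of the %runscript above (a sketch; it assumes a script or symlink
# named run_RNA-seq_pipeline exists at the root of the cloned bioinfo_utils repository):
#   singularity run run_pipeline run_RNA-seq_pipeline config.yaml
# which execs /usr/local/src/bioinfo_utils/run_RNA-seq_pipeline config.yaml
# inside the container, forwarding the remaining arguments unchanged.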
#!/bin/bash -l
# Usage: run_<something>-seq_pipeline <configuration_file> [extra arguments for snakemake]
# run_<something>-seq_pipeline are aliases to the present file.
# Each alias will run a specific pipeline, depending on its name.
# There may be several aliases for the same pipeline (see the example after this list).
# sRNA-seq
# * run_sRNA-seq_pipeline
# * run_small_RNA-seq_pipeline
# GRO-seq
# * run_GRO-seq_pipeline
# * run_PRO-seq_pipeline
# RNA-seq
# * run_RNA-seq_pipeline
# Degradome-seq
# * run_Degradome-seq_pipeline
# Ribo-seq
# * run_Ribo-seq_pipeline
# iCLIP (which has a special "non-seq" name)
# * run_iCLIP-seq_pipeline
# * run_iCLIP_pipeline
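# The aliases are symbolic links to this script, created next to it, e.g.
# (illustrative; the actual links ship with the repository):
# ln -s run_pipeline.sh run_RNA-seq_pipeline
# ln -s run_pipeline.sh run_sRNA-seq_pipeline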
# http://linuxcommand.org/wss0150.php
PROGNAME=$(basename "${0}")
function error_exit
{
# ----------------------------------------------------------------
# Function for exit due to fatal program error
# Accepts 1 argument:
# string containing descriptive error message
# ----------------------------------------------------------------
echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
exit 1
}
# https://stackoverflow.com/a/1638397/1878788
# Absolute path to this script
SCRIPT=$(readlink -f "${0}")
# Absolute path this script is in
BASEDIR=$(dirname "${SCRIPT}")
container="${BASEDIR}/run_pipeline"
# Do we have singularity?
singularity --version 2> /dev/null && have_singularity=1
if [ -z "${have_singularity}" ]
then
install_doc="https://sylabs.io/guides/3.4/user-guide/quick_start.html#quick-installation-steps"
# Do we have an environment modules system?
module --version 2> /dev/null && have_modules=1
if [ -n "${have_modules}" ]
then
module load singularity || error_exit "singularity is needed to run the pipelines (see ${install_doc})"
else
error_exit "singularity is needed to run the pipelines (see ${install_doc})"
fi
fi
if [ ! -e "${container}" ]
then
if [ -e "${BASEDIR}/run_pipeline.def" ]
then
echo "The container was not found. Trying to build it. This may take quite some time and requires sudo privileges."
sudo singularity build "${BASEDIR}/run_pipeline" "${BASEDIR}/run_pipeline.def" || error_exit "The container could not be built."
else
error_exit "Neither the container nor a definition file to build it was found."
fi
fi
genome_dir="/pasteur/entites/Mhe/Genomes"
gene_lists_dir="/pasteur/entites/Mhe/Gene_lists"
if [ ! -e "${genome_dir}" ] || [ ! -e "${gene_lists_dir}" ]
then
error_exit "The pipelines will look for genome data in ${genome_dir} and gene lists in ${gene_lists_dir}. Make sure they are there."
fi
# This script can be called from various symbolic links.
# The name of the link determines which snakefile to use.
# PRO-seq and GRO-seq are actually the same pipeline
# similarly for sRNA-seq and small_RNA-seq
# similarly for iCLIP-seq and iCLIP
# -B /pasteur will mount /pasteur in the container
# so that it finds the Genome configuration and gene lists
# that are expected to be in a specific location there.
singularity run -B /pasteur "${container}" "${PROGNAME}" "$@"
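# Example invocation (the configuration file name is illustrative):
#   ./run_RNA-seq_pipeline RNA-seq_config.yaml --dryrun
# The first argument is the configuration file; anything after it (here,
# --dryrun) is passed on to snakemake, as described in the usage comment above.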