From 0f0690f0aedf6ff0956cf042b7456f036e1b5181 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Wed, 30 Oct 2019 15:56:50 +0100
Subject: [PATCH] Singularity definition and wrappers.

---
 singularity/run_Degradome-seq_pipeline |   1 +
 singularity/run_GRO-seq_pipeline       |   1 +
 singularity/run_PRO-seq_pipeline       |   1 +
 singularity/run_RNA-seq_pipeline       |   1 +
 singularity/run_Ribo-seq_pipeline      |   1 +
 singularity/run_iCLIP-seq_pipeline     |   1 +
 singularity/run_iCLIP_pipeline         |   1 +
 singularity/run_pipeline.def           | 126 +++++++++++++++++++++++++
 singularity/run_pipeline.sh            |  91 ++++++++++++++++++
 singularity/run_sRNA-seq_pipeline      |   1 +
 singularity/run_small_RNA-seq_pipeline |   1 +
 11 files changed, 226 insertions(+)
 create mode 120000 singularity/run_Degradome-seq_pipeline
 create mode 120000 singularity/run_GRO-seq_pipeline
 create mode 120000 singularity/run_PRO-seq_pipeline
 create mode 120000 singularity/run_RNA-seq_pipeline
 create mode 120000 singularity/run_Ribo-seq_pipeline
 create mode 120000 singularity/run_iCLIP-seq_pipeline
 create mode 120000 singularity/run_iCLIP_pipeline
 create mode 100644 singularity/run_pipeline.def
 create mode 100755 singularity/run_pipeline.sh
 create mode 120000 singularity/run_sRNA-seq_pipeline
 create mode 120000 singularity/run_small_RNA-seq_pipeline

diff --git a/singularity/run_Degradome-seq_pipeline b/singularity/run_Degradome-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_Degradome-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_GRO-seq_pipeline b/singularity/run_GRO-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_GRO-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_PRO-seq_pipeline b/singularity/run_PRO-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_PRO-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_RNA-seq_pipeline b/singularity/run_RNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_RNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_Ribo-seq_pipeline b/singularity/run_Ribo-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_Ribo-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_iCLIP-seq_pipeline b/singularity/run_iCLIP-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_iCLIP-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_iCLIP_pipeline b/singularity/run_iCLIP_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_iCLIP_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_pipeline.def b/singularity/run_pipeline.def
new file mode 100644
index 0000000..26b3e9d
--- /dev/null
+++ b/singularity/run_pipeline.def
@@ -0,0 +1,126 @@
+Bootstrap:docker
+From:debian:buster-slim
+Stage: build-haskell
+
+%post
+    apt-get update -y
+    apt-get install -y curl libtinfo-dev
+    mkdir -p /root/.local/bin
+    export PATH="/root/.local/bin:${PATH}"
+    export PATH="/usr/local/bin:${PATH}"
+    curl -sSL https://get.haskellstack.org/ | sh
+    stack upgrade
+    mkdir -p /usr/local/src
+    cd /usr/local/src
+    # Getting the workflows
+    cd /usr/local/src
+    # NOTE(review): a deploy token is embedded in the clone URLs of this file,
+    # so it is exposed to anyone who can read the file; consider supplying it
+    # at build time instead.
+    git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+    cd bioinfo_utils
+    git submodule update --init --remote --merge
+    # Note that currently only remove-duplicates-from-sorted-fastq
+    # and trim-t-tail-from-fastq are copied to the next stage
+    has_haskell_install=$(find . -name install.sh -print | xargs dirname | grep "Haskell")
+    for dir in ${has_haskell_install}
+    do
+        (cd ${dir} && grep "local" install.sh && ./install.sh)
+    done
+
+Bootstrap:docker
+From:python:3.7-buster
+
+%files from build-haskell
+    /usr/local/bin/remove-duplicates-from-sorted-fastq
+    /usr/local/bin/trim-t-tail-from-fastq
+
+%post
+    apt-get -y update
+    # run_pipeline.sh (and maybe other tools) needs bc
+    apt-get -y install bc rsync
+    apt-get -y install bedops bedtools bowtie2 cutadapt subread hisat2 parallel samtools
+    # Some programs are not provided by debian
+    # mkdir -p /usr/local/src
+    #####################
+    # Installing bioawk #
+    #####################
+    cd /usr/local/src
+    git clone https://github.com/lh3/bioawk.git
+    cd bioawk
+    apt install -y byacc
+    make
+    ln -s /usr/local/src/bioawk/bioawk /usr/local/bin/.
+    ##########################
+    # Installing fastq-tools #
+    ##########################
+    cd /usr/local/src
+    wget --continue http://homes.cs.washington.edu/~dcjones/fastq-tools/fastq-tools-0.8.tar.gz
+    tar -xzf fastq-tools-0.8.tar.gz
+    cd fastq-tools-0.8
+    ./configure
+    make
+    make install
+    #########################
+    # Installing kent utils #
+    #########################
+    cd /usr/local/src
+    #mkdir UCSC-tools
+    #cd UCSC-tools
+    rsync -azvP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/.
+    # It is possible to test whether the R install is already OK
+    deb_source="deb http://cran.irsn.fr/bin/linux/debian buster-cran35/"
+    echo ${deb_source} >> /etc/apt/sources.list.d/cran.list
+    apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF'
+    apt-get -y update
+    apt-get -y install r-base-core r-base-dev
+    R -e 'install.packages("BiocManager")'
+    R -e 'BiocManager::install(ask=FALSE)'
+    R -e 'BiocManager::install(c("docopt", "DESeq2"), ask=FALSE)'
+
+    # To use the "local" python, not the system one.
+    export PATH="/usr/local/bin":$PATH
+    # To avoid using python things installed in the HOME of root
+    # (that will be mounted during singularity build)
+    export PYTHONNOUSERSITE=1
+    # Needed to manually cythonize custom pybedtools before installing it
+    #/usr/bin/env python3 -m pip install Cython
+    #/usr/bin/env python3 -m pip install --global-option="cythonize" git+https://github.com/blaiseli/pybedtools.git@fix_missing_headers
+    # Getting custom pybedtools that includes *.h headers
+    #git clone https://github.com/blaiseli/pybedtools.git
+    #cd pybedtools
+    #git checkout fix_missing_headers
+    #/usr/bin/env python3 setup.py cythonize
+    #/usr/bin/env python3 -m pip install .
+    # Getting the workflows
+    cd /usr/local/src
+    #git clone --recurse-submodules https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+    git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+    cd bioinfo_utils
+    git submodule update --init --remote --merge
+    has_requirements=$(find . -name requirements.txt -print | xargs dirname)
+    for dir in ${has_requirements}
+    do
+        (cd ${dir} && pip install -r requirements.txt)
+    done
+    has_install=$(find . -name install.sh -print | xargs dirname | grep -v "Nim" | grep -v "Haskell")
+    for dir in ${has_install}
+    do
+        (cd ${dir} && ./install.sh)
+    done
+
+%environment
+    export LC_ALL=C
+    # To use the "local" python, not the system one.
+    export PATH="/usr/local/bin":$PATH
+    # To avoid using python things installed in the HOME of the user
+    # (that will be mounted during container execution)
+    export PYTHONNOUSERSITE=1
+    export PATH=/usr/local/src/bioinfo_utils:"${PATH}"
+
+%runscript
+    # The first argument names the executable to run from bioinfo_utils;
+    # the remaining arguments are passed to it unchanged.
+    cmd="${1}"
+    shift
+    exec "/usr/local/src/bioinfo_utils/${cmd}" "$@"
diff --git a/singularity/run_pipeline.sh b/singularity/run_pipeline.sh
new file mode 100755
index 0000000..256976d
--- /dev/null
+++ b/singularity/run_pipeline.sh
@@ -0,0 +1,91 @@
+#!/bin/bash -l
+# Usage: run_<something>-seq_pipeline <configuration_file> [extra arguments for snakemake]
+# run_<something>-seq_pipeline are aliases to the present file.
+# Each alias will run a specific pipeline, depending on its name.
+# There may be several aliases for a same pipeline.
+# sRNA-seq
+#   * run_sRNA-seq_pipeline
+#   * run_small_RNA-seq_pipeline
+# GRO-seq
+#   * run_GRO-seq_pipeline
+#   * run_PRO-seq_pipeline
+# RNA-seq
+#   * run_RNA-seq_pipeline
+# Degradome-seq
+#   * run_Degradome-seq_pipeline
+# Ribo-seq
+#   * run_Ribo-seq_pipeline
+# iCLIP (which has a special "non-seq" name)
+#   * run_iCLIP-seq_pipeline
+#   * run_iCLIP_pipeline
+
+# http://linuxcommand.org/wss0150.php
+# Name under which this script was invoked (normally one of the aliases above).
+# It is passed to the container, which uses it to select what to run.
+PROGNAME=$(basename "${0}")
+
+function error_exit
+{
+# ----------------------------------------------------------------
+# Function for exit due to fatal program error
+# Accepts 1 argument:
+# string containing descriptive error message
+# ----------------------------------------------------------------
+    echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
+    exit 1
+}
+
+# https://stackoverflow.com/a/1638397/1878788
+# Absolute path to this script
+SCRIPT=$(readlink -f "${0}")
+# Absolute path this script is in
+BASEDIR=$(dirname "${SCRIPT}")
+# Container image, expected (or built) next to this script
+container="${BASEDIR}/run_pipeline"
+
+
+# Do we have singularity?
+# (The command is tested directly rather than through a flag variable,
+# so that an inherited environment variable cannot influence the outcome.)
+if ! singularity --version 2> /dev/null
+then
+    install_doc="https://sylabs.io/guides/3.4/user-guide/quick_start.html#quick-installation-steps"
+    # Do we have an environment modules system?
+    if module --version 2> /dev/null
+    then
+        module load singularity || error_exit "singularity is needed to run the pipelines (see ${install_doc})"
+    else
+        error_exit "singularity is needed to run the pipelines (see ${install_doc})"
+    fi
+fi
+
+if [ ! -e "${container}" ]
+then
+    if [ -e "${BASEDIR}/run_pipeline.def" ]
+    then
+        echo "The container was not found. Trying to build it. This may take quite some time and requires sudoer's rights."
+        sudo singularity build "${container}" "${BASEDIR}/run_pipeline.def" || error_exit "The container could not be built."
+    else
+        error_exit "The container was not found, nor a definition file to build it."
+    fi
+fi
+
+genome_dir="/pasteur/entites/Mhe/Genomes"
+gene_lists_dir="/pasteur/entites/Mhe/Gene_lists"
+
+if [ ! -e "${genome_dir}" ] || [ ! -e "${gene_lists_dir}" ]
+then
+    error_exit "The pipelines will look for genome data in ${genome_dir} and gene lists in ${gene_lists_dir}. Make sure it's there."
+fi
+
+# This script can be called from various symbolic links.
+# The name of the link determines which snakefile to use.
+# PRO-seq and GRO-seq are actually the same pipeline
+# similarly for sRNA-seq and small_RNA-seq
+# similarly for iCLIP-seq and iCLIP
+
+# -B /pasteur will mount /pasteur in the container
+# so that it finds the Genome configuration and gene lists
+# that are expected to be in a specific location there.
+# Expansions are quoted so that arguments containing spaces are passed intact.
+singularity run -B /pasteur "${container}" "${PROGNAME}" "$@"
diff --git a/singularity/run_sRNA-seq_pipeline b/singularity/run_sRNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_sRNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_small_RNA-seq_pipeline b/singularity/run_small_RNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_small_RNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
-- 
GitLab