run_pipeline.sh

#!/bin/bash -l
# Usage: run_<something>-seq_pipeline <configuration_file> [extra arguments for snakemake]
# run_<something>-seq_pipeline are aliases to the present file.
# Each alias will run a specific pipeline, depending on its name.
# There may be several aliases for the same pipeline.
# sRNA-seq
# * run_sRNA-seq_pipeline
# * run_small_RNA-seq_pipeline
# GRO-seq
# * run_GRO-seq_pipeline
# * run_PRO-seq_pipeline
# RNA-seq
# * run_RNA-seq_pipeline
# Degradome-seq
# * run_Degradome-seq_pipeline
# Ribo-seq
# * run_Ribo-seq_pipeline
# iCLIP (which has a special "non-seq" name)
# * run_iCLIP-seq_pipeline
# * run_iCLIP_pipeline
#
# Copyright (C) 2020 Blaise Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# http://linuxcommand.org/wss0150.php
# Name under which this script was invoked, with leading directories removed.
PROGNAME="${0##*/}"

# ----------------------------------------------------------------
# Abort the script after a fatal error.
#   Arguments:
#     $1 - descriptive error message (defaults to "Unknown Error")
#   Outputs:
#     writes "<PROGNAME>: <message>" to standard error
#   Exit status:
#     always terminates the shell with status 1
# ----------------------------------------------------------------
error_exit()
{
    local message="${1:-Unknown Error}"
    printf '%s: %s\n' "${PROGNAME}" "${message}" >&2
    exit 1
}

# https://stackoverflow.com/a/1638397/1878788
# Absolute path to this script, with symbolic links resolved, so that
# the files below are looked up next to the real file even when the
# script is started through one of its aliases.
SCRIPT="$(readlink -f "$0")"
# Directory containing the resolved script
BASEDIR="$(dirname "${SCRIPT}")"
# Files expected alongside the script
cluster_config="${BASEDIR}/cluster_config.json"
wrapper="${BASEDIR}/wrap_in_container.sh"
container="${BASEDIR}/run_pipeline"


# Is a singularity executable available?
# Only stderr is silenced: whatever "singularity --version" writes on
# stdout is still displayed.
if singularity --version 2> /dev/null
then
    have_singularity=1
fi

if [[ -z "${have_singularity}" ]]
then
    install_doc="https://sylabs.io/guides/3.4/user-guide/quick_start.html#quick-installation-steps"
    # No singularity: is there an environment modules system to get it from?
    if module --version 2> /dev/null
    then
        have_modules=1
    fi
    if [[ -z "${have_modules}" ]]
    then
        error_exit "singularity is needed to run the pipelines (see ${install_doc})"
    else
        # Last resort: try to load the apptainer module.
        module load apptainer || error_exit "singularity is needed to run the pipelines (see ${install_doc})"
    fi
fi

# The wrapper script and the cluster configuration are both required.
# The paths are quoted and tested with [[ ]]: with an unquoted path
# containing whitespace, "[" fails with "too many arguments" and the
# missing-file check would be silently skipped.
if [[ ! -e "${wrapper}" ]]
then
    error_exit "The wrapper script ${wrapper} was not found."
fi

if [[ ! -e "${cluster_config}" ]]
then
    error_exit "The cluster configuration ${cluster_config} was not found."
fi

# The container image is required. When it is missing but a definition
# file sits next to this script, try to build the image (this needs sudo).
# All paths are quoted so that whitespace in ${BASEDIR} neither breaks the
# tests nor splits the arguments handed to "singularity build".
if [[ ! -e "${container}" ]]
then
    if [[ -e "${BASEDIR}/run_pipeline.def" ]]
    then
        echo "The container was not found. Trying to build it. This may take quite some time and requires sudoer's rights."
        sudo singularity build "${BASEDIR}/run_pipeline" "${BASEDIR}/run_pipeline.def" || error_exit "The container could not be built."
    else
        error_exit "The container ${container} was not found, nor a definition file to build it."
    fi
fi

# This should actually be taken from the pipeline config file.
# Both locations can be overridden from the environment; the defaults
# below are used when the variables are unset or empty.
: "${GENOME_DIR:=/pasteur/entites/Mhe/Genomes}"
: "${GENE_LISTS_DIR:=/pasteur/entites/Mhe/Gene_lists}"

# Both directories must exist. The operands are quoted and combined with
# "||" inside [[ ]] rather than with the obsolescent "-o" of "[", which
# misbehaves on paths containing whitespace.
if [[ ! -e "${GENOME_DIR}" || ! -e "${GENE_LISTS_DIR}" ]]
then
    error_exit "The pipelines will look for genome data in ${GENOME_DIR} and gene lists in ${GENE_LISTS_DIR}. Make sure it's there."
fi

##############################################
# To run with sbatch on slurm queuing system #
##############################################
# Quality of service and partition given to sbatch. Both can be set from
# the environment; "normal" and "common" are used when they are unset or empty.
[[ ${QOS} ]] || QOS="normal"
[[ ${PART} ]] || PART="common"

# Extra options appended after the user's arguments in ${cmd} below.
# {cluster.ram}, {threads}, {rule} and {wildcards} are kept literally:
# this shell does not expand them (presumably snakemake fills them in,
# since the usage header describes extra arguments as being for snakemake
# -- TODO confirm).
# Inside double quotes, \\\' yields the three characters \\' in the value.
# NOTE(review): this escaping presumably has to survive re-parsing by the
# shell script that "sbatch --wrap" generates, and possibly by the container
# entry point -- confirm before changing it.
cluster_opts="--cluster \\\'sbatch --mem={cluster.ram} --cpus-per-task={threads} --job-name={rule}-{wildcards} --qos=${QOS} --part=${PART} ${wrapper} ${container}\\\' --cluster-config ${cluster_config}"
#cluster_opts="--cores 20 --cluster \'sbatch --mem={cluster.ram} --cpus-per-task={threads} --job-name={rule}-{wildcards} --qos=${QOS} --part=${PART} --mpi=none\' -j 300"
#cmd="APPTAINERENV_USER=${USER} apptainer run --cleanenv -B /opt/hpc/slurm -B /var/run/munge -B /pasteur ${container} ${PROGNAME} ${pipeline_config} ${cluster_opts} --cluster-config ${cluster_config}"
#cmd="APPTAINERENV_USER=${USER} apptainer run --cleanenv -B /opt/hpc/slurm -B /var/run/munge -B /pasteur ${container} ${PROGNAME} $@"
# Command line, built as one string, that is given to "sbatch --wrap"
# further down when the script does not run on the host named "pisa".
# In an assignment, $@ is joined into a single space-separated string, so
# the boundaries of arguments that contain whitespace are lost here.
cmd="APPTAINERENV_USER=${USER} apptainer run -B /opt/hpc/slurm -B /var/run/munge -B /pasteur ${container} ${PROGNAME} $@ ${cluster_opts}"

# This script can be called from various symbolic links.
# The name of the link determines which snakefile to use.
# PRO-seq and GRO-seq are actually the same pipeline
# similarly for sRNA-seq and small_RNA-seq
# similarly for iCLIP-seq and iCLIP

# -B /pasteur will mount /pasteur in the container
# so that it finds the Genome configuration and gene lists
# that are expected to be in a specific location there.
# singularity run -B /pasteur -B /run/shm:/run/shm ${container} ${PROGNAME} $@
# Former variant that cleaned the environment first:
# SINGULARITYENV_USER=${USER} singularity run --cleanenv -B /pasteur -B /run/shm:/run/shm ${container} ${PROGNAME} $@

# An explicit if/else is used instead of "test && run || sbatch": with the
# latter, a failing run on "pisa" would also trigger the sbatch submission.
if [[ "$(hostname)" == "pisa" ]]
then
    # On the host named "pisa": run the container directly.
    # "$@" is quoted so that arguments containing whitespace
    # (e.g. a configuration file path) reach the container intact.
    SINGULARITYENV_USER="${USER}" singularity run -B /pasteur -B /run/shm:/run/shm "${container}" "${PROGNAME}" "$@"
else
    # Anywhere else: submit the prepared command line through slurm.
    sbatch "--qos=${QOS}" "--part=${PART}" --wrap="${cmd}"
fi

# NOTE(review): the script reports success even when the run or the
# submission above failed. Kept as is so that existing callers see the
# same exit status; consider propagating the failure instead.
exit 0