Skip to content
Snippets Groups Projects
Select Git revision
  • bd8658548afa825d92f3f15306b8a8873ff9adac
  • master default protected
2 results

run_pipeline.sh

Blame
  • run_pipeline.sh 4.23 KiB
    #!/bin/bash
    # Usage: run_pipeline.sh [<snakefile>] <configuration_file> [extra arguments for snakemake]
    # If no snakefile is provided, the script should be called from one of its aliases.
    # Each alias will run a specific pipeline, depending on its name.
    
    # Name under which this script was invoked (without its directory part).
    # It prefixes error messages and, when the script is started through one
    # of its aliases, selects the pipeline to run.
    # Error reporting pattern from http://linuxcommand.org/wss0150.php
    PROGNAME="$(basename "$0")"
    
    #######################################
    # Abort the script after a fatal error.
    # Globals:   PROGNAME (read), used as message prefix
    # Arguments: $1 - descriptive error message (default: "Unknown Error")
    # Outputs:   "<PROGNAME>: <message>" on stderr
    # Returns:   never returns; exits the shell with status 1
    #######################################
    error_exit() {
        local message="${1:-Unknown Error}"
        echo "${PROGNAME}: ${message}" >&2
        exit 1
    }
    
    # https://stackoverflow.com/a/1638397/1878788
    # SCRIPT: absolute path to this script, with symbolic links resolved.
    # BASEDIR: absolute path of the directory that contains it.
    SCRIPT="$(readlink -f "$0")"
    BASEDIR="$(dirname "${SCRIPT}")"
    
    # The name this script was called under selects the snakefile:
    # each known alias (a symbolic link to this script) maps to one pipeline.
    # PRO-seq and GRO-seq are actually the same pipeline,
    # similarly for sRNA-seq and small_RNA-seq,
    # similarly for iCLIP-seq and iCLIP.
    case "${PROGNAME}" in
        run_RNA-seq_pipeline)       snakefile="${BASEDIR}/RNA-seq/RNA-seq.snakefile" ;;
        run_GRO-seq_pipeline)       snakefile="${BASEDIR}/PRO-seq/GRO-seq.snakefile" ;;
        run_PRO-seq_pipeline)       snakefile="${BASEDIR}/PRO-seq/PRO-seq.snakefile" ;;
        run_small_RNA-seq_pipeline) snakefile="${BASEDIR}/small_RNA-seq/small_RNA-seq.snakefile" ;;
        run_sRNA-seq_pipeline)      snakefile="${BASEDIR}/sRNA-seq/sRNA-seq.snakefile" ;;
        run_iCLIP-seq_pipeline)     snakefile="${BASEDIR}/iCLIP-seq/iCLIP-seq.snakefile" ;;
        run_iCLIP_pipeline)         snakefile="${BASEDIR}/iCLIP/iCLIP.snakefile" ;;
        run_Ribo-seq_pipeline)      snakefile="${BASEDIR}/Ribo-seq/Ribo-seq.snakefile" ;;
        run_Degradome-seq_pipeline) snakefile="${BASEDIR}/Degradome-seq/Degradome-seq.snakefile" ;;
        *)
            # Not a known alias: the snakefile is the first argument,
            # which is then removed from the positional parameters.
            snakefile="${1}"
            shift
            ;;
    esac
    
    # The next positional argument is the pipeline configuration file;
    # it is removed so that only the extra arguments remain in "$@".
    configfile="${1}"
    shift
    # ${configfile} must be quoted in the tests: unquoted and empty, the
    # existence test collapses to "[ -e ]", which is always true, and a path
    # containing spaces makes the test fail with "too many arguments".
    if [ -z "${configfile}" ]
    then
        error_exit "No configuration file given. Usage: ${PROGNAME} [<snakefile>] <configuration_file> [extra arguments for snakemake]"
    elif [ -e "${configfile}" ]
    then
        echo "Pipeline configuration found: ${configfile}"
    else
        error_exit "Pipeline configuration file ${configfile} not found."
    fi
    
    # Determine the output directory and where to log the pipeline (fragile!):
    # for every line of the configuration file containing "output_dir", take
    # the last whitespace-separated field, minus a trailing comma and any
    # double quotes. Any POSIX awk does this in a single pass.
    output_dir=$(awk '/output_dir/ {value = $NF; sub(/,$/, "", value); gsub(/"/, "", value); print value}' "${configfile}")
    # An empty value would send the logs and snapshots to the filesystem root;
    # several matching lines would give a multi-line, unusable path.
    case "${output_dir}" in
        "") error_exit "Could not determine output_dir from ${configfile}." ;;
        *$'\n'*) error_exit "Several lines of ${configfile} contain output_dir." ;;
    esac
    mkdir -p "${output_dir}" || error_exit "Could not create output directory ${output_dir}."
    # Day the run started, reported once the pipeline has finished.
    start_day=$(date +"%Y-%m-%d")
    # Common prefix (without extension) of the log files of this run.
    log_base="${output_dir}/$(date +"%d%m%y_%Hh%Mm")"
    
    # Keep, in the output directory, a copy of the configuration and of the
    # snakefile used for this run. Every expansion is quoted so that paths
    # containing spaces or glob characters stay single arguments.
    # cp refuses to copy a file onto itself, which happens when the run is
    # started from a snapshot already in the output directory: such a copy is
    # skipped (-ef is true when both paths refer to the same file).
    # Any other copy failure is fatal, rather than continuing with a missing
    # or outdated snapshot.
    config_base=$(basename -- "${configfile}")
    config_snapshot="${output_dir}/${config_base}"
    echo "Saving a local copy of the configuration in ${config_snapshot}"
    if ! [ "${configfile}" -ef "${config_snapshot}" ]
    then
        cp -f -- "${configfile}" "${config_snapshot}" || error_exit "Could not copy ${configfile} to ${config_snapshot}."
    fi
    snakefile_base=$(basename -- "${snakefile}")
    snakefile_snapshot="${output_dir}/${snakefile_base}"
    echo "Saving a local copy of the snakefile in ${snakefile_snapshot}"
    if ! [ "${snakefile}" -ef "${snakefile_snapshot}" ]
    then
        cp -f -- "${snakefile}" "${snakefile_snapshot}" || error_exit "Could not copy ${snakefile} to ${snakefile_snapshot}."
    fi
    
    # MemTotal value from /proc/meminfo, in kB (any POSIX awk can extract it).
    kilobytes_tot=$(awk '$1 == "MemTotal:" {print $2}' /proc/meminfo)
    # Without a numeric value, snakemake would be given an empty "mem_mb=".
    case "${kilobytes_tot}" in
        ""|*[!0-9]*) error_exit "Could not read MemTotal from /proc/meminfo." ;;
    esac
    # Some rules were given a "mem_mb" resource section based on the "max_vms" benchmarking result.
    # See the /pasteur/homes/bli/Documents/Informatique/benchmarks/Pipeline_benchmarking/Pipeline_benchmarking.ipynb jupyter notebook.
    # These values are in megabytes (https://stackoverflow.com/a/47201241/1878788)
    # We divide the total memory (in kB) by 1100 instead of 1000
    # to avoid pretending that we have all this memory available for snakemake rules.
    # Shell arithmetic performs the same integer division as bc with its default scale.
    megabytes_resource=$(( kilobytes_tot / 1100 ))
    
    # The pipeline is run from within the output directory, using the local
    # copies of the snakefile and of the configuration saved there.
    # TODO: check that this works (as opposed to using -s ${snakefile})
    # The command is held in an array and executed directly (no eval), so that
    # extra arguments containing spaces or shell metacharacters reach
    # snakemake exactly as they were given to this script.
    snakemake_cmd=(snakemake -s "${snakefile_base}" --configfile "${config_base}" --resources "mem_mb=${megabytes_resource}" "$@")
    # Human-readable form of the command, for the log and the error message.
    cmd="(cd ${output_dir}; ${snakemake_cmd[*]})"
    # The log starts with the command; snakemake's stdout is appended to it.
    echo "${cmd}" > "${log_base}.log"
    # stdout and stderr are still displayed while being appended to
    # ${log_base}.log and ${log_base}.err respectively:
    # https://unix.stackexchange.com/a/245610/55127
    # https://stackoverflow.com/a/692407/1878788
    # Prefixing the command with "niceload --mem 500M" was considered,
    # but might make things too slow?
    # "&&" ensures that snakemake is not started if entering the output directory fails.
    (cd "${output_dir}" && "${snakemake_cmd[@]}") > >(tee -a "${log_base}.log") 2> >(tee -a "${log_base}.err" >&2) || error_exit "${cmd} failed, see ${log_base}.err"
    end_day=$(date +"%Y-%m-%d")
    
    echo -e "This run started on ${start_day} and ended on ${end_day}.\n" 1>&2
    
    
    exit 0