#!/bin/bash
# Usage: run_pipeline.sh [<snakefile>] <configuration_file> [extra arguments for snakemake]
# If no snakefile is provided, the script should be called from one of its aliases.
# Each alias will run a specific pipeline, depending on its name.

# http://linuxcommand.org/wss0150.php
# Name under which this script was invoked (last path component of $0).
# It prefixes error messages and, when the script is called through one of
# its aliases, selects the pipeline to run.
# "--" keeps a name starting with "-" from being parsed as a basename option.
PROGNAME=$(basename -- "${0}")

error_exit() {
#	----------------------------------------------------------------
#	Abort the script after reporting a fatal error.
#		Argument:
#			$1 (optional): descriptive error message,
#			"Unknown Error" is reported when it is missing or empty
#		The message is prefixed with ${PROGNAME}, written to
#		standard error, and the script exits with status 1.
#	----------------------------------------------------------------
    local message="${1:-Unknown Error}"
    printf '%s\n' "${PROGNAME}: ${message}" >&2
    exit 1
}

# https://stackoverflow.com/a/1638397/1878788
# Absolute path to this script (readlink -f follows symbolic links, so this
# is the real file even when the script is called through one of its aliases).
# Abort if the path cannot be resolved: an empty SCRIPT would silently turn
# BASEDIR into "." and the snakefiles would be looked up in the current directory.
SCRIPT=$(readlink -f -- "${0}") || error_exit "Cannot resolve the real path of ${0}."
# Absolute path this script is in
BASEDIR=$(dirname -- "${SCRIPT}")

# This script can be called from various symbolic links.
# The name of the link determines which snakefile to use.
# PRO-seq and GRO-seq are actually the same pipeline
# similarly for sRNA-seq and small_RNA-seq
# similarly for iCLIP-seq and iCLIP

# Print the snakefile associated with an alias of this script.
#   $1: name under which the script was invoked
# Reads BASEDIR (directory containing the pipelines).
# Returns 1, printing nothing, when $1 is not a known alias.
snakefile_for_alias() {
    case "${1}" in
        "run_RNA-seq_pipeline")
            printf '%s\n' "${BASEDIR}/RNA-seq/RNA-seq.snakefile"
            ;;
        "run_GRO-seq_pipeline")
            printf '%s\n' "${BASEDIR}/PRO-seq/GRO-seq.snakefile"
            ;;
        "run_PRO-seq_pipeline")
            printf '%s\n' "${BASEDIR}/PRO-seq/PRO-seq.snakefile"
            ;;
        "run_small_RNA-seq_pipeline")
            printf '%s\n' "${BASEDIR}/small_RNA-seq/small_RNA-seq.snakefile"
            ;;
        "run_sRNA-seq_pipeline")
            printf '%s\n' "${BASEDIR}/sRNA-seq/sRNA-seq.snakefile"
            ;;
        "run_iCLIP-seq_pipeline")
            printf '%s\n' "${BASEDIR}/iCLIP-seq/iCLIP-seq.snakefile"
            ;;
        "run_iCLIP_pipeline")
            printf '%s\n' "${BASEDIR}/iCLIP/iCLIP.snakefile"
            ;;
        "run_Ribo-seq_pipeline")
            printf '%s\n' "${BASEDIR}/Ribo-seq/Ribo-seq.snakefile"
            ;;
        "run_Degradome-seq_pipeline")
            printf '%s\n' "${BASEDIR}/Degradome-seq/Degradome-seq.snakefile"
            ;;
        *)
            return 1
            ;;
    esac
}

# When the script is not called through a known alias, the snakefile is
# expected as first argument and is removed from the argument list.
if ! snakefile=$(snakefile_for_alias "${PROGNAME}")
then
    snakefile="${1}"
    shift
fi

# The next argument is the pipeline configuration file.
# Whatever follows it is passed on to snakemake.
configfile="${1}"
shift

# The expansion is quoted on purpose: with an empty value, an unquoted
# test degenerates into `[ -e ]`, which is always true.
if [ ! -e "${configfile}" ]
then
    error_exit "Pipeline configuration file ${configfile} not found."
fi

kilobytes_tot=$(mawk '$1 == "MemTotal:" {print $2}' /proc/meminfo)
case "${kilobytes_tot}" in
    ""|*[!0-9]*)
        error_exit "Could not read MemTotal from /proc/meminfo (is mawk installed?)."
        ;;
esac
# Some rules were given a "mem_mb" resource section based on the "max_vms" benchmarking result.
# See the /pasteur/homes/bli/Documents/Informatique/benchmarks/Pipeline_benchmarking/Pipeline_benchmarking.ipynb jupyter notebook.
# These values are in megabytes (https://stackoverflow.com/a/47201241/1878788)
# We divide the total memory (in kB) by 1100 instead of 1000
# to avoid pretending that we have all this memory available for snakemake rules.
# (Integer division; 10# forces a decimal interpretation of the value.)
megabytes_resource=$(( 10#${kilobytes_tot} / 1100 ))

# The command is kept as a single string because it is written to the log
# and run through eval further down. Each user-supplied word is therefore
# shell-quoted with %q, so that paths or extra arguments containing spaces
# or shell metacharacters reach snakemake unchanged.
cmd="snakemake -s $(printf '%q' "${snakefile}") --configfile $(printf '%q' "${configfile}") --resources mem_mb=${megabytes_resource}"
if [ "$#" -gt 0 ]
then
    # Guarded: with no argument, printf '%q ' would emit a spurious ''.
    cmd="${cmd} $(printf '%q ' "$@")"
fi

# Determine the output directory and where to log the pipeline (fragile!)
# The value is the last field of the configuration line mentioning "output_dir",
# with a trailing comma and any double quotes removed.
output_dir=$(grep -- "output_dir" "${configfile}" | mawk '{print $NF}' | sed 's/,$//' | sed 's/"//g')
case "${output_dir}" in
    "")
        # Without this check the logs would be written directly under "/".
        error_exit "Could not determine output_dir from ${configfile}."
        ;;
    *$'\n'*)
        error_exit "Several lines of ${configfile} mention output_dir, cannot tell which one to use."
        ;;
esac
start_day=$(date +"%Y-%m-%d")
# Command suggested to the user at the end of the run.
# %q keeps it copy-pasteable when the directory name contains special characters.
find_older_output="find $(printf '%q' "${output_dir}") -depth ! -newermt ${start_day} -print"
# Log files are named after the start time of the run.
log_base="${output_dir}/$(date +"%d%m%y_%Hh%Mm")"
mkdir -p -- "${output_dir}" || error_exit "Cannot create output directory ${output_dir}."
# The first line of the log is the command itself.
printf '%s\n' "${cmd}" > "${log_base}.log"
# https://stackoverflow.com/a/692407/1878788
# Standard output is appended to the .log file and standard error to the .err file,
# while both are still displayed.
eval "${cmd}" > >(tee -a "${log_base}.log") 2> >(tee -a "${log_base}.err" >&2) || error_exit "${cmd} failed, see ${log_base}.err"

# printf '%s\n' prints each part verbatim on its own line
# (echo -e would interpret backslashes present in the suggested command).
printf '%s\n' \
    "This run started on ${start_day}." \
    "If you want to find all older output, you can run the following command:" \
    "${find_older_output}" \
    "(Use -delete instead of -print to remove those files (do this only in case of full output update).)" 1>&2

exit 0