From 0f0690f0aedf6ff0956cf042b7456f036e1b5181 Mon Sep 17 00:00:00 2001
From: Blaise Li <blaise.li__git@nsup.org>
Date: Wed, 30 Oct 2019 15:56:50 +0100
Subject: [PATCH] Singularity definition and wrappers.

---
 singularity/run_Degradome-seq_pipeline |   1 +
 singularity/run_GRO-seq_pipeline       |   1 +
 singularity/run_PRO-seq_pipeline       |   1 +
 singularity/run_RNA-seq_pipeline       |   1 +
 singularity/run_Ribo-seq_pipeline      |   1 +
 singularity/run_iCLIP-seq_pipeline     |   1 +
 singularity/run_iCLIP_pipeline         |   1 +
 singularity/run_pipeline.def           | 121 +++++++++++++++++++++++++
 singularity/run_pipeline.sh            |  88 ++++++++++++++++++
 singularity/run_sRNA-seq_pipeline      |   1 +
 singularity/run_small_RNA-seq_pipeline |   1 +
 11 files changed, 218 insertions(+)
 create mode 120000 singularity/run_Degradome-seq_pipeline
 create mode 120000 singularity/run_GRO-seq_pipeline
 create mode 120000 singularity/run_PRO-seq_pipeline
 create mode 120000 singularity/run_RNA-seq_pipeline
 create mode 120000 singularity/run_Ribo-seq_pipeline
 create mode 120000 singularity/run_iCLIP-seq_pipeline
 create mode 120000 singularity/run_iCLIP_pipeline
 create mode 100644 singularity/run_pipeline.def
 create mode 100755 singularity/run_pipeline.sh
 create mode 120000 singularity/run_sRNA-seq_pipeline
 create mode 120000 singularity/run_small_RNA-seq_pipeline

diff --git a/singularity/run_Degradome-seq_pipeline b/singularity/run_Degradome-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_Degradome-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_GRO-seq_pipeline b/singularity/run_GRO-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_GRO-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_PRO-seq_pipeline b/singularity/run_PRO-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_PRO-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_RNA-seq_pipeline b/singularity/run_RNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_RNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_Ribo-seq_pipeline b/singularity/run_Ribo-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_Ribo-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_iCLIP-seq_pipeline b/singularity/run_iCLIP-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_iCLIP-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_iCLIP_pipeline b/singularity/run_iCLIP_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_iCLIP_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_pipeline.def b/singularity/run_pipeline.def
new file mode 100644
index 0000000..26b3e9d
--- /dev/null
+++ b/singularity/run_pipeline.def
@@ -0,0 +1,121 @@
+Bootstrap:docker
+From:debian:buster-slim
+Stage: build-haskell
+
+%post
+	apt-get update -y
+	apt-get install -y curl libtinfo-dev
+	mkdir -p /root/.local/bin
+	export PATH="/root/.local/bin:${PATH}"
+	export PATH="/usr/local/bin:${PATH}"
+	curl -sSL https://get.haskellstack.org/ | sh
+	stack upgrade
+	mkdir -p /usr/local/src
+	cd /usr/local/src
+	# Getting the workflows. NOTE(review): the clone URL embeds a deploy token in clear text; confirm this is intended.
+	cd /usr/local/src
+	git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+	cd bioinfo_utils
+	git submodule update --init --remote --merge
+	# Run install.sh in each directory whose path contains "Haskell", provided the script mentions "local".
+	# Only remove-duplicates-from-sorted-fastq and trim-t-tail-from-fastq are currently copied to the next stage.
+	has_haskell_install=$(find . -name install.sh -print | xargs dirname | grep "Haskell")
+	for dir in ${has_haskell_install}
+	do
+		(cd ${dir} && grep "local" install.sh && ./install.sh)
+	done
+
+Bootstrap:docker
+From:python:3.7-buster
+
+%files from build-haskell
+	/usr/local/bin/remove-duplicates-from-sorted-fastq
+	/usr/local/bin/trim-t-tail-from-fastq
+
+%post
+	apt-get -y update
+	# run_pipeline.sh (and maybe other tools) needs bc
+	apt-get -y install bc rsync
+	apt-get -y install bedops bedtools bowtie2 cutadapt subread hisat2 parallel samtools
+	# Some programs are not provided by debian
+	# mkdir -p /usr/local/src
+	#####################
+	# Installing bioawk #
+	#####################
+	cd /usr/local/src
+	git clone https://github.com/lh3/bioawk.git
+	cd bioawk
+	apt-get install -y byacc
+	make
+	ln -s /usr/local/src/bioawk/bioawk /usr/local/bin/.
+	##########################
+	# Installing fastq-tools #
+	##########################
+	cd /usr/local/src
+	wget --continue http://homes.cs.washington.edu/~dcjones/fastq-tools/fastq-tools-0.8.tar.gz
+	tar -xzf fastq-tools-0.8.tar.gz
+	cd fastq-tools-0.8
+	./configure
+	make
+	make install
+	#########################
+	# Installing kent utils #
+	#########################
+	cd /usr/local/src
+	#mkdir UCSC-tools
+	#cd UCSC-tools
+	rsync -azvP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/.
+	# Installing R from CRAN (it is possible to test whether the R install is already OK). The key is fetched from keyserver.ubuntu.com since keys.gnupg.net was retired.
+	deb_source="deb http://cran.irsn.fr/bin/linux/debian buster-cran35/"
+	echo "${deb_source}" >> /etc/apt/sources.list.d/cran.list
+	apt-key adv --keyserver keyserver.ubuntu.com --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF'
+	apt-get -y update
+	apt-get -y install r-base-core r-base-dev
+	R -e 'install.packages("BiocManager")'
+	R -e 'BiocManager::install(ask=FALSE)'
+	R -e 'BiocManager::install(c("docopt", "DESeq2"), ask=FALSE)'
+
+	# To use the "local" python, not the system one.
+	export PATH="/usr/local/bin":$PATH
+	# To avoid using python things installed in the HOME of root
+	# (that will be mounted during singularity build)
+	export PYTHONNOUSERSITE=1
+	# Needed to manually cythonize custom pybedtools before installing it
+	#/usr/bin/env python3 -m pip install Cython
+	#/usr/bin/env python3 -m pip install --global-option="cythonize" git+https://github.com/blaiseli/pybedtools.git@fix_missing_headers
+	# Getting custom pybedtools that includes *.h headers
+	#git clone https://github.com/blaiseli/pybedtools.git
+	#cd pybedtools
+	#git checkout fix_missing_headers
+	#/usr/bin/env python3 setup.py cythonize
+	#/usr/bin/env python3 -m pip install .
+	# Getting the workflows. NOTE(review): the clone URL embeds a deploy token in clear text; confirm this is intended.
+	cd /usr/local/src
+	#git clone --recurse-submodules https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+	git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
+	cd bioinfo_utils
+	git submodule update --init --remote --merge
+	has_requirements=$(find . -name requirements.txt -print | xargs dirname)
+	for dir in ${has_requirements}
+	do
+		(cd ${dir} && pip install -r requirements.txt)
+	done
+	has_install=$(find . -name install.sh -print | xargs dirname | grep -v "Nim" | grep -v "Haskell")
+	for dir in ${has_install}
+	do
+		(cd ${dir} && ./install.sh)
+	done
+
+%environment
+	export LC_ALL=C
+	# Put /usr/local/bin first, to use the "local" python, not the system one.
+	export PATH="/usr/local/bin":$PATH
+	# Ignore the per-user site-packages, to avoid using python things installed
+	# in the HOME of the user (that will be mounted during container execution)
+	export PYTHONNOUSERSITE=1
+	export PATH=/usr/local/src/bioinfo_utils:"${PATH}"
+
+%runscript
+	cmd="${1:?missing pipeline command, e.g. run_RNA-seq_pipeline}"  # fail with a clear message instead of a shift error
+	shift  # the remaining arguments are handed to the pipeline command unchanged
+	exec "/usr/local/src/bioinfo_utils/${cmd}" "$@"
diff --git a/singularity/run_pipeline.sh b/singularity/run_pipeline.sh
new file mode 100755
index 0000000..256976d
--- /dev/null
+++ b/singularity/run_pipeline.sh
@@ -0,0 +1,88 @@
+#!/bin/bash -l
+# Usage: run_<something>-seq_pipeline <configuration_file> [extra arguments for snakemake]
+# run_<something>-seq_pipeline are aliases to the present file.
+# Each alias will run a specific pipeline, depending on its name.
+# There may be several aliases for a same pipeline.
+# sRNA-seq
+# * run_sRNA-seq_pipeline
+# * run_small_RNA-seq_pipeline
+# GRO-seq
+# * run_GRO-seq_pipeline
+# * run_PRO-seq_pipeline
+# RNA-seq
+# * run_RNA-seq_pipeline
+# Degradome-seq
+# * run_Degradome-seq_pipeline
+# Ribo-seq
+# * run_Ribo-seq_pipeline
+# iCLIP (which has a special "non-seq" name)
+# * run_iCLIP-seq_pipeline
+# * run_iCLIP_pipeline
+
+# Name under which this script was called, see http://linuxcommand.org/wss0150.php
+PROGNAME="$(basename "${0}")"
+
+function error_exit
+{
+#	----------------------------------------------------------------
+#	Report a fatal error on stderr, then leave with status 1.
+#		${1}: error message, "Unknown Error" when omitted or empty
+#	----------------------------------------------------------------
+    local message="${1:-Unknown Error}"
+    printf '%s: %s\n' "${PROGNAME}" "${message}" >&2
+    exit 1
+}
+
+# https://stackoverflow.com/a/1638397/1878788
+# Absolute path to this script, with symbolic links resolved,
+# so that the container is looked for next to run_pipeline.sh itself.
+SCRIPT=$(readlink -f "${0}")
+# Absolute path this script is in
+BASEDIR=$(dirname "${SCRIPT}")
+container="${BASEDIR}/run_pipeline"
+
+# Do we have singularity?
+# The command itself is tested instead of a flag variable,
+# so that a value inherited from the environment cannot skew the check.
+if ! singularity --version 2> /dev/null
+then
+    install_doc="https://sylabs.io/guides/3.4/user-guide/quick_start.html#quick-installation-steps"
+    # Do we have an environment modules system?
+    if module --version 2> /dev/null
+    then
+        module load singularity || error_exit "singularity is needed to run the pipelines (see ${install_doc})"
+    else
+        error_exit "singularity is needed to run the pipelines (see ${install_doc})"
+    fi
+fi
+
+if [ ! -e "${container}" ]
+then
+    if [ -e "${BASEDIR}/run_pipeline.def" ]
+    then
+        echo "The container was not found. Trying to build it. This may take quite some time and requires sudoer's rights."
+        sudo singularity build "${container}" "${BASEDIR}/run_pipeline.def" || error_exit "The container could not be built."
+    else
+        error_exit "The container was not found, nor a definition file to build it."
+    fi
+fi
+
+genome_dir="/pasteur/entites/Mhe/Genomes"
+gene_lists_dir="/pasteur/entites/Mhe/Gene_lists"
+
+if [ ! -e "${genome_dir}" ] || [ ! -e "${gene_lists_dir}" ]
+then
+    error_exit "The pipelines will look for genome data in ${genome_dir} and gene lists in ${gene_lists_dir}. Make sure it's there."
+fi
+
+# This script can be called from various symbolic links.
+# The name of the link determines which snakefile to use.
+# PRO-seq and GRO-seq are actually the same pipeline
+# similarly for sRNA-seq and small_RNA-seq
+# similarly for iCLIP-seq and iCLIP
+
+# -B /pasteur will mount /pasteur in the container
+# so that it finds the Genome configuration and gene lists
+# that are expected to be in a specific location there.
+# "$@" is quoted so that arguments containing spaces are passed on unsplit.
+singularity run -B /pasteur "${container}" "${PROGNAME}" "$@"
diff --git a/singularity/run_sRNA-seq_pipeline b/singularity/run_sRNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_sRNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
diff --git a/singularity/run_small_RNA-seq_pipeline b/singularity/run_small_RNA-seq_pipeline
new file mode 120000
index 0000000..f2500be
--- /dev/null
+++ b/singularity/run_small_RNA-seq_pipeline
@@ -0,0 +1 @@
+run_pipeline.sh
\ No newline at end of file
-- 
GitLab