Skip to content
Snippets Groups Projects
Commit 0f0690f0 authored by Blaise Li's avatar Blaise Li
Browse files

Singularity definition and wrappers.

parent 3f5db2cd
No related branches found
No related tags found
No related merge requests found
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
Bootstrap:docker
From:debian:buster-slim
Stage: build-haskell
%post
# Build stage: install the Haskell "stack" tool, then build and install the
# Haskell utilities shipped with the bioinfo_utils workflows.
apt-get update -y
apt-get install -y curl libtinfo-dev
# Make both candidate install locations visible on PATH before installing.
# (presumably /root/.local/bin is where stack puts its own binaries -- TODO confirm)
mkdir -p /root/.local/bin
export PATH="/root/.local/bin:${PATH}"
export PATH="/usr/local/bin:${PATH}"
# Run the upstream stack installer script fetched from the network.
curl -sSL https://get.haskellstack.org/ | sh
stack upgrade
mkdir -p /usr/local/src
cd /usr/local/src
# Getting the workflows
# NOTE(review): the clone URL below embeds a deploy token in clear text;
# anyone who can read this definition file can reuse it.
# NOTE(review): git is not installed by the apt-get line above; presumably
# it is pulled in by the stack installer -- confirm.
cd /usr/local/src
git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
cd bioinfo_utils
# Fetch the submodules, moving each one to the tip of its remote branch.
git submodule update --init --remote --merge
# Note that currently only remove-duplicates-from-sorted-fastq
# and trim-t-tail-from-fastq are copied to the next stage
# Directories that contain an install.sh and have "Haskell" in their path.
has_haskell_install=$(find . -name install.sh -print | xargs dirname | grep "Haskell")
for dir in ${has_haskell_install}
do
# The subshell keeps the "cd" local to one iteration.
# install.sh is only run when it contains the string "local"
# (presumably to select scripts installing under /usr/local -- TODO confirm);
# the matching lines are also printed in the build log by grep.
(cd ${dir} && grep "local" install.sh && ./install.sh)
done
Bootstrap:docker
From:python:3.7-buster
%files from build-haskell
/usr/local/bin/remove-duplicates-from-sorted-fastq
/usr/local/bin/trim-t-tail-from-fastq
%post
# Final stage: install the command-line tools, R/Bioconductor packages and
# Python requirements used by the workflows, then the workflows themselves.
apt-get -y update
# run_pipeline.sh (and maybe other tools) needs bc
apt-get -y install bc rsync
# Bioinformatics tools that are available as Debian packages.
apt-get -y install bedops bedtools bowtie2 cutadapt subread hisat2 parallel samtools
# Some programs are not provided by debian
# mkdir -p /usr/local/src
#####################
# Installing bioawk #
#####################
cd /usr/local/src
git clone https://github.com/lh3/bioawk.git
cd bioawk
# byacc is installed right before building bioawk
# (presumably a build dependency of its Makefile -- TODO confirm)
apt install -y byacc
make
# Expose the freshly built binary through a symbolic link in /usr/local/bin.
ln -s /usr/local/src/bioawk/bioawk /usr/local/bin/.
##########################
# Installing fastq-tools #
##########################
cd /usr/local/src
wget --continue http://homes.cs.washington.edu/~dcjones/fastq-tools/fastq-tools-0.8.tar.gz
tar -xzf fastq-tools-0.8.tar.gz
cd fastq-tools-0.8
./configure
make
make install
#########################
# Installing kent utils #
#########################
cd /usr/local/src
#mkdir UCSC-tools
#cd UCSC-tools
# Only bedGraphToBigWig is fetched, as a prebuilt linux.x86_64 executable.
rsync -azvP rsync://hgdownload.soe.ucsc.edu/genome/admin/exe/linux.x86_64/bedGraphToBigWig /usr/local/bin/.
# It is possible to test whether the R install is already OK
# Register a CRAN mirror as an extra apt source and import the key given
# below, so that R is installed from that repository.
deb_source="deb http://cran.irsn.fr/bin/linux/debian buster-cran35/"
echo ${deb_source} >> /etc/apt/sources.list.d/cran.list
apt-key adv --keyserver keys.gnupg.net --recv-key 'E19F5F87128899B192B1A2C2AD5F960A256A04AF'
apt-get -y update
apt-get -y install r-base-core r-base-dev
# Install BiocManager, then use it to install docopt and DESeq2
# without interactive prompts.
R -e 'install.packages("BiocManager")'
R -e 'BiocManager::install(ask=FALSE)'
R -e 'BiocManager::install(c("docopt", "DESeq2"), ask=FALSE)'
# To use the "local" python, not the system one.
export PATH="/usr/local/bin":$PATH
# To avoid using python things installed in the HOME of root
# (that will be mounted during singularity build)
export PYTHONNOUSERSITE=1
# Needed to manually cythonize custom pybedtools before installing it
#/usr/bin/env python3 -m pip install Cython
#/usr/bin/env python3 -m pip install --global-option="cythonize" git+https://github.com/blaiseli/pybedtools.git@fix_missing_headers
# Getting custom pybedtools that includes *.h headers
#git clone https://github.com/blaiseli/pybedtools.git
#cd pybedtools
#git checkout fix_missing_headers
#/usr/bin/env python3 setup.py cythonize
#/usr/bin/env python3 -m pip install .
# Getting the workflows
# NOTE(review): the clone URL below embeds a deploy token in clear text;
# anyone who can read this definition file can reuse it.
cd /usr/local/src
#git clone --recurse-submodules https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
git clone https://gitlab+deploy-token-75:sakyTxfe-PxPHDwqsoGm@gitlab.pasteur.fr/bli/bioinfo_utils.git
cd bioinfo_utils
# Fetch the submodules, moving each one to the tip of its remote branch.
git submodule update --init --remote --merge
# Install the Python requirements of every directory that declares some.
has_requirements=$(find . -name requirements.txt -print | xargs dirname)
for dir in ${has_requirements}
do
# The subshell keeps the "cd" local to one iteration.
(cd ${dir} && pip install -r requirements.txt)
done
# Run every install.sh except those whose path contains "Nim" or "Haskell".
# The Haskell ones are handled in the build-haskell stage;
# NOTE(review): the reason for skipping the Nim ones is not visible here.
has_install=$(find . -name install.sh -print | xargs dirname | grep -v "Nim" | grep -v "Haskell")
for dir in ${has_install}
do
(cd ${dir} && ./install.sh)
done
%environment
# Run-time environment of the container.
LC_ALL=C
export LC_ALL
# To avoid using python things installed in the HOME of the user
# (that will be mounted during container execution)
PYTHONNOUSERSITE=1
export PYTHONNOUSERSITE
# Search order: the workflow wrappers first, then the "local" python and
# tools in /usr/local/bin (not the system ones), then the inherited PATH.
# The value is assigned fully quoted and exported afterwards, because an
# unquoted $PATH in an "export" argument is field-split by some /bin/sh.
PATH="/usr/local/src/bioinfo_utils:/usr/local/bin:${PATH}"
export PATH
%runscript
# Entry point of the container: the first argument names the program to run
# from /usr/local/src/bioinfo_utils, the remaining arguments are passed to it
# unchanged.
if [ "$#" -lt 1 ]
then
    echo "Usage: <container> <program in /usr/local/src/bioinfo_utils> [arguments]" 1>&2
    exit 1
fi
cmd="${1}"
shift
# The path is quoted so that a program name is never split or globbed.
exec "/usr/local/src/bioinfo_utils/${cmd}" "$@"
#!/bin/bash -l
# Usage: run_<something>-seq_pipeline <configuration_file> [extra arguments for snakemake]
# run_<something>-seq_pipeline are aliases to the present file.
# Each alias will run a specific pipeline, depending on its name.
# There may be several aliases for a same pipeline.
# sRNA-seq
# * run_sRNA-seq_pipeline
# * run_small_RNA-seq_pipeline
# GRO-seq
# * run_GRO-seq_pipeline
# * run_PRO-seq_pipeline
# RNA-seq
# * run_RNA-seq_pipeline
# Degradome-seq
# * run_Degradome-seq_pipeline
# Ribo-seq
# * run_Ribo-seq_pipeline
# iCLIP (which has a special "non-seq" name)
# * run_iCLIP-seq_pipeline
# * run_iCLIP_pipeline
# http://linuxcommand.org/wss0150.php
# Name under which this script was invoked; used as a prefix in error messages.
PROGNAME="$(basename "${0}")"

# Report a fatal error and terminate the script.
# Arguments: $1 - descriptive error message (defaults to "Unknown Error")
# Outputs:   "<PROGNAME>: <message>" on stderr
# Exits with status 1; never returns.
error_exit() {
    local message="${1:-Unknown Error}"
    echo "${PROGNAME}: ${message}" >&2
    exit 1
}
# https://stackoverflow.com/a/1638397/1878788
# Absolute path to this script. Symbolic links are resolved, so the container
# is looked up next to the real file even when called through an alias.
SCRIPT=$(readlink -f "${0}")
# Absolute path this script is in
BASEDIR=$(dirname "${SCRIPT}")
container="${BASEDIR}/run_pipeline"

# Do we have singularity?
# The flag is reset first, so that a variable with the same name inherited
# from the environment cannot bypass the check.
have_singularity=""
singularity --version 2> /dev/null && have_singularity=1
if [[ -z "${have_singularity}" ]]
then
    install_doc="https://sylabs.io/guides/3.4/user-guide/quick_start.html#quick-installation-steps"
    # Do we have an environment modules system?
    have_modules=""
    module --version 2> /dev/null && have_modules=1
    if [[ -n "${have_modules}" ]]
    then
        module load singularity || error_exit "singularity is needed to run the pipelines (see ${install_doc})"
    else
        error_exit "singularity is needed to run the pipelines (see ${install_doc})"
    fi
fi

# Build the container on first use if its definition file is available.
# Paths are quoted so that an install directory containing spaces works.
if [[ ! -e "${container}" ]]
then
    if [[ -e "${BASEDIR}/run_pipeline.def" ]]
    then
        echo "The container was not found. Trying to build it. This may take quite some time and requires sudoer's rights."
        sudo singularity build "${BASEDIR}/run_pipeline" "${BASEDIR}/run_pipeline.def" || error_exit "The container could not be built."
    else
        error_exit "The container was not found, nor a definition file to build it."
    fi
fi

genome_dir="/pasteur/entites/Mhe/Genomes"
gene_lists_dir="/pasteur/entites/Mhe/Gene_lists"
if [[ ! -e "${genome_dir}" || ! -e "${gene_lists_dir}" ]]
then
    error_exit "The pipelines will look for genome data in ${genome_dir} and gene lists in ${gene_lists_dir}. Make sure it's there."
fi

# This script can be called from various symbolic links.
# The name of the link determines which snakefile to use.
# PRO-seq and GRO-seq are actually the same pipeline
# similarly for sRNA-seq and small_RNA-seq
# similarly for iCLIP-seq and iCLIP
# -B /pasteur will mount /pasteur in the container
# so that it finds the Genome configuration and gene lists
# that are expected to be in a specific location there.
# "$@" is quoted so that every user argument (e.g. a configuration file path
# containing spaces) reaches the container as a single, unmodified word.
singularity run -B /pasteur "${container}" "${PROGNAME}" "$@"
run_pipeline.sh
\ No newline at end of file
run_pipeline.sh
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment