Commit 9b6b9357 authored by Blaise Li

Trying to reduce swapping risk.

Also changed the way the temporary dir is cleaned up when making bigwig files.
parent 49bba75f
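The niceload wrapper added throughout this commit comes from GNU parallel: niceload --mem 500M is meant to suspend the wrapped command whenever free memory drops below roughly 500 MB and resume it once memory is available again, which is how the commit tries to reduce the risk of swapping. A minimal sketch (the file names are placeholders, not from this repository):

# Pause the coverage computation while free memory is below ~500 MB,
# resume it once memory frees up.
niceload --mem 500M bedtools genomecov -bg -ibam example.bam > example.bedgraph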
@@ -10,6 +10,13 @@
# the first one with library names and the second with size factors.
# If it is a value, it should be the size factor.
workdir=$(mktemp -d)
cleanup()
{
rm -rf ${workdir}
}
# http://linuxcommand.org/wss0150.php
PROGNAME=$(basename $0)
@@ -20,6 +27,7 @@ error_exit ()
# Accepts 1 argument:
# string containing descriptive error message
# ----------------------------------------------------------------
cleanup
echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
exit 1
}
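Calling cleanup from inside error_exit, rather than chaining it at every call site, also avoids a shell precedence pitfall in the replaced form: && and || have equal precedence and associate left to right, so 'cmd || cleanup && error_exit "msg"' parses as '(cmd || cleanup) && error_exit "msg"' and would call error_exit even when cmd succeeds. A minimal standalone sketch of the new pattern (some_command is a placeholder):

PROGNAME=$(basename "$0")
workdir=$(mktemp -d)
cleanup()
{
    rm -rf "${workdir}"
}
error_exit()
{
    cleanup
    echo "${PROGNAME}: ${1:-"Unknown Error"}" 1>&2
    exit 1
}
# A failure now needs a single call; the temporary dir is removed inside error_exit.
some_command > "${workdir}/out.txt" || error_exit "some_command failed"

A trap cleanup EXIT would be another way to guarantee removal of the temporary dir on any exit; the commit keeps the explicit call inside error_exit instead.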
@@ -78,13 +86,6 @@ case ${orient} in
;;
esac
workdir=$(mktemp -d)
cleanup()
{
rm -rf ${workdir}
}
bigwig_dir=$(dirname ${bigwig})
echo "working in ${workdir}"
mkdir -p ${bigwig_dir}
@@ -97,7 +98,8 @@ then
size=$(grep -w "^${lib_name}" ${norm} | cut -f2)
if [ ! ${size} ]
then
cleanup && error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
#cleanup && error_exit "size could not be found on column 2, line ${lib_name} of ${norm}"
else
echo "found ${size} for ${lib_name}"
fi
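For context, the size lookup above expects ${norm} to be the two-column table described in the script header: library name in the first column, size factor in the second. A hypothetical tab-separated example (values invented for illustration):

lib1_rep1	0.8731
lib2_rep1	1.1402

With such a file, grep -w "^lib1_rep1" norm.tsv | cut -f2 prints 0.8731; an empty result takes the error_exit branch above.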
@@ -119,27 +121,32 @@ genome_file="${workdir}/chrom_sizes.txt"
samtools view -H ${bam} \
| mawk '$1 == "@SQ" {print $2"\t"$3}' \
| sed 's/SN://g' | sed 's/LN://' \
> ${genome_file} || cleanup && error_exit "making ${genome_file} failed"
> ${genome_file} || error_exit "making ${genome_file} failed"
#> ${genome_file} || cleanup && error_exit "making ${genome_file} failed"
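# Note (illustration, not part of the commit): the pipeline above turns @SQ
# header lines such as "@SQ SN:chrI LN:15072434" into a tab-separated
# "chromosome<TAB>length" table, for example:
#   chrI    15072434
#   chrII   15279421
# which is the chrom-sizes format bedGraphToBigWig expects further down.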
compute_coverage()
{
cmd="bedtools genomecov -bg -split ${orient_filter} ${scaling} -ibam ${bam}"
cmd="niceload --mem 500M bedtools genomecov -bg -split ${orient_filter} ${scaling} -ibam ${bam}"
eval ${cmd} \
| mawk '{ print $1"\t"$2"\t"$3"\tid-"NR"\t"$4; }' | sort-bed - \
|| cleanup && error_exit "compute_coverage failed"
|| error_exit "compute_coverage failed"
#|| cleanup && error_exit "compute_coverage failed"
}
make_bins()
{
cmd="bedmap --faster --echo --mean --delim \"\t\" --skip-unmapped ${bin_file} -"
eval ${cmd} || cleanup && error_exit "make_bins failed"
cmd="niceload --mem 500M bedmap --faster --echo --mean --delim \"\t\" --skip-unmapped ${bin_file} -"
eval ${cmd} || error_exit "make_bins failed"
#eval ${cmd} || cleanup && error_exit "make_bins failed"
}
compute_coverage | make_bins \
> ${bedgraph} || cleanup && error_exit "generating bedgraph failed"
> ${bedgraph} || error_exit "generating bedgraph failed"
#> ${bedgraph} || cleanup && error_exit "generating bedgraph failed"
echo "making bigwig"
bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || cleanup && error_exit "bedGraphToBigWig failed"
niceload --mem 500M bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || error_exit "bedGraphToBigWig failed"
#bedGraphToBigWig ${bedgraph} ${genome_file} ${bigwig} || cleanup && error_exit "bedGraphToBigWig failed"
echo "removing ${bedgraph}"
rm -f ${bedgraph}
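A note on the per-stage niceload wrapping above: each pipeline stage is throttled independently, and when the downstream stage is suspended the pipe buffer fills, so the upstream stage blocks on its writes and the whole chain effectively pauses until memory frees up. A generic sketch of the same idea (commands and file names are placeholders, not from this repository):

# Both stages are wrapped separately; suspending either one stalls the other
# through normal pipe backpressure.
niceload --mem 500M sort -k1,1 -k2,2n coverage.bed \
    | niceload --mem 500M uniq -c > coverage_counts.txt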
......
@@ -51,19 +51,19 @@ count_fastq_reads()
sort_by_seq()
{{
fastq-sort -s || error_exit "fastq-sort failed"
niceload --mem 500M fastq-sort -s || error_exit "fastq-sort failed"
}}
dedup()
{{
sort_by_seq | remove-duplicates-from-sorted-fastq || error_exit "remove_duplicates_from_sorted_fastq failed"
sort_by_seq | niceload --mem 500M remove-duplicates-from-sorted-fastq || error_exit "remove_duplicates_from_sorted_fastq failed"
}}
trim_random_nt()
{{
# $1: nb of bases to trim at 5' end
# $2: nb of bases to trim at 3' end
cutadapt -u ${{1}} -u -${{2}} - 2> /dev/null || error_exit "trim_random_nt failed"
niceload --mem 500M cutadapt -u ${{1}} -u -${{2}} - 2> /dev/null || error_exit "trim_random_nt failed"
}}
compute_size_distribution()
......
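In trim_random_nt above, cutadapt -u takes a positive count to trim from the 5' end and a negative count to trim from the 3' end, and the option can be given twice to do both in a single pass, which is what the two function arguments map to. A hypothetical standalone invocation (file names for illustration only):

# Trim 4 random nucleotides from the 5' end and 4 from the 3' end.
cutadapt -u 4 -u -4 -o trimmed.fastq.gz reads.fastq.gz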
@@ -86,8 +86,9 @@ find_older_output="find ${output_dir} -depth ! -newermt ${start_day} -print"
log_base="${output_dir}/$(date +"%d%m%y_%Hh%Mm")"
mkdir -p ${output_dir}
echo ${cmd} > ${log_base}.log
# https://unix.stackexchange.com/a/245610/55127
# https://stackoverflow.com/a/692407/1878788
eval ${cmd} > >(tee -a ${log_base}.log) 2> >(tee -a ${log_base}.err >&2) || error_exit "${cmd} failed, see ${log_base}.err"
eval "niceload --mem 500M ${cmd}" > >(tee -a ${log_base}.log) 2> >(tee -a ${log_base}.err >&2) || error_exit "${cmd} failed, see ${log_base}.err"
echo -e "This run started on ${start_day}.\nIf you want to find all older output, you can run the following command:\n${find_older_output}\n(Use -delete instead of -print to remove those files (do this only in case of full output update).)" 1>&2
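The redirections above (see the two linked answers) use bash process substitution: > >(tee -a file.log) duplicates stdout to the log file while still showing it on the terminal, and 2> >(tee -a file.err >&2) does the same for stderr, sending the copy back to stderr. A minimal sketch with placeholder names:

# stdout goes to both run.log and the terminal; stderr to both run.err and stderr.
some_command > >(tee -a run.log) 2> >(tee -a run.err >&2)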
......
@@ -1159,7 +1159,7 @@ rule feature_count_reads:
shell:
"""
tmpdir=$(TMPDIR=/var/tmp mktemp --tmpdir -d "feature_{wildcards.lib}_{wildcards.rep}_{wildcards.read_type}_{wildcards.mapping_type}_{wildcards.biotype}_{wildcards.orientation}_{wildcards.feature_type}.XXXXXXXXXX")
cmd="featureCounts {params.min_mapq} -a {params.annot} -o {output.counts} -t {wildcards.feature_type} -g "gene_id" -O -s {params.stranded} --fracOverlap 1 --tmpDir ${{tmpdir}} {input}"
cmd="niceload --mem 500M featureCounts {params.min_mapq} -a {params.annot} -o {output.counts} -t {wildcards.feature_type} -g "gene_id" -O -s {params.stranded} --fracOverlap 1 --tmpDir ${{tmpdir}} {input}"
featureCounts -v 2> {log.log}
echo ${{cmd}} 1>> {log.log}
eval ${{cmd}} 1>> {log.log} 2> {log.err} || error_exit "featureCounts failed"
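The rule above also shows the log-then-eval convention used in these rules: the command line is stored in a variable, echoed into the log so the exact invocation is recorded next to its output, and only then run with eval. A minimal sketch outside Snakemake (command and file names are placeholders):

cmd="ls -l /var/tmp"
echo ${cmd} >> run.log             # record the exact invocation
eval ${cmd} >> run.log 2>> run.err || echo "command failed" >&2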
@@ -1458,7 +1458,7 @@ rule small_RNA_seq_annotate:
mem_mb=19150
shell:
"""
small_RNA_seq_annotate.py -p {threads} -b {input.sorted_bam} -g {input.merged_gtf} \\
niceload --mem 500M small_RNA_seq_annotate.py -p {threads} -b {input.sorted_bam} -g {input.merged_gtf} \\
-r {params.reads_out_dir} -o {params.counts_out_dir} -l {log.mapped_log}
"""
@@ -1509,7 +1509,7 @@ rule small_RNA_seq_annotate_U:
mem_mb=14700
shell:
"""
small_RNA_seq_annotate.py -p {threads} -b {input.sorted_bam} -u -g {input.merged_gtf} \\
niceload --mem 500M small_RNA_seq_annotate.py -p {threads} -b {input.sorted_bam} -u -g {input.merged_gtf} \\
-r {params.reads_out_dir} -o {params.counts_out_dir} -l {log.mapped_log}
"""
@@ -2657,7 +2657,7 @@ your deepTools settings and check your input files.
#"""
try:
shell("""
cmd="bamCoverage -b {input.bam} --skipNAs {params.orient_filter} \\
cmd="niceload --mem 500M bamCoverage -b {input.bam} --skipNAs {params.orient_filter} \\
-of=bigwig -bs 10 -p={threads} \\
-o {output.bigwig} \\
1>> {log.log} 2>> {log.err}"
@@ -2722,7 +2722,7 @@ bam2bigwig.sh: bedGraphToBigWig failed
"""
try:
shell("""
bam2bigwig.sh {input.bam} {params.genome_binned} \\
niceload --mem 500M bam2bigwig.sh {input.bam} {params.genome_binned} \\
{wildcards.lib}_{wildcards.rep} {wildcards.orientation} F \\
%f {output.bigwig_norm} \\
> {log.log} 2> {log.err} \\
@@ -3417,7 +3417,7 @@ rule plot_biotype_mean_meta_profile:
shell:
"""
tmpdir=$(TMPDIR=/var/tmp mktemp --tmpdir -d "plot_meta_profile_meta_scale_{wildcards.meta_scale}_{wildcards.read_type}_by_{wildcards.norm}_{wildcards.orientation}_{wildcards.type_set}_{wildcards.biotype}_{wildcards.min_len}_{wildcards.group_type}_{wildcards.lib_group}.XXXXXXXXXX")
computeMatrix scale-regions -S {input.bws} \\
niceload --mem 500M computeMatrix scale-regions -S {input.bws} \\
-R {input.bed} \\
{params.meta_params} \\
-p {threads} \\
@@ -3425,7 +3425,7 @@ rule plot_biotype_mean_meta_profile:
1> {log.log} \\
2> {log.err} \\
|| error_exit "computeMatrix failed"
plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output} \\
niceload --mem 500M plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output} \\
-z {wildcards.read_type}_{wildcards.orientation}_on_{wildcards.biotype} \\
-y "{params.y_label}" \\
--samplesLabel {params.labels} \\
@@ -3465,7 +3465,7 @@ rule plot_gene_list_mean_meta_profile:
shell:
"""
tmpdir=$(TMPDIR=/var/tmp mktemp --tmpdir -d "plot_meta_profile_meta_scale_{wildcards.meta_scale}_{wildcards.read_type}_by_{wildcards.norm}_{wildcards.orientation}_{wildcards.type_set}_{wildcards.id_list}_{wildcards.group_type}_{wildcards.lib_group}.XXXXXXXXXX")
computeMatrix scale-regions -S {input.bws} \\
niceload --mem 500M computeMatrix scale-regions -S {input.bws} \\
-R {input.bed} \\
{params.meta_params} \\
-p 1 \\
@@ -3473,7 +3473,7 @@ rule plot_gene_list_mean_meta_profile:
1> {log.log} \\
2> {log.err} \\
|| error_exit "computeMatrix failed"
plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output} \\
niceload --mem 500M plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output} \\
-z {wildcards.read_type}_{wildcards.orientation}_on_{wildcards.id_list} \\
-y "{params.y_label}" \\
--samplesLabel {params.labels} \\
@@ -3529,7 +3529,7 @@ rule make_pi_matches_bed:
shell:
"""
mkfifo {params.tmp_bam}
samtools view -u -F 4 {input.sam} | samtools sort -o {params.tmp_bam} - &
samtools view -u -F 4 {input.sam} | niceload --mem 500M samtools sort -o {params.tmp_bam} - &
bedtools bamtobed -i {params.tmp_bam} > {output.bed}
rm -f {params.tmp_bam}
"""
@@ -3546,7 +3546,7 @@ rule locate_pi_targets:
# -f 1.0: We want the piRNA match to be entirely inside the annotation
# -S: We want it to be antisense
# This automatically filters out self-matches.
bedtools intersect -a {input.bed_targets} -b {input.bed_biotype} \\
niceload --mem 500M bedtools intersect -a {input.bed_targets} -b {input.bed_biotype} \\
-f 1.0 -S > {output.bed}
"""
@@ -3575,7 +3575,7 @@ rule plot_pi_targets_profile:
shell("""
tmpdir=$(TMPDIR=/var/tmp mktemp --tmpdir -d "plot_pi_targets_profile_{wildcards.lib}_{wildcards.rep}_{wildcards.read_type}_by_{wildcards.norm}_{wildcards.orientation}_{wildcards.biotype}.XXXXXXXXXX")
# 501 and 21 are multiples of 3
computeMatrix scale-regions -S {input.bigwig} \\
niceload --mem 500M computeMatrix scale-regions -S {input.bigwig} \\
-R {input.bed} \\
-bs 3 \\
-b 1002 \\
@@ -3586,7 +3586,7 @@ computeMatrix scale-regions -S {input.bigwig} \\
1> {log.log} \\
2> {log.err} \\
|| error_exit "computeMatrix failed"
plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output.figure} \\
niceload --mem 500M plotProfile -m ${{tmpdir}}/meta_profile.gz -out {output.figure} \\
-y "{params.y_label}" \\
--startLabel "" \\
--endLabel "" \\
......
@@ -4,19 +4,19 @@ def mapping_command(aligner):
"""This function returns the shell commands to run given the *aligner*."""
if aligner == "hisat2":
shell_commands = """
cmd="hisat2 -p {snakemake.threads} --dta --seed 123 -t {snakemake.params.settings} --mm -x {snakemake.params.index} -U {snakemake.input.fastq} --no-unal --un-gz {snakemake.output.nomap_fastq} -S {snakemake.output.sam}"
cmd="niceload --mem 500M hisat2 -p {snakemake.threads} --dta --seed 123 -t {snakemake.params.settings} --mm -x {snakemake.params.index} -U {snakemake.input.fastq} --no-unal --un-gz {snakemake.output.nomap_fastq} -S {snakemake.output.sam}"
echo ${{cmd}} 1> {snakemake.log.log}
eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
"""
elif aligner == "bowtie2":
shell_commands = """
cmd="bowtie2 -p {snakemake.threads} --seed 123 -t {snakemake.params.settings} --mm -x {snakemake.params.index} -U {snakemake.input.fastq} --no-unal --un-gz {snakemake.output.nomap_fastq} -S {snakemake.output.sam}"
cmd="niceload --mem 500M bowtie2 -p {snakemake.threads} --seed 123 -t {snakemake.params.settings} --mm -x {snakemake.params.index} -U {snakemake.input.fastq} --no-unal --un-gz {snakemake.output.nomap_fastq} -S {snakemake.output.sam}"
echo ${{cmd}} > {snakemake.log.log}
eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
"""
elif aligner == "crac":
shell_commands = """
cmd="crac --nb-threads {snakemake.threads} --summary %s --all %s {snakemake.params.settings} -i {snakemake.params.index} -r {snakemake.input.fastq} --sam {snakemake.output.sam}"
cmd="niceload --mem 500M crac --nb-threads {snakemake.threads} --summary %s --all %s {snakemake.params.settings} -i {snakemake.params.index} -r {snakemake.input.fastq} --sam {snakemake.output.sam}"
echo ${{cmd}} > {snakemake.log.log}
eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
# TODO: extract non mappers from the sam output
......