Commit e4597f9a authored by Blaise Li

Run from within output_dir. Minor GRO-seq changes.

Made the genome configurable for GRO-seq; tried to debug uncatchable rpy2
exceptions.
parent 233c7a4e
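The central change, applied to both Snakefiles touched here: instead of prefixing every path with `config["output_dir"]`, the pipeline is now expected to be launched from within the output directory, so paths are built relative to the current working directory. A minimal sketch of the before/after, assuming `OPJ` is the usual alias for `os.path.join`:

```python
import os

OPJ = os.path.join  # assumed alias, as used throughout the Snakefiles

# Before: every path carried the configured prefix.
# output_dir = config["output_dir"]
# log_dir = OPJ(output_dir, "logs")

# After: launched from within output_dir, paths become relative.
output_dir = os.path.abspath(".")
log_dir = OPJ("logs")
data_dir = OPJ("data")
```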
@@ -54,9 +54,11 @@ merged_fastq = config["merged_fastq"]
barcode_dict = config["barcode_dict"]
BARCODES = list(barcode_dict.keys())
MAX_DIFF = config["max_diff"]
output_dir = config["output_dir"]
log_dir = OPJ(output_dir, "logs")
data_dir = OPJ(output_dir, "data")
#output_dir = config["output_dir"]
#workdir: config["output_dir"]
output_dir = os.path.abspath(".")
log_dir = OPJ("logs")
data_dir = OPJ("data")
demux_dir = OPJ(data_dir, f"demultiplexed_{MAX_DIFF}")
lib2raw = defaultdict(dict)
REPS = set()
@@ -144,21 +146,21 @@ preprocessing = [
mapping = [
## Will be pulled in as dependencies of other needed results:
# expand(OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED),
# expand(OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED),
##
expand(
OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_samtools_stats.txt" % genome),
OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_samtools_stats.txt" % genome),
trimmer=TRIMMERS, lib=LIBS, rep=REPS,
read_type=POST_TRIMMING + SIZE_SELECTED + [f"{to_map}_unmapped" for to_map in POST_TRIMMING + SIZE_SELECTED]),
]
counting = [
## Will be pulled in as dependencies of other needed results:
# expand(OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED, biotype=COUNT_BIOTYPES, orientation=ORIENTATIONS),
# expand(OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED, biotype=COUNT_BIOTYPES, orientation=ORIENTATIONS),
##
expand(OPJ(output_dir, "{trimmer}", aligner, f"mapped_{genome}", "feature_count", "summaries", "all_{read_type}_on_%s_{orientation}_counts.txt" % genome), trimmer=TRIMMERS, read_type=POST_TRIMMING + SIZE_SELECTED, orientation=ORIENTATIONS),
expand(OPJ(output_dir, "{trimmer}", aligner, f"mapped_{genome}", "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"), trimmer=TRIMMERS, read_type=POST_TRIMMING + SIZE_SELECTED, biotype=COUNT_BIOTYPES, orientation=ORIENTATIONS),
expand(OPJ(output_dir, "{trimmer}", aligner, f"mapped_{genome}", "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED, norm=NORM_TYPES, orientation=["all"]),
expand(OPJ("{trimmer}", aligner, f"mapped_{genome}", "feature_count", "summaries", "all_{read_type}_on_%s_{orientation}_counts.txt" % genome), trimmer=TRIMMERS, read_type=POST_TRIMMING + SIZE_SELECTED, orientation=ORIENTATIONS),
expand(OPJ("{trimmer}", aligner, f"mapped_{genome}", "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"), trimmer=TRIMMERS, read_type=POST_TRIMMING + SIZE_SELECTED, biotype=COUNT_BIOTYPES, orientation=ORIENTATIONS),
expand(OPJ("{trimmer}", aligner, f"mapped_{genome}", "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome), trimmer=TRIMMERS, lib=LIBS, rep=REPS, read_type=POST_TRIMMING + SIZE_SELECTED, norm=NORM_TYPES, orientation=["all"]),
]
#TODO:
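These target lists mix two formatting stages: `%` / f-string interpolation fills in `genome` and `aligner` immediately, while `{trimmer}`, `{lib}`, etc. are left for `expand()`, which takes the cartesian product of the value lists. A sketch with hypothetical wildcard values:

```python
import os
from itertools import product

OPJ = os.path.join
genome, aligner = "C_elegans", "bowtie2"  # hypothetical values

pattern = OPJ("{trimmer}", aligner, f"mapped_{genome}",
              "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome)

# expand() boils down to a cartesian product plus str.format:
targets = [pattern.format(trimmer=t, lib=l, rep=r, read_type=rt)
           for t, l, r, rt in product(["cutadapt"], ["WT"], ["1"], ["adapt"])]
# ['cutadapt/bowtie2/mapped_C_elegans/WT_1_adapt_on_C_elegans_sorted.bam']
```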
@@ -407,8 +409,8 @@ rule map_on_genome:
fastq = source_fastq,
output:
# sam files take a lot of space
sam = temp(OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s.sam" % genome)),
nomap_fastq = OPJ(output_dir, "{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.fastq.gz" % genome),
sam = temp(OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s.sam" % genome)),
nomap_fastq = OPJ("{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.fastq.gz" % genome),
wildcard_constraints:
read_type = "|".join(POST_TRIMMING + SIZE_SELECTED)
params:
@@ -430,11 +432,11 @@ rule remap_on_genome:
input:
# fastq = OPJ(data_dir, "trimmed_{trimmer}", "{lib}_{rep}_{read_type}.fastq.gz"),
#fastq = rules.map_on_genome.output.nomap_fastq,
fastq = OPJ(output_dir, "{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.fastq.gz" % genome),
fastq = OPJ("{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.fastq.gz" % genome),
output:
# sam files take a lot of space
sam = temp(OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.sam" % genome)),
nomap_fastq = OPJ(output_dir, "{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_unmapped_on_%s.fastq.gz" % genome),
sam = temp(OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_on_%s.sam" % genome)),
nomap_fastq = OPJ("{trimmer}", aligner, "info_mapping_%s" % genome, "{lib}_{rep}_{read_type}_unmapped_unmapped_on_%s.fastq.gz" % genome),
wildcard_constraints:
read_type = "|".join(POST_TRIMMING + SIZE_SELECTED)
#wildcard_constraints:
@@ -456,10 +458,10 @@ rule remap_on_genome:
rule sam2indexedbam:
input:
sam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s.sam" % genome),
sam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s.sam" % genome),
output:
sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome),
index = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam.bai" % genome),
sorted_bam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome),
index = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam.bai" % genome),
message:
"Sorting and indexing sam file for {wildcards.lib}_{wildcards.rep}_{wildcards.read_type}."
log:
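For reference, the sort-and-index step can be sketched with pysam's samtools wrappers; the rule's actual shell command sits in a truncated part of the hunk, so file names here are hypothetical:

```python
import pysam

# Coordinate-sort the SAM into a BAM, then index it (samtools sort/index).
pysam.sort("-o", "lib_rep_type_on_genome_sorted.bam", "lib_rep_type_on_genome.sam")
pysam.index("lib_rep_type_on_genome_sorted.bam")
```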
@@ -475,7 +477,7 @@ rule compute_mapping_stats:
input:
sorted_bam = rules.sam2indexedbam.output.sorted_bam,
output:
stats = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_samtools_stats.txt" % genome),
stats = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_samtools_stats.txt" % genome),
shell:
"""samtools stats {input.sorted_bam} > {output.stats}"""
@@ -484,11 +486,11 @@ rule fuse_bams:
"""This rule fuses the two sorted bam files corresponding to the mapping
of the reads containing the adaptor or not."""
input:
noadapt_sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_noadapt_on_%s_sorted.bam" % genome),
adapt_sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_adapt_on_%s_sorted.bam" % genome),
noadapt_sorted_bam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_noadapt_on_%s_sorted.bam" % genome),
adapt_sorted_bam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_adapt_on_%s_sorted.bam" % genome),
output:
sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_on_%s_sorted.bam" % genome),
bai = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_on_C_%s_sorted.bam.bai" % genome),
sorted_bam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_on_%s_sorted.bam" % genome),
bai = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_on_C_%s_sorted.bam.bai" % genome),
message:
"Fusing sorted bam files for {wildcards.lib}_{wildcards.rep}"
log:
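The fusion itself presumably boils down to samtools merge plus a re-index; a sketch with pysam (hypothetical file names; merging coordinate-sorted BAMs keeps the output sorted):

```python
import pysam

pysam.merge("-f", "lib_rep_on_genome_sorted.bam",       # fused output
            "lib_rep_noadapt_on_genome_sorted.bam",     # reads without adaptor
            "lib_rep_adapt_on_genome_sorted.bam")       # reads with adaptor
pysam.index("lib_rep_on_genome_sorted.bam")
```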
@@ -520,11 +522,11 @@ def biotype2annot(wildcards):
rule feature_count_reads:
input:
sorted_bam = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome),
bai = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam.bai" % genome),
sorted_bam = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam" % genome),
bai = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "{lib}_{rep}_{read_type}_on_%s_sorted.bam.bai" % genome),
output:
counts = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"),
counts_converted = OPJ(output_dir, "{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts_gene_names.txt"),
counts = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"),
counts_converted = OPJ("{trimmer}", aligner, "mapped_C_elegans", "feature_count", "{lib}_{rep}_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts_gene_names.txt"),
params:
stranded = feature_orientation2stranded(LIB_TYPE),
annot = biotype2annot,
@@ -549,9 +551,9 @@ rule feature_count_reads:
rule summarize_feature_counts:
"""For a given library, compute the total counts for each biotype and write this in a summary table."""
input:
biotype_counts_files = expand(OPJ(output_dir, "{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "{{lib}}_{{rep}}_{{read_type}}_on_%s" % genome, "{biotype}_{{orientation}}_counts.txt"), biotype=COUNT_BIOTYPES),
biotype_counts_files = expand(OPJ("{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "{{lib}}_{{rep}}_{{read_type}}_on_%s" % genome, "{biotype}_{{orientation}}_counts.txt"), biotype=COUNT_BIOTYPES),
output:
summary = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "{lib}_{rep}_{read_type}_on_%s_{orientation}_counts.txt" % genome),
summary = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "{lib}_{rep}_{read_type}_on_%s_{orientation}_counts.txt" % genome),
run:
sum_counter = sum_feature_counts
with open(output.summary, "w") as summary_file:
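The doubled braces in the `expand()` patterns above are deliberate: `expand()` formats the pattern, so wildcards that must survive for Snakemake to match later (`trimmer`, `lib`, `rep`, `orientation`) are escaped as `{{...}}`, and only the listed names (`biotype` here) are substituted. A reduced sketch with a simplified path:

```python
from snakemake.io import expand

paths = expand("{{trimmer}}/feature_count/{biotype}_{{orientation}}_counts.txt",
               biotype=["protein_coding", "piRNA"])
# ['{trimmer}/feature_count/protein_coding_{orientation}_counts.txt',
#  '{trimmer}/feature_count/piRNA_{orientation}_counts.txt']
```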
@@ -563,12 +565,11 @@ rule summarize_feature_counts:
rule gather_read_counts_summaries:
input:
summary_tables = expand(OPJ(output_dir, "{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "{lib}_{rep}_{{read_type}}_on_%s_{{orientation}}_counts.txt" % genome), lib=LIBS, rep=REPS),
summary_tables = expand(OPJ("{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "{lib}_{rep}_{{read_type}}_on_%s_{{orientation}}_counts.txt" % genome), lib=LIBS, rep=REPS),
output:
summary_table = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "all_{read_type}_on_%s_{orientation}_counts.txt" % genome),
summary_table = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "summaries", "all_{read_type}_on_%s_{orientation}_counts.txt" % genome),
run:
summary_files = (OPJ(
output_dir,
wildcards.trimmer,
aligner,
"mapped_%s" % genome,
@@ -585,9 +586,9 @@ rule gather_read_counts_summaries:
rule gather_counts:
"""For a given biotype, gather counts from all libraries in one table."""
input:
counts_tables = expand(OPJ(output_dir, "{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{{read_type}}_on_%s" % genome, "{{biotype}}_{{orientation}}_counts.txt"), lib=LIBS, rep=REPS),
counts_tables = expand(OPJ("{{trimmer}}", aligner, "mapped_%s" % genome, "feature_count", "{lib}_{rep}_{{read_type}}_on_%s" % genome, "{{biotype}}_{{orientation}}_counts.txt"), lib=LIBS, rep=REPS),
output:
counts_table = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"),
counts_table = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_counts.txt"),
# wildcard_constraints:
# # Avoid ambiguity with join_all_counts
# biotype = "|".join(COUNT_BIOTYPES)
@@ -595,7 +596,6 @@ rule gather_counts:
# Gathering the counts data
############################
counts_files = (OPJ(
output_dir,
wildcards.trimmer,
aligner,
"mapped_%s" % genome,
@@ -635,7 +635,7 @@ rule compute_median_ratio_to_pseudo_ref_size_factors:
input:
counts_table = rules.gather_counts.output.counts_table,
output:
median_ratios_file = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_median_ratios_to_pseudo_ref.txt"),
median_ratios_file = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "{biotype}_{orientation}_median_ratios_to_pseudo_ref.txt"),
run:
counts_data = pd.read_table(
input.counts_table,
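The rule name refers to DESeq-style median-of-ratios size factors: build a pseudo-reference as the per-gene geometric mean across libraries, then take each library's median ratio to it. A minimal pandas sketch with made-up counts (zero-count genes are dropped before taking logs):

```python
import numpy as np
import pandas as pd

counts = pd.DataFrame({"lib1_1": [10, 100, 50], "lib2_1": [20, 180, 90]})
log_counts = np.log(counts[(counts > 0).all(axis=1)])
log_pseudo_ref = log_counts.mean(axis=1)  # log of per-gene geometric mean
median_ratios = np.exp(log_counts.sub(log_pseudo_ref, axis=0).median())
# one size factor per library: here lib1_1 ~0.745, lib2_1 ~1.342
```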
@@ -652,7 +652,7 @@ rule compute_median_ratio_to_pseudo_ref_size_factors:
def source_norm_file(wildcards):
if wildcards.norm == "median_ratio_to_pseudo_ref":
return OPJ(output_dir, f"{wildcards.trimmer}", aligner, f"mapped_{genome}", "feature_count", f"all_{wildcards.read_type}_on_%s" % genome, "protein_coding_fwd_median_ratios_to_pseudo_ref.txt")
return OPJ(f"{wildcards.trimmer}", aligner, f"mapped_{genome}", "feature_count", f"all_{wildcards.read_type}_on_%s" % genome, "protein_coding_fwd_median_ratios_to_pseudo_ref.txt")
else:
return rules.summarize_feature_counts.output.summary
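`source_norm_file` is a standard Snakemake input function: it receives the rule's `wildcards` at DAG-construction time and returns the path to depend on, which is how `make_normalized_bigwig` below switches its normalization input on `{norm}`. Reduced to its skeleton (hypothetical paths):

```python
def source_norm_file(wildcards):
    # Called by Snakemake with the matched wildcards of the consuming rule.
    if wildcards.norm == "median_ratio_to_pseudo_ref":
        return f"{wildcards.trimmer}/feature_count/median_ratios.txt"
    return f"{wildcards.trimmer}/feature_count/summary.txt"
```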
@@ -664,11 +664,11 @@ rule make_normalized_bigwig:
# TODO: use sourcing function based on norm
norm_file = source_norm_file,
#size_factor_file = rules.compute_coverage.output.coverage
#median_ratios_file = OPJ(output_dir, "{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "protein_coding_fwd_median_ratios_to_pseudo_ref.txt"),
#median_ratios_file = OPJ("{trimmer}", aligner, "mapped_%s" % genome, "feature_count", "all_{read_type}_on_%s" % genome, "protein_coding_fwd_median_ratios_to_pseudo_ref.txt"),
# TODO: compute this
#scale_factor_file = OPJ(output_dir, aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "pisimi_median_ratios_to_pseudo_ref.txt"),
#scale_factor_file = OPJ(aligner, "mapped_C_elegans", "annotation", "all_%s_on_C_elegans" % size_selected, "pisimi_median_ratios_to_pseudo_ref.txt"),
output:
bigwig_norm = OPJ(output_dir, "{trimmer}", aligner, f"mapped_{genome}", "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
bigwig_norm = OPJ("{trimmer}", aligner, f"mapped_{genome}", "{lib}_{rep}_{read_type}_on_%s_by_{norm}_{orientation}.bw" % genome),
#params:
# orient_filter = bamcoverage_filter,
threads: 12 # to limit memory usage, actually
......
@@ -83,12 +83,14 @@ annot_dir = genome_dict["annot_dir"]
convert_dir = genome_dict["convert_dir"]
gene_lists_dir = genome_dict["gene_lists_dir"]
avail_id_lists = set(glob(OPJ(gene_lists_dir, "*_ids.txt")))
output_dir = config["output_dir"]
log_dir = OPJ(output_dir, f"logs_{genome}")
data_dir = OPJ(output_dir, "data")
#output_dir = config["output_dir"]
#workdir: config["output_dir"]
output_dir = os.path.abspath(".")
log_dir = OPJ(f"logs_{genome}")
data_dir = OPJ("data")
counter = "feature_count"
counts_dir = OPJ(output_dir, aligner, f"mapped_{genome}", counter)
counts_dir = OPJ(aligner, f"mapped_{genome}", counter)
# Limit the risk of ambiguity by requiring replicates to be numbers
# and by restricting the possible forms of some other wildcards
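Such constraints are typically declared with `wildcard_constraints`, as sketched below (assumed patterns; the actual declaration sits in a collapsed part of the diff):

```python
wildcard_constraints:
    rep=r"\d+",                # replicates must be numbers
    orientation="fwd|rev|all"  # assumed set of orientations
```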
@@ -103,7 +105,7 @@ rule all:
# OPJ(data_dir, "trimmed", "{lib}_{rep}.fastq.gz"),
# lib=LIBS, rep=REPS),
# expand(
# OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam" % genome),
# OPJ(aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam" % genome),
# lib=LIBS, rep=REPS),
# expand(
# OPJ(counts_dir,
@@ -151,8 +153,8 @@ rule map_on_genome:
fastq = rules.trim_reads.output.trimmed,
output:
# temp because it uses a lot of space
sam = temp(OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s.sam" % genome)),
nomap_fastq = OPJ(output_dir, aligner, f"not_mapped_{genome}", "{lib}_{rep}_unmapped_on_%s.fastq.gz" % genome),
sam = temp(OPJ(aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s.sam" % genome)),
nomap_fastq = OPJ(aligner, f"not_mapped_{genome}", "{lib}_{rep}_unmapped_on_%s.fastq.gz" % genome),
params:
aligner = aligner,
index = genome_db,
@@ -174,8 +176,8 @@ rule sam2indexedbam:
input:
sam = rules.map_on_genome.output.sam,
output:
sorted_bam = protected(OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam" % genome)),
index = protected(OPJ(output_dir, aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam.bai" % genome)),
sorted_bam = protected(OPJ(aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam" % genome)),
index = protected(OPJ(aligner, f"mapped_{genome}", "{lib}_{rep}_on_%s_sorted.bam.bai" % genome)),
message:
"Sorting and indexing sam file for {wildcards.lib}_{wildcards.rep}."
log:
@@ -377,7 +379,7 @@ rule compute_RPK:
input:
counts_data = source_counts,
#counts_data = rules.gather_counts.output.counts_table,
#counts_data = OPJ(output_dir, aligner, f"mapped_{genome}", "feature_count",
#counts_data = OPJ(aligner, f"mapped_{genome}", "feature_count",
# f"all_on_{genome}", "{biotype}_{orientation}_counts.txt"),
output:
rpk_file = OPJ(counts_dir,
......
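RPK here is the usual reads-per-kilobase: each feature's count divided by its length in kilobases. A tiny worked sketch (made-up genes and lengths):

```python
import pandas as pd

counts = pd.Series({"geneA": 200, "geneB": 50})
lengths_bp = pd.Series({"geneA": 2000, "geneB": 500})
rpk = counts / (lengths_bp / 1000)  # geneA: 100.0, geneB: 100.0
```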
@@ -67,24 +67,32 @@ shift
if [ -e ${configfile} ]
then
kilobytes_tot=$(mawk '$1 == "MemTotal:" {print $2}' /proc/meminfo)
# Some rules were given a "mem_mb" resource section based on the "max_vms" benchmarking result.
# See the /pasteur/homes/bli/Documents/Informatique/benchmarks/Pipeline_benchmarking/Pipeline_benchmarking.ipynb jupyter notebook.
# These values are in megabytes (https://stackoverflow.com/a/47201241/1878788)
# We divide the total memory (in kB) by 1100 instead of 1000
# to avoid pretending that we have all this memory available for snakemake rules.
megabytes_resource=$(echo "${kilobytes_tot} / 1100" | bc)
cmd="snakemake -s ${snakefile} --configfile ${configfile} --resources mem_mb=${megabytes_resource} $@"
echo "Pipeline configuration found: ${configfile}"
else
error_exit "Pipeline configuration file ${configfile} not found."
fi
# Determine the output directory and where to log the pipeline (fragile!)
output_dir=$(grep "output_dir" "${configfile}" | mawk '{print $NF}' | sed 's/,$//' | sed 's/"//g')
mkdir -p ${output_dir}
start_day=$(date +"%Y-%m-%d")
find_older_output="find ${output_dir} -depth ! -newermt ${start_day} -print"
log_base="${output_dir}/$(date +"%d%m%y_%Hh%Mm")"
mkdir -p ${output_dir}
config_base=$(basename ${configfile})
config_snapshot="${output_dir}/${config_base}"
echo "Saving a local copy of the configuration in ${config_snapshot}"
cp -f ${configfile} ${config_snapshot}
kilobytes_tot=$(mawk '$1 == "MemTotal:" {print $2}' /proc/meminfo)
# Some rules were given a "mem_mb" resource section based on the "max_vms" benchmarking result.
# See the /pasteur/homes/bli/Documents/Informatique/benchmarks/Pipeline_benchmarking/Pipeline_benchmarking.ipynb jupyter notebook.
# These values are in megabytes (https://stackoverflow.com/a/47201241/1878788)
# We divide the total memory (in kB) by 1100 instead of 1000
# to avoid pretending that we have all this memory available for snakemake rules.
megabytes_resource=$(echo "${kilobytes_tot} / 1100" | bc)
cmd="(cd ${output_dir}; snakemake -s ${snakefile} --configfile ${config_base} --resources mem_mb=${megabytes_resource} $@)"
echo ${cmd} > ${log_base}.log
# https://unix.stackexchange.com/a/245610/55127
# https://stackoverflow.com/a/692407/1878788
@@ -93,7 +101,6 @@ echo ${cmd} > ${log_base}.log
eval ${cmd} > >(tee -a ${log_base}.log) 2> >(tee -a ${log_base}.err >&2) || error_exit "${cmd} failed, see ${log_base}.err"
end_day=$(date +"%Y-%m-%d")
#echo -e "This run started on ${start_day}.\nIf you want to find all older output, you can run the following command:\n${find_older_output}\n(Use -delete instead of -print to remove those files (do this only in case of full output update).)" 1>&2
echo -e "This run started on ${start_day} and ended on ${end_day}.\n" 1>&2
......
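The memory budget arithmetic, restated: dividing MemTotal (in kB) by 1100 both converts to megabytes (÷1000) and withholds roughly 9% headroom so Snakemake never believes the whole machine is available. The same computation sketched in Python:

```python
def snakemake_mem_budget_mb(meminfo_path="/proc/meminfo"):
    """Total memory in MB, minus ~9% headroom (kB // 1100)."""
    with open(meminfo_path) as meminfo:
        for line in meminfo:
            if line.startswith("MemTotal:"):
                return int(line.split()[1]) // 1100
    raise RuntimeError("MemTotal not found in " + meminfo_path)

# e.g. 16384000 kB -> 14894 MB instead of 16384 MB
```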