Select Git revision
wrapper.py 4.36 KiB
# Copyright (C) 2020 Blaise Li
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from tempfile import NamedTemporaryFile
from snakemake.shell import shell
from libworkflows import run_with_modules
load_modules = snakemake.config.get("load_modules", False)
def mapping_command(aligner):
"""This function returns the shell commands to run given the *aligner*."""
modules = ["parallel"]
if aligner == "hisat2":
modules.append("hisat2/2.1.0")
shell_commands = """
cmd="niceload --noswap -q hisat2 -p {snakemake.threads} --dta --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null"
echo ${{cmd}} 1> {snakemake.log.log}
eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%" -f1)
echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record}
"""
elif aligner == "bowtie2":
modules.append("bowtie2/2.3.4.3")
shell_commands = """
cmd="niceload --noswap -q bowtie2 -p {snakemake.threads} --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null"
echo ${{cmd}} > {snakemake.log.log}
eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%" -f1)
echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record}
"""
# elif aligner == "crac":
# modules.append("crac/1.3.0")
# shell_commands = """
#cmd="niceload --noswap -q crac --nb-threads {snakemake.threads} --summary %s --all %s {snakemake.params.these_settings} -i {snakemake.params.index} -r {fastq} --sam {snakemake.output.sam}"
#echo ${{cmd}} > {snakemake.log.log}
#eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
## TODO: extract non mappers from the sam output
#> {snakemake.output.nomap_fastq}
#""" % (snakemake.output.nomap_fastq.split("_unmapped")[0] + "_summary.txt", snakemake.output.nomap_fastq.split("_unmapped")[0] + "_output.txt")
else:
raise NotImplementedError("%s is not yet handled." % aligner)
if load_modules:
return run_with_modules(shell_commands, modules)
return shell_commands
def split_on_tab(s):
# https://stackoverflow.com/a/15095537/1878788
return s.split(b'\t')
with NamedTemporaryFile() as fastq, NamedTemporaryFile() as rates_record:
snakemake.params.fastq = fastq.name
# Take the first 10000 reads from the input
# https://stackoverflow.com/a/46621920/1878788
shell(f"set +o pipefail; zcat {snakemake.input.fastq} | head -40000 | gzip > {fastq.name}")
snakemake.params.rates_record = rates_record.name
for settings in snakemake.params.settings:
snakemake.params.these_settings = settings
shell(mapping_command(snakemake.params.aligner))
rates_record.seek(0)
(best_rate, best_settings) = max(
(float(rate), settings)
for (rate, settings)
in map(split_on_tab, rates_record))
with open(snakemake.output.mapping_params, "w") as fh:
fh.write(f"{best_params}\t({best_rate}%%)\n")
if hasattr(snakemake.output, "methods"):
shell(f"cp {snakemake.input.methods} {snakemake.output.methods}")
methods = f"# {snakemake.rule}\nThe first 10000 {snakemake.params.read_name} reads were mapped on the {snakemake.params.genome_name} using {snakemake.params.aligner} (version ${{{{v}}}}) with options {snakemake.params.settings}. Among those settings, the one resulting in the highest mapping rate was retained."
shell(f"""
v=$({snakemake.params.aligner} --version | head -1 | awk '{{{{print $3}}}}')
echo "{methods}" >> {snakemake.output.methods}
""")