diff --git a/smwrappers/explore_mapping_parameters/wrapper.py b/smwrappers/explore_mapping_parameters/wrapper.py new file mode 100644 index 0000000000000000000000000000000000000000..74715b68c974d85ba187bf1249e64c211c82ccd2 --- /dev/null +++ b/smwrappers/explore_mapping_parameters/wrapper.py @@ -0,0 +1,78 @@ +# Copyright (C) 2020 Blaise Li +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +from snakemake.shell import shell +from libworkflows import run_with_modules + +load_modules = snakemake.config.get("load_modules", False) + +def mapping_command(aligner): + """This function returns the shell commands to run given the *aligner*.""" + modules = ["parallel"] + if aligner == "hisat2": + modules.append("hisat2/2.1.0") + shell_commands = """ +cmd="niceload --noswap -q hisat2 -p {snakemake.threads} --dta --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null" +echo ${{cmd}} 1> {snakemake.log.log} +eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err} +rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%%" -f1) +echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record} +""" + elif aligner == "bowtie2": + modules.append("bowtie2/2.3.4.3") + shell_commands = """ +cmd="niceload --noswap -q bowtie2 -p {snakemake.threads} --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null" +echo ${{cmd}} > {snakemake.log.log} +eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err} +rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%%" -f1) +echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record} +""" +# elif aligner == "crac": +# modules.append("crac/1.3.0") +# shell_commands = """ +#cmd="niceload --noswap -q crac --nb-threads {snakemake.threads} --summary %s --all %s {snakemake.params.these_settings} -i {snakemake.params.index} -r {fastq} --sam {snakemake.output.sam}" +#echo ${{cmd}} > {snakemake.log.log} +#eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err} +## TODO: extract non mappers from the sam output +#> {snakemake.output.nomap_fastq} +#""" % (snakemake.output.nomap_fastq.split("_unmapped")[0] + "_summary.txt", snakemake.output.nomap_fastq.split("_unmapped")[0] + "_output.txt") + else: + raise NotImplementedError("%s is not yet handled." % aligner) + if load_modules: + return run_with_modules(shell_commands, modules) + return shell_commands + +with NamedTemporaryFile() as fastq, NamedTemporaryFile() as rates_record: + snakemake.params["fastq"] = fastq.name + # Take the first 10000 reads from the input + shell(f"zcat {snakemake.input.fastq} | head -40000 | gzip > {fastq.name}") + snakemake.params["rates_record"] = rates_record.name + for settings in snakemake.params.settings: + snakemake.params["these_settings"] = settings + shell(mapping_command(snakemake.params.aligner)) + rates_record.seek(0) + (best_rate, best_settings) = max( + (float(rate), settings) + for (rate, settings) + in map(str.split, rates_record)) + with open(snakemake.output.mapping_params, "w") as fh: + fh.write(f"{best_params}\t({best_rate}%%)\n") + +if hasattr(snakemake.output, "methods"): + shell(f"cp {snakemake.input.methods} {snakemake.output.methods}") + methods = f"# {snakemake.rule}\nThe first {} {snakemake.params.read_name} reads were mapped on the {snakemake.params.genome_name} using {snakemake.params.aligner} (version ${{{{v}}}}) with options {snakemake.params.settings}." + shell(f""" + v=$({snakemake.params.aligner} --version | head -1 | awk '{{{{print $3}}}}') + echo "{methods}" >> {snakemake.output.methods} +""")