Skip to content
Snippets Groups Projects
Select Git revision
  • a9ed83642494a6f4ba532541b868399e741996f5
  • master default protected
2 results

wrapper.py

Blame
  • user avatar
    Blaise Li authored
    a9ed8364
    History
    wrapper.py 4.36 KiB
    # Copyright (C) 2020 Blaise Li
    #
    # This program is free software: you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation, either version 3 of the License, or
    # (at your option) any later version.
    #
    # This program is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU General Public License for more details.
    #
    # You should have received a copy of the GNU General Public License
    # along with this program.  If not, see <https://www.gnu.org/licenses/>.
    from tempfile import NamedTemporaryFile
    from snakemake.shell import shell
    from libworkflows import run_with_modules
    
    load_modules = snakemake.config.get("load_modules", False)
    
    def mapping_command(aligner):
        """This function returns the shell commands to run given the *aligner*."""
        modules = ["parallel"]
        if aligner == "hisat2":
            modules.append("hisat2/2.1.0")
            shell_commands = """
    cmd="niceload --noswap -q hisat2 -p {snakemake.threads} --dta --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null"
    echo ${{cmd}} 1> {snakemake.log.log}
    eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
    rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%" -f1)
    echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record}
    """
        elif aligner == "bowtie2":
            modules.append("bowtie2/2.3.4.3")
            shell_commands = """
    cmd="niceload --noswap -q bowtie2 -p {snakemake.threads} --seed 123 -t {snakemake.params.these_settings} --mm -x {snakemake.params.index} -U {snakemake.params.fastq} --no-unal -S /dev/null"
    echo ${{cmd}} > {snakemake.log.log}
    eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
    rate=$(grep 'overall alignment rate' {snakemake.log.err} | tail -1 | tee -a {snakemake.log.log} | cut -d"%" -f1)
    echo -e "${{rate}}\\t{snakemake.params.these_settings}" >> {snakemake.params.rates_record}
    """
    #    elif aligner == "crac":
    #        modules.append("crac/1.3.0")
    #        shell_commands = """
    #cmd="niceload --noswap -q crac --nb-threads {snakemake.threads} --summary %s --all %s {snakemake.params.these_settings} -i {snakemake.params.index} -r {fastq} --sam {snakemake.output.sam}"
    #echo ${{cmd}} > {snakemake.log.log}
    #eval ${{cmd}} 1>> {snakemake.log.log} 2> {snakemake.log.err}
    ## TODO: extract non mappers from the sam output
    #> {snakemake.output.nomap_fastq}
    #""" % (snakemake.output.nomap_fastq.split("_unmapped")[0] + "_summary.txt", snakemake.output.nomap_fastq.split("_unmapped")[0] + "_output.txt")
        else:
            raise NotImplementedError("%s is not yet handled." % aligner)
        if load_modules:
            return run_with_modules(shell_commands, modules)
        return shell_commands
    
    
    def split_on_tab(s):
        # https://stackoverflow.com/a/15095537/1878788
        return s.split(b'\t')
    
    
    with NamedTemporaryFile() as fastq, NamedTemporaryFile() as rates_record:
        snakemake.params.fastq = fastq.name
        # Take the first 10000 reads from the input
        # https://stackoverflow.com/a/46621920/1878788
        shell(f"set +o pipefail; zcat {snakemake.input.fastq} | head -40000 | gzip > {fastq.name}")
        snakemake.params.rates_record = rates_record.name
        for settings in snakemake.params.settings:
            snakemake.params.these_settings = settings
            shell(mapping_command(snakemake.params.aligner))
        rates_record.seek(0)
        (best_rate, best_settings) = max(
            (float(rate), settings)
            for (rate, settings)
            in map(split_on_tab, rates_record))
        with open(snakemake.output.mapping_params, "w") as fh:
            fh.write(f"{best_params}\t({best_rate}%%)\n")
    
    if hasattr(snakemake.output, "methods"):
        shell(f"cp {snakemake.input.methods} {snakemake.output.methods}")
        methods = f"# {snakemake.rule}\nThe first 10000 {snakemake.params.read_name} reads were mapped on the {snakemake.params.genome_name} using {snakemake.params.aligner} (version ${{{{v}}}}) with options {snakemake.params.settings}. Among those settings, the one resulting in the highest mapping rate was retained."
        shell(f"""
            v=$({snakemake.params.aligner} --version | head -1 | awk '{{{{print $3}}}}')
            echo "{methods}" >> {snakemake.output.methods}
    """)