From 58a71439a9f1039d2472ddf1b7bc421d195b2140 Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Mon, 3 Sep 2018 18:08:03 +0200 Subject: [PATCH] Factorized code (MAPQ filtering, RPK, rTRPM). --- libhts/libhts.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/libhts/libhts.py b/libhts/libhts.py index d73e512..d4a6601 100644 --- a/libhts/libhts.py +++ b/libhts/libhts.py @@ -268,6 +268,25 @@ def make_seeding_function(seeding_string): return seeding_function +def aligner2min_mapq(aligner, wildcards): + """ + Option to filter on MAPQ value in featureCounts. + + What minimal MAPQ value should a read have to be considered uniquely mapped? + See <https://sequencing.qcfail.com/articles/mapq-values-are-really-useful-but-their-implementation-is-a-mess/>. + """ + mapped_type = wildcards.mapped_type + if mapped_type.startswith("unique_"): + if aligner == "hisat2": + return "-Q 60" + elif aligner == "bowtie2": + return "-Q 23" + else: + raise NotImplementedError(f"{aligner} not handled (yet?)") + else: + return "" + + ## Not sure this is a good idea... # def masked_gmean(a, axis=0, dtype=None): # """Modified from stats.py.""" -- GitLab