diff --git a/libhts/libhts.py b/libhts/libhts.py index d73e5126daea07d3b273b976040b91d7d7827777..d4a6601c942b9b1f9395d7bf0535e5d4e566cccd 100644 --- a/libhts/libhts.py +++ b/libhts/libhts.py @@ -268,6 +268,25 @@ def make_seeding_function(seeding_string): return seeding_function +def aligner2min_mapq(aligner, wildcards): + """ + Option to filter on MAPQ value in featureCounts. + + What minimal MAPQ value should a read have to be considered uniquely mapped? + See <https://sequencing.qcfail.com/articles/mapq-values-are-really-useful-but-their-implementation-is-a-mess/>. + """ + mapped_type = wildcards.mapped_type + if mapped_type.startswith("unique_"): + if aligner == "hisat2": + return "-Q 60" + elif aligner == "bowtie2": + return "-Q 23" + else: + raise NotImplementedError(f"{aligner} not handled (yet?)") + else: + return "" + + ## Not sure this is a good idea... # def masked_gmean(a, axis=0, dtype=None): # """Modified from stats.py."""