Commit d045b9cb authored by Blaise Li
Browse files

Adding ri_si_22G (rRNA templated endosiRNA).

Not tested yet.
parent a304a2af
......@@ -42,6 +42,10 @@ major, minor = sys.version_info[:2]
if major < 3 or (major == 3 and minor < 6):
sys.exit("Need at least python 3.6\n")
# TODO (27/10/2021): isolate tRFs: sense to tRNA, then look at abundance distribution across sites on their remapping: Sites with narrow size distribution and high enough count are potentially interesting. Is it recurrent across libraries?
# TODO: add tRFs in pimi22G -> pimitRF22G
# TODO (27/10/2021): add ri_si[u]_{22G|26G}: antisense to rRNA genes
# TODO: Try to find what proportion of small reads map at a unique position within repetitive elements.
# What is the distribution of unique reads among repetitive elements of a given family: is it evenly spread, or biased?
# Is it different if we use all small RNAs instead of just unique ones?
......@@ -246,8 +250,9 @@ assert set(DE_TYPES) <= set(SMALL_TYPES + JOINED_SMALL_TYPES), "%s\n%s" % (", ".
#IP_TYPES = ["pisimi", "siu", "prot_si"]
# TODO: update cross_HTS with pimi22G
# TODO: what kind of pimi22G ? -> pi, mi and si_22G, but not siu_22G
IP_TYPES = [f"pimi{SI_MIN}G", f"siu_{SI_MIN}G", f"prot_si_{SI_MIN}G",
f"siu_{SI_MAX}G", f"prot_si_{SI_MAX}G"]
# TODO: add tRFs?
IP_TYPES = [f"pimi{SI_MIN}G", f"siu_{SI_MIN}G", f"prot_si_{SI_MIN}G", f"ri_si_{SI_MIN}G",
f"siu_{SI_MAX}G", f"prot_si_{SI_MAX}G", f"ri_si_{SI_MAX}G"]
assert set(IP_TYPES) <= set(
SMALL_TYPES + [f"all_si_{SI_MIN}G", f"all_si_{SI_MAX}G"] + JOINED_SMALL_TYPES), ", ".join(IP_TYPES)
#IP_TYPES = config["ip_types"]
......@@ -446,6 +451,7 @@ data_dir = config.get("data_dir", OPJ("data"))
mapping_dir = OPJ(aligner, f"mapped_{genome}")
reads_dir = OPJ(mapping_dir, "reads")
annot_counts_dir = OPJ(mapping_dir, "annotation")
# TODO: add tRFs?
# The order after pi and mi must match: this is used in make_read_counts_summary
ANNOT_COUNTS_TYPES = [
"pi", "mi", "all_si",
......@@ -1031,6 +1037,7 @@ rule select_size_range:
# TODO: update this
# TODO: add tRFs?
@wc_applied
def source_fastq(wildcards):
"""Determine the fastq file corresponding to a given read type."""
......@@ -1503,6 +1510,7 @@ rule gather_annotations:
# """
# TODO: This should be updated in order to match the actual output of small_RNA_seq_annotate.py
annotate_read_output = {
small_type: OPJ(reads_dir, "{lib}_{rep}_%s_on_%s" % (size_selected, genome), f"{small_type}RNA.fastq.gz")
for small_type in ANNOT_COUNTS_TYPES}
......@@ -2180,6 +2188,7 @@ rule make_read_counts_summary:
summary_file.write("\t")
summary_file.write(str(sum_counts(annot_counts_files["mi"])))
summary_file.write("\n")
# TODO: add tRF?
rule compute_median_ratio_to_pseudo_ref_size_factors:
......
%TODO: RPM-folds instead of RPKM-folds, remove DESeq, 18-26 instead of 18-30, add ri_si and tRFs (22G is actually 21-23)
\documentclass[beamer]{standalone}
%\documentclass[landscape]{article}
\usepackage[T1]{fontenc}
......
......@@ -162,6 +162,9 @@ def make_annotation_processor(getter_type):
and annot[2] == start
and annot[3] == end)
# TODO: add match_tRF:
# * start and end should fall inside a tRNA annotation
# * the annotation should be on the same strand as the read
# sense_only = cfilter(same_strand)
# fastq = FQ_TEMPLATE % (name, seq, qual)
# Simplify and collapse in a set, group by biotype
......@@ -187,7 +190,7 @@ def make_annotation_processor(getter_type):
# For miRNA, we also want the coordinates to match exactly.
# For piRNA, we also want the coordinates to match exactly
# and the read to be 21U (actually, we want to accept longer reads).
# TODO (21/10/2021): Add a tRNA category
# TODO (21/10/2021): Add a tRF category
# with no constraints on read extremities coordinates.
# Where in the priority order? After pi and mi, before si?
#
......@@ -227,6 +230,8 @@ def make_annotation_processor(getter_type):
*sorted(biotypes)])
signature = f"{PI_MIN}-{PI_MAX}U"
annotations = annot_set
# elif match_tRF():
# TODO
# elif "piRNA" in biotype2annots:
# annotations = list(biotype2annots["piRNA"])
# annot = frozenset(
......@@ -487,9 +492,9 @@ def main():
stream_processor = FuncApplier([
count_annots, count_small,
count_first_bases])
# TODO (21/10/2021): Add tRNA-derived sRNAs:
# TODO (21/10/2021): Add tRNA-derived sRNAs (tRF):
# small_types = [
# "pi", "mi", "ti", "all_si"
# "pi", "mi", "tRF", "all_si"
# f"all_si_{SI_MIN}G", f"all_si_{SI_MAX}G", *SI_TYPES]
small_types = [
"pi", "mi", "all_si",
......@@ -503,6 +508,9 @@ def main():
# One reason is that elements in small_types_u
# should be in the same order as small_types
# in order to be able to build the u2nonu dict.
# small_types_u = [
# "piu", "miu", "tRFu", "all_siu",
# f"all_siu_{SI_MIN}G", f"all_siu_{SI_MAX}G", *SIU_TYPES]
small_types_u = [
"piu", "miu", "all_siu",
f"all_siu_{SI_MIN}G", f"all_siu_{SI_MAX}G", *SIU_TYPES]
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment