diff --git a/Data_submission/libtype_info.yaml b/Data_submission/libtype_info.yaml index 7d48c1a2972d89b502da530b8a1d6bf46aa3e566..c6a01f758291e462ccadab1cd2da2054efb0d3b4 100644 --- a/Data_submission/libtype_info.yaml +++ b/Data_submission/libtype_info.yaml @@ -16,14 +16,14 @@ sRNA-IP-seq: default_wildcards: norm: non_structural orientation: all - read_type: all_siRNA + read_type: si_22GRNA rule: make_normalized_bigwig snakefile: /pasteur/homes/bli/src/bioinfo_utils/sRNA-seq/sRNA-seq.snakefile sRNA-seq: default_wildcards: norm: non_structural orientation: all - read_type: all_siRNA + read_type: si_22GRNA rule: make_normalized_bigwig snakefile: /pasteur/homes/bli/src/bioinfo_utils/sRNA-seq/sRNA-seq.snakefile diff --git a/small_RNA-seq/small_RNA-seq.snakefile b/small_RNA-seq/small_RNA-seq.snakefile index 5246e24f7ad96e823275d28c7bf98b5d1a0c4145..9e5d124e994b4d9a42723231d93a411433d19ef7 100644 --- a/small_RNA-seq/small_RNA-seq.snakefile +++ b/small_RNA-seq/small_RNA-seq.snakefile @@ -2157,6 +2157,10 @@ rule compute_RPM_folds: logfile.write("Computing counts per million non-structural mappers\n") RPM = 1000000 * counts_data / norm logfile.write("Computing RPM log2 fold changes\n") + # Maybe not a very sensible way to compute the mean folds: it pairs replicates, but the pairing is arbitrary + # TODO: Should we compute the fold of the means instead? + # -> No: the pairing is not necessarily arbitrary. + # The condition pairs may come from the same biological line, for instance. # Add 0.25 as RPM "pseudo-count" (edgeR does this ? https://www.biostars.org/p/102568/#102604) lfc = np.log2(pd.DataFrame( {f"log2({cond}_{rep}/{ref}_{rep})" : (RPM[f"{cond}_{rep}"] + 0.25) / (RPM[f"{ref}_{rep}"] + 0.25) for rep in REPS})) @@ -3761,6 +3765,7 @@ def set_id_lists(wildcards): rule make_gene_list_lfc_boxplots: + """Note: This rule may not work properly if some gene lists contain duplicate entries.""" input: data = source_fold_data, output: diff --git a/small_RNA-seq/small_RNA_nomenclature/small_RNAs.tex b/small_RNA-seq/small_RNA_nomenclature/small_RNAs.tex index 962600439736d1b71f09b89948a79213e64a44dc..632f457e1e721fb796cda5c930a7fff6ed366cbf 100644 --- a/small_RNA-seq/small_RNA_nomenclature/small_RNAs.tex +++ b/small_RNA-seq/small_RNA_nomenclature/small_RNAs.tex @@ -1,5 +1,6 @@ \documentclass[beamer]{standalone} %\documentclass[landscape]{article} +\usepackage[T1]{fontenc} \usepackage{adjustbox} \usepackage{tikz} % to have [below=of x] @@ -13,10 +14,11 @@ \newcommand*{\yMax}{1}% % https://fr.sharelatex.com/blog/2013/08/29/tikz-series-pt3.html -\tikzstyle{reads} = [rectangle, rounded corners, text width=3cm, minimum height=1cm,text centered, draw=black, fill=red!30] -\tikzstyle{virt} = [rectangle, rounded corners, text width=3cm, minimum height=1cm,text centered, draw=gray, fill=red!10] -\tikzstyle{bam} = [rectangle, rounded corners, text width=3cm, minimum height=1cm,text centered, draw=black, fill=blue!30] -\tikzstyle{process} = [rectangle, text width=3cm, minimum height=1cm, text centered, draw=black, fill=orange!30] +\tikzstyle{reads} = [rectangle, rounded corners, text width=3.5cm, minimum height=1cm,text centered, draw=black, fill=red!30] +\tikzstyle{biotyped_reads} = [rectangle, rounded corners, text width=3.5cm, minimum height=0.625cm,text centered, draw=black, fill=red!30] +\tikzstyle{virt} = [rectangle, rounded corners, text width=3.5cm, minimum height=1cm,text centered, draw=gray, fill=red!10] +\tikzstyle{bam} = [rectangle, rounded corners, text width=3.5cm, minimum height=1cm,text centered, draw=black, fill=blue!30] +\tikzstyle{process} = [rectangle, text width=3.5cm, minimum height=1cm, text centered, draw=black, fill=orange!30] \tikzstyle{arrow} = [thick,->,>=stealth] \begin{document} @@ -77,68 +79,91 @@ \pause{} \node[process, right= of unmapped] (annotate) {annotate}; -\node[reads, above right= of annotate] (all_si) {all\_si}; -\node[reads, below right= of all_si] (mi) {mi}; -\node[reads, below= of mi] (pi) {pi}; +\node[reads, above right= of selected_on] (all_si) {all\_si}; +%\node[reads, above right= of annotate] (all_si) {all\_si}; +\node[right= 1cm and 5.5cm of annotate] (pimi) {}; +%\node[biotyped_reads, below right=1cm and 2cm of all_si] (mi) {mi}; +\node[biotyped_reads, right= of pimi] (mi) {mi}; +\node[biotyped_reads, below= of mi] (pi) {pi}; \draw[arrow] (selected_on) -- (annotate); -\draw[arrow] (annotate) -- node[pos=0.825,sloped] {sense} (mi); -\draw[arrow] (annotate) -- node[pos=0.75,sloped] {sense} (pi); -\draw[arrow] (annotate) -- node[pos=0.825,sloped] {22G-26G} node[pos=0.5,sloped,below] {22G(U*)} (all_si); -\pause{} - -\node[virt, above= of all_si] (other_all_si) {not antisense or\\no annotation}; -\node[reads, right= of all_si] (simrep_si) {\{simrep/satel\}\_si}; -\node[reads, above= of simrep_si] (pseu_si) {\{te,pseu\}\_si}; -\node[reads, above= of pseu_si] (prot_si) {prot\_si}; -\draw[arrow] (all_si) -- node[pos=0.75,sloped] {antisense} (prot_si); -\draw[arrow] (all_si) -- node[pos=0.75,sloped] {antisense} (pseu_si); -\draw[arrow] (all_si) -- node[pos=0.5,sloped] {antisense} (simrep_si); -\draw[arrow] (all_si) -- (other_all_si); -\pause{} - -\node[reads, right= of pseu_si] (si) {si}; +\draw[arrow] (annotate) -- node[pos=0.75,sloped] {sense} (mi); +\draw[arrow] (annotate) -- node[pos=0.625,sloped] {sense} (pi); +%\draw[arrow] (annotate) -- node[pos=0.825,sloped] {22G-26G} node[pos=0.5,sloped,below] {22G(U*)} (all_si); +\draw[arrow] (annotate) -- node[pos=0.625,sloped,below] {not mi or pi} (all_si); +\pause{} + +%\node[virt, above= of all_si] (other_all_si) {not antisense or\\no annotation}; +%\node[reads, right= of all_si] (simrep_si) {\{simrep|satel\}\_si\_\{22G|26G\}}; +\node[biotyped_reads, right=1cm and 2cm of all_si] (pseu_si) {pseu\_si\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of pseu_si] (satel_si) {satel\_si\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of satel_si] (simrep_si) {simrep\_si\_\{22G|26G\}}; +\node[biotyped_reads, above=0.5cm and 1.5cm of pseu_si] (prot_si) {prot\_si\_\{22G|26G\}}; +\node[biotyped_reads, above=0.5cm and 1.5cm of prot_si] (te_si) {te\_si\_\{22G|26G\}}; +%\node[biotyped_reads, right=1cm and 2cm of all_si] (simrep_si) {simrep\_si\_\{22G|26G\}}; +%\node[biotyped_reads, above=0.5cm and 1.5cm of simrep_si] (satel_si) {satel\_si\_\{22G|26G\}}; +%\node[biotyped_reads, above=0.5cm and 1.5cm of satel_si] (pseu_si) {pseu\_si\_\{22G|26G\}}; +%\node[biotyped_reads, above=0.5cm and 1.5cm of pseu_si] (prot_si) {prot\_si\_\{22G|26G\}}; +%\node[biotyped_reads, above=0.5cm and 1.5cm of prot_si] (te_si) {te\_si\_\{22G|26G\}}; +\draw[arrow,blue] (all_si) -- node[pos=0.75,sloped] {antisense 22G|26G} (te_si); +\draw[arrow,blue] (all_si) -- node[pos=0.5,sloped,below] {\dots{}} (prot_si); +\draw[arrow,blue] (all_si) -- node[pos=0.5,sloped,below] {\dots{}} (pseu_si); +\draw[arrow,blue] (all_si) -- node[pos=0.5,sloped,below] {\dots{}} (satel_si); +\draw[arrow,blue] (all_si) -- node[pos=0.5,sloped,below] {\dots{}} (simrep_si); +%\draw[arrow] (all_si) -- (other_all_si); +\pause{} + +\node[reads, right= of satel_si] (si) {si\_\{22G|26G\}}; +\draw[arrow] (te_si) -- (si); \draw[arrow] (prot_si) -- (si); \draw[arrow] (pseu_si) -- (si); +\draw[arrow] (satel_si) -- (si); \draw[arrow] (simrep_si) -- (si); \pause{} \node[process, right= of nomap] (annotateU) {annotate}; \node[reads, right= of annotateU] (all_siu) {all\_siu}; \draw[arrow] (nomap_on) -- (annotateU); -\draw[arrow] (annotateU) -- node[pos=0.5,sloped] {22G(U*)} node[pos=0.5,sloped,below] {22G-26G} (all_siu); -\pause{} - -\node[virt, below= of all_siu] (other_all_siu) {not antisense or\\no annotation}; -\node[reads, right= of all_siu] (prot_siu) {prot\_siu}; -\node[reads, below= of prot_siu] (pseu_siu) {\{te,pseu\}\_siu}; -\node[reads, below= of pseu_siu] (simrep_siu) {\{simrep/satel\}\_siu}; -\draw[arrow] (all_siu) -- node[pos=0.5,sloped] {antisense} (prot_siu); -\draw[arrow] (all_siu) -- node[pos=0.25,sloped] {antisense} (pseu_siu); -\draw[arrow] (all_siu) -- node[pos=0.66,sloped,below] {antisense} (simrep_siu); -\draw[arrow] (all_siu) -- (other_all_siu); -\pause{} - -\node[reads, right= of pseu_siu] (siu) {siu}; +%\draw[arrow] (annotateU) -- node[pos=0.5,sloped] {22G(U*)} node[pos=0.5,sloped,below] {22G-26G} (all_siu); +\draw[arrow] (annotateU) -- (all_siu); +\pause{} + +%\node[virt, below= of all_siu] (other_all_siu) {not antisense or\\no annotation}; +\node[biotyped_reads, right=1cm and 2cm of all_siu] (te_siu) {te\_siu\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of te_siu] (prot_siu) {prot\_siu\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of prot_siu] (pseu_siu) {pseu\_siu\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of pseu_siu] (satel_siu) {satel\_siu\_\{22G|26G\}}; +\node[biotyped_reads, below=0.5cm and 1.5cm of satel_siu] (simrep_siu) {simrep\_siu\_\{22G|26G\}}; +\draw[arrow,blue] (all_siu) -- node[pos=0.5,sloped] {antisense} node[pos=0.5,sloped,below] {22G|26G} (te_siu); +\draw[arrow,blue] (all_siu) -- node[pos=0.5,sloped,below] {\dots{}} (prot_siu); +\draw[arrow,blue] (all_siu) -- node[pos=0.5,sloped,below] {\dots{}} (pseu_siu); +\draw[arrow,blue] (all_siu) -- node[pos=0.5,sloped,below] {\dots{}} (satel_siu); +\draw[arrow,blue] (all_siu) -- node[pos=0.5,sloped,below] {\dots{}} (simrep_siu); +%\draw[arrow] (all_siu) -- (other_all_siu); +\pause{} + +\node[reads, right= of pseu_siu] (siu) {siu\_\{22G|26G\}}; +\draw[arrow] (te_siu) -- (siu); \draw[arrow] (prot_siu) -- (siu); \draw[arrow] (pseu_siu) -- (siu); +\draw[arrow] (satel_siu) -- (siu); \draw[arrow] (simrep_siu) -- (siu); \pause{} -\node[reads, below right= of si] (sisiu) {sisiu}; +\node[reads, below right= of si] (sisiu) {sisiu\_\{22G|26G\}}; \draw[arrow] (si) -- (sisiu); \draw[arrow] (siu) -- (sisiu); \pause{} -\node[reads, right= of mi] (pisimi) {pisimi}; -\draw[arrow] (sisiu) -- (pisimi); -\draw[arrow] (mi) -- (pisimi); -\draw[arrow] (pi) -- (pisimi); +\node[reads, right= of mi] (pimi22G) {pimi22G}; +\draw[arrow] (si) -- node[pos=0.4,sloped,below] {22G} (pimi22G); +\draw[arrow] (mi) -- (pimi22G); +\draw[arrow] (pi) -- (pimi22G); \pause{} \node[process, above right= of si] (deseq) {DESeq2}; \draw[arrow] (sisiu) -- (deseq); \draw[arrow] (prot_si) -- (deseq); -\draw[arrow] (pisimi) -- (deseq); +\draw[arrow] (pimi22G) -- (deseq); \pause{} \node[reads, above= of all_siu] (all_sisiu) {all\_sisiu}; @@ -172,9 +197,9 @@ \draw[arrow] (count_filesU) -- (norm); \pause{} -\node[process, below= of pisimi] (RPKM_fold) {RPKM folds}; +\node[process, below= of pimi22G] (RPKM_fold) {RPKM folds}; \draw[arrow] (norm) -- (RPKM_fold); -\draw[arrow] (pisimi) -- (RPKM_fold); +\draw[arrow] (pimi22G) -- (RPKM_fold); \pause{} \node[process, above right= of prot_si] (remap_small) {remap};