From bd07159a000ba46c4a1de888f2c608547253414f Mon Sep 17 00:00:00 2001 From: Blaise Li <blaise.li__git@nsup.org> Date: Mon, 24 Oct 2022 12:06:00 +0200 Subject: [PATCH] TODO notes for spike-ins. --- RNA_Seq_Cecere/RNA-seq.snakefile | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/RNA_Seq_Cecere/RNA-seq.snakefile b/RNA_Seq_Cecere/RNA-seq.snakefile index 13deca1..429e66a 100644 --- a/RNA_Seq_Cecere/RNA-seq.snakefile +++ b/RNA_Seq_Cecere/RNA-seq.snakefile @@ -20,6 +20,15 @@ major, minor = sys.version_info[:2] if major < 3 or (major == 3 and minor < 6): sys.exit("Need at least python 3.6\n") + +# TODO (04/10/2022): +# * normalize spike-in counts by their length (RPKM) +# * use scikit-learn to have a correction factor for transcript RPKM +# TODO first (04/10/2022): +# * output normalizations by total spike-ins (currently normalization is hard-coded to use protein_coding): raw from featureCounts / spike and RPKM (M would be "by million spike-in reads") +# * output slope and intercept of spike-in response in a file (and on the plot?) +# * find example config file activating spike-in stuff + # TODO: plot spike-in vs spike-in between libraries to detect anormal spike-ins: should be a straight line # TODO: Add rules to take into account spike-ins. @@ -1408,7 +1417,10 @@ rule plot_spikein_responses: title=f"{libname} spike-ins TPM response") # TODO: gather squared_diffs across libraries and find the most stable spike-ins # Then use those to compute slope again and use it for normalization + # TODO (04/10/2022): save regline_slope and regline_intercept somewhere. ( + # Not the data transformed by LinearRegression + # Just pre-processed data transformed_data, regline_slope, regline_intercept, -- GitLab