diff --git a/RNA_Seq_Cecere/RNA-seq.snakefile b/RNA_Seq_Cecere/RNA-seq.snakefile index 13deca1f7f679b487a7a13f6da0f004468acdcd1..429e66af5fd602482d5b825afbf1c4cd3731bb14 100644 --- a/RNA_Seq_Cecere/RNA-seq.snakefile +++ b/RNA_Seq_Cecere/RNA-seq.snakefile @@ -20,6 +20,15 @@ major, minor = sys.version_info[:2] if major < 3 or (major == 3 and minor < 6): sys.exit("Need at least python 3.6\n") + +# TODO (04/10/2022): +# * normalize spike-in counts by their length (RPKM) +# * use scikit-learn to have a correction factor for transcript RPKM +# TODO first (04/10/2022): +# * output normalizations by total spike-ins (currently normalization is hard-coded to use protein_coding): raw from featureCounts / spike and RPKM (M would be "by million spike-in reads") +# * output slope and intercept of spike-in response in a file (and on the plot?) +# * find example config file activating spike-in stuff + # TODO: plot spike-in vs spike-in between libraries to detect abnormal spike-ins: should be a straight line # TODO: Add rules to take into account spike-ins. @@ -1408,7 +1417,10 @@ rule plot_spikein_responses: title=f"{libname} spike-ins TPM response") # TODO: gather squared_diffs across libraries and find the most stable spike-ins # Then use those to compute slope again and use it for normalization + # TODO (04/10/2022): save regline_slope and regline_intercept somewhere. ( + # Not the data transformed by LinearRegression + # Just pre-processed data transformed_data, regline_slope, regline_intercept,