Saving old modifications (not used).

7d9b00ae · Blaise Li · d6d6f365 · 7d9b00ae · 7d9b00ae
Commit 7d9b00ae authored 4 years ago by Blaise Li
--- a/deseq2.bli.graphs/R/deseq2_graphs.R
+++ b/deseq2.bli.graphs/R/deseq2_graphs.R
@@ -182,13 +182,15 @@ scatter_and_volcano <- function(exp_name, p_size=0.2) {
    legend("topright", legend=c("22G_down", "22G_up"), pch=c(20, 20), col=c("blue", "red"), cex=.7)
 }

+
+#TODO: adapt this to any number of replicates (put scatterplot for pairs of replicates in a mfrwo(n, n))
 #' Generating scatterplots between two replicates
 #'
 #' @param exp_name The name of the experiment
 #' @param cond The condition
 #' @export
-replicates_scatterplot <- function(exp_name, cond) {
-    counts <- get_counts(exp_name, cond, count_type="counts")
+replicates_scatterplot <- function(exp_name, cond, sample_table) {
+    counts <- get_counts(exp_name, cond, sample_table, count_type="counts")
    tau <- cor(
        counts[["1"]],
        counts[["2"]],

--- a/deseq2.bli.utils/R/deseq2_utils.R
+++ b/deseq2.bli.utils/R/deseq2_utils.R
@@ -66,7 +66,7 @@ load_dds <- function(exp_name, out_dir, sample_table, design_string, references)
        print("Loading pre-computed data")
        load(data_filename)
    } else {
-        print(exp_name)
+        flog.debug(exp_name)
        print(paste("Reading count data for", exp_name))
        #print(sample_table)
        dds_raw <- DESeqDataSetFromHTSeqCount(
@@ -171,7 +171,7 @@ after.under <- function(s) {
 #' @param wormid2name A hashmap converting Wormbase ID to gene names
 #' @export
 make_subtable <- function(exp_name, ref, other, wormid2cosmid, wormid2name) {
-    #print(exp_name)
+    #flog.debug(exp_name)
    fold_field <- quote(paste0(exp_name, "_log2FoldChange"))
    padj_field <- quote(paste0(exp_name, "_padj"))
    results_table <- data.frame(
@@ -179,7 +179,7 @@ make_subtable <- function(exp_name, ref, other, wormid2cosmid, wormid2name) {
            all_results[, eval(fold_field)],
            all_results[, eval(padj_field)]
        )
-    print(paste("Renaming column names for", exp_name))
+    flog.debug(paste("Renaming column names for", exp_name))
    colnames(results_table) <- c(
        "gene",
        "log2FoldChange",
@@ -266,32 +266,56 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
            envir=parent.frame()
            )
        )[[count_type]]
-    # http://stackoverflow.com/a/40746161/1878788
+    flog.debug("counts_data is a:")
+    flog.debug(class(counts_data))
+    flog.debug("counts_data columns:")
+    flog.debug(colnames(counts_data))
+    flog.debug("sample_table columns:")
+    flog.debug(colnames(sample_table))
+    flog.debug("condition characteristics:")
+    flog.debug(names(cond))
    if (FALSE) {
-        print("setting keys")
-        print(names(cond))
+        # http://stackoverflow.com/a/40746161/1878788
+        flog.debug("setting keys")
        setkeyv(sample_table, names(cond))
-        print(class(sample_table))
-        print(names(sample_table))
-        print("getting library names")
+        flog.debug(class(sample_table))
+        flog.debug(names(sample_table))
+        flog.debug("getting library names")
        # Error in `[.data.frame`(x, i, j) : object 'lib' not found
        lib_names <- sample_table[cond, lib]
-        print(lib_names)
+        flog.debug(lib_names)
        counts_1 <- counts_data[, lib_names[1]]
        counts_2 <- counts_data[, lib_names[2]]
+        return(list("1"=counts_1, "2"=counts_2))
    } else {
-        print("Selecting columns corresponding to condition variables")
-        cond_variables <- sample_table[, names(cond)]
-        print(cond_variables)
-        print("Generating boolean vector to select lines matching the condition")
+        flog.debug("Selecting columns corresponding to condition variables")
+        # drop=FALSE because http://stackoverflow.com/questions/40867703
+        cond_variables <- sample_table[, names(cond), drop=FALSE]
+        flog.debug(cond_variables)
+        flog.debug(cond_variables == cond)
+        flog.debug("Generating boolean vector to select lines matching the condition")
        selection_vector <- apply((cond_variables == cond), 1, all)
-        print("Doing the selection")
+        flog.debug("Doing the selection")
        lines_with_condition <- sample_table[selection_vector,]
-        print("Getting the counts")
-        counts_1 <- counts_data[, filter(lines_with_condition, replicate=="1") %>% select(lib) %>% unlist]
-        counts_2 <- counts_data[, filter(lines_with_condition, replicate=="2") %>% select(lib) %>% unlist]
+        flog.debug("Determining replicates list")
+        replicate_names <- levels(sample_table$replicate)
+        flog.debug("Pre-allocating counts list")
+        replicate_counts <- vector(mode="list", length=length(replicate_names))
+        #counts_1 <- counts_data[, filter(lines_with_condition, replicate=="1") %>% select(lib) %>% unlist]
+        #counts_2 <- counts_data[, filter(lines_with_condition, replicate=="2") %>% select(lib) %>% unlist]
+        for (i in seq_along(replicate_names)) {
+            rep <- replicate_names[i]
+            flog.debug(paste("Finding library name for replicate", rep))
+            # as.character: see comment under http://stackoverflow.com/a/15031603/1878788
+            lib_id <- as.character(filter(lines_with_condition, replicate==rep) %>% select(lib) %>% unlist)
+            flog.debug(paste("Getting the counts for library", lib_id))
+            replicate_counts[[i]] <- counts_data[, lib_id]
+            #replicate_counts[[rep]] <- counts_data[, filter(lines_with_condition, replicate==rep) %>% select(lib) %>% unlist]
        }
-    return(list("1"=counts_1, "2"=counts_2))
+        names(replicate_counts) <- replicate_names
+        return(replicate_counts)
+    }
+    #return(list("1"=counts_1, "2"=counts_2))
 }

 #' Computes the means over replicates of two conditions
@@ -299,12 +323,12 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
 #' @param exp_name The name of the experiment
 #' @param out_dir The directory in which to write the data
 #' @param sample_table The corresponding sample table
-#' @param ref The reference condition
-#' @param other The compared condition
+#' @param ref_condition The reference condition
+#' @param other_condition The compared condition
 #' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
 #' @param wormid2name A hashmap converting Wormbase ID to gene names
 #' @export
-compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2cosmid, wormid2name){
+compute_means <- function(exp_name, out_dir, sample_table, ref_condition, other_condition, wormid2cosmid, wormid2name){
    #mu_data <- assays(
    #        get(paste(exp_name, "dds", sep="_"),
    #        envir=parent.frame()
@@ -315,13 +339,14 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
    # Should we use the raw counts instead ("counts" instead of "mu")?
    # We use "mu" because it may not make sense to compute an average between raw un-normalized counts.

-    ref_counts <- get_counts(exp_name, ref, sample_table, count_type="mu")
-    other_counts <- get_counts(exp_name, other, sample_table, count_type="mu")
+    ref_counts <- get_counts(exp_name, ref_condition, sample_table, count_type="mu")
+    other_counts <- get_counts(exp_name, other_condition, sample_table, count_type="mu")
    mean_ref <- Reduce(`+`, ref_counts) / length(ref_counts)
    mean_other <- Reduce(`+`, other_counts) / length(other_counts)
-    mean_ref_name <- paste(exp_name, "mean", paste(ref, collapse=""), "counts", sep="_")
-    mean_other_name <- paste(exp_name, "mean", paste(other, collapse=""), "counts", sep="_")
-    print(paste("Assigning", mean_ref_name, "and", mean_other_name))
+    #stopifnot(FALSE)
+    mean_ref_name <- paste(exp_name, "mean", paste(ref_condition, collapse=""), "counts", sep="_")
+    mean_other_name <- paste(exp_name, "mean", paste(other_condition, collapse=""), "counts", sep="_")
+    flog.debug(paste("Assigning", mean_ref_name, "and", mean_other_name))
    assign(
        mean_ref_name,
        mean_ref,
@@ -360,31 +385,33 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
 #'
 #' @param exp_name The name of the experiment
 #' @param out_dir The directory in which to write the data
-#' @param ref The reference condition
-#' @param other The compared condition
+#' @param ref_condition The reference condition
+#' @param other_condition The compared condition
 #' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
 #' @param wormid2name A hashmap converting Wormbase ID to gene names
 #' @export
-make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wormid2name) {
+make_results_table <- function(exp_name, out_dir, ref_condition, other_condition, wormid2cosmid, wormid2name) {
    # To use select and %>%
    #require(dplyr)
    require(dtplyr)
    # To use results
    require(DESeq2)
-    print(paste("Computing results for", exp_name))
+    flog.debug(paste("Computing results for", exp_name))
    results_table <- results(
        get(
            paste0(exp_name, "_dds"),
            envir=parent.frame()
            ),
        tidy=TRUE, alpha=0.05) %>% select(row, log2FoldChange, padj)
+    flog.debug(paste("Setting column names for", exp_name))
    colnames(results_table) <- c(
        "gene",
        "log2FoldChange",
        "padj"
        )
-    mean_ref_name <- paste(exp_name, "mean", paste(ref, collapse=""), "counts", sep="_")
-    mean_other_name <- paste(exp_name, "mean", paste(other, collapse=""), "counts", sep="_")
+    flog.debug(paste("Retrieving means for", exp_name))
+    mean_ref_name <- paste(exp_name, "mean", paste(ref_condition, collapse=""), "counts", sep="_")
+    mean_other_name <- paste(exp_name, "mean", paste(other_condition, collapse=""), "counts", sep="_")
    #mean_ref_name <- paste(exp_name, "mean", ref, "counts", sep="_")
    #mean_other_name <- paste(exp_name, "mean", other, "counts", sep="_")
    mean_ref <- get(
@@ -395,6 +422,35 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
        mean_other_name,
        envir=parent.frame()
        )
+    flog.debug(mean_ref)
+    flog.debug(mean_other)
+    flog.debug(paste("Adding means as extra columns for", exp_name))
+    flog.debug("dimensions")
+    flog.debug(dim(
+        results_table$gene))
+    flog.debug(dim(
+        wormid2cosmid$find(results_table$gene)))
+    flog.debug(dim(
+        wormid2name$find(results_table$gene)))
+    flog.debug(dim(
+        results_table[2:3]))
+    flog.debug(dim(
+        mean_ref))
+    flog.debug(dim(
+        mean_other))
+    flog.debug("lengths")
+    flog.debug(length(
+        results_table$gene))
+    flog.debug(length(
+        wormid2cosmid$find(results_table$gene)))
+    flog.debug(length(
+        wormid2name$find(results_table$gene)))
+    flog.debug(length(
+        results_table[2:3]))
+    flog.debug(length(
+        mean_ref))
+    flog.debug(length(
+        mean_other))
    extended_results_table <- cbind(
        results_table$gene,
        wormid2cosmid$find(results_table$gene),
@@ -411,6 +467,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
        #    envir=parent.frame()
        #    )
        )
+    flog.debug(paste("Updating column names for", exp_name))
    colnames(extended_results_table) <- c(
        "gene", "cosmid", "name",
        "log2FoldChange", "padj",
@@ -418,7 +475,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
        "mean_other_counts"
        )
    if (biotype %in% c("transposable_elements_wormbase", "transposable_elements_wormbase_reverse")){
-        print("Appending TE family names")
+        flog.debug("Appending TE family names")
        results_table <- merge(extended_results_table, cosmid_wormid_transposon_family, by="gene")
    }
    table_filename <- paste(
@@ -427,16 +484,19 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
        "results.txt",
        sep="_"
        )
+    print(paste("Writing results to", table_filename))
    write.table(
        extended_results_table,
        file.path(out_dir, table_filename)
        )
+    print(paste("Assigning", paste0(exp_name, "_results")))
    assign(
        paste0(exp_name, "_results"),
        extended_results_table,
        envir=parent.frame()
        #pos=1
        )
+    print(paste("Assigning", paste0(exp_name, "_lfc_percentiles")))
    assign(
        paste0(exp_name, "_lfc_percentiles"),
        quantile(
@@ -447,6 +507,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
        #pos=1
        )
    #lfc_percentiles <- get(paste0(exp_name, "lfc_percentiles"))
+    print(paste("Assigning", paste0(exp_name, "_padj_percentiles")))
    assign(
        paste0(exp_name, "_padj_percentiles"),
        quantile(