Skip to content
Snippets Groups Projects
Commit 7d9b00ae authored by Blaise Li's avatar Blaise Li
Browse files

Saving old modifications (not used).

parent d6d6f365
No related branches found
No related tags found
No related merge requests found
......@@ -182,13 +182,15 @@ scatter_and_volcano <- function(exp_name, p_size=0.2) {
legend("topright", legend=c("22G_down", "22G_up"), pch=c(20, 20), col=c("blue", "red"), cex=.7)
}
#TODO: adapt this to any number of replicates (put scatterplot for pairs of replicates in a mfrwo(n, n))
#' Generating scatterplots between two replicates
#'
#' @param exp_name The name of the experiment
#' @param cond The condition
#' @export
replicates_scatterplot <- function(exp_name, cond) {
counts <- get_counts(exp_name, cond, count_type="counts")
replicates_scatterplot <- function(exp_name, cond, sample_table) {
counts <- get_counts(exp_name, cond, sample_table, count_type="counts")
tau <- cor(
counts[["1"]],
counts[["2"]],
......
......@@ -66,7 +66,7 @@ load_dds <- function(exp_name, out_dir, sample_table, design_string, references)
print("Loading pre-computed data")
load(data_filename)
} else {
print(exp_name)
flog.debug(exp_name)
print(paste("Reading count data for", exp_name))
#print(sample_table)
dds_raw <- DESeqDataSetFromHTSeqCount(
......@@ -171,7 +171,7 @@ after.under <- function(s) {
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
make_subtable <- function(exp_name, ref, other, wormid2cosmid, wormid2name) {
#print(exp_name)
#flog.debug(exp_name)
fold_field <- quote(paste0(exp_name, "_log2FoldChange"))
padj_field <- quote(paste0(exp_name, "_padj"))
results_table <- data.frame(
......@@ -179,7 +179,7 @@ make_subtable <- function(exp_name, ref, other, wormid2cosmid, wormid2name) {
all_results[, eval(fold_field)],
all_results[, eval(padj_field)]
)
print(paste("Renaming column names for", exp_name))
flog.debug(paste("Renaming column names for", exp_name))
colnames(results_table) <- c(
"gene",
"log2FoldChange",
......@@ -266,32 +266,56 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
envir=parent.frame()
)
)[[count_type]]
# http://stackoverflow.com/a/40746161/1878788
flog.debug("counts_data is a:")
flog.debug(class(counts_data))
flog.debug("counts_data columns:")
flog.debug(colnames(counts_data))
flog.debug("sample_table columns:")
flog.debug(colnames(sample_table))
flog.debug("condition characteristics:")
flog.debug(names(cond))
if (FALSE) {
print("setting keys")
print(names(cond))
# http://stackoverflow.com/a/40746161/1878788
flog.debug("setting keys")
setkeyv(sample_table, names(cond))
print(class(sample_table))
print(names(sample_table))
print("getting library names")
flog.debug(class(sample_table))
flog.debug(names(sample_table))
flog.debug("getting library names")
# Error in `[.data.frame`(x, i, j) : object 'lib' not found
lib_names <- sample_table[cond, lib]
print(lib_names)
flog.debug(lib_names)
counts_1 <- counts_data[, lib_names[1]]
counts_2 <- counts_data[, lib_names[2]]
return(list("1"=counts_1, "2"=counts_2))
} else {
print("Selecting columns corresponding to condition variables")
cond_variables <- sample_table[, names(cond)]
print(cond_variables)
print("Generating boolean vector to select lines matching the condition")
flog.debug("Selecting columns corresponding to condition variables")
# drop=FALSE because http://stackoverflow.com/questions/40867703
cond_variables <- sample_table[, names(cond), drop=FALSE]
flog.debug(cond_variables)
flog.debug(cond_variables == cond)
flog.debug("Generating boolean vector to select lines matching the condition")
selection_vector <- apply((cond_variables == cond), 1, all)
print("Doing the selection")
flog.debug("Doing the selection")
lines_with_condition <- sample_table[selection_vector,]
print("Getting the counts")
counts_1 <- counts_data[, filter(lines_with_condition, replicate=="1") %>% select(lib) %>% unlist]
counts_2 <- counts_data[, filter(lines_with_condition, replicate=="2") %>% select(lib) %>% unlist]
flog.debug("Determining replicates list")
replicate_names <- levels(sample_table$replicate)
flog.debug("Pre-allocating counts list")
replicate_counts <- vector(mode="list", length=length(replicate_names))
#counts_1 <- counts_data[, filter(lines_with_condition, replicate=="1") %>% select(lib) %>% unlist]
#counts_2 <- counts_data[, filter(lines_with_condition, replicate=="2") %>% select(lib) %>% unlist]
for (i in seq_along(replicate_names)) {
rep <- replicate_names[i]
flog.debug(paste("Finding library name for replicate", rep))
# as.character: see comment under http://stackoverflow.com/a/15031603/1878788
lib_id <- as.character(filter(lines_with_condition, replicate==rep) %>% select(lib) %>% unlist)
flog.debug(paste("Getting the counts for library", lib_id))
replicate_counts[[i]] <- counts_data[, lib_id]
#replicate_counts[[rep]] <- counts_data[, filter(lines_with_condition, replicate==rep) %>% select(lib) %>% unlist]
}
return(list("1"=counts_1, "2"=counts_2))
names(replicate_counts) <- replicate_names
return(replicate_counts)
}
#return(list("1"=counts_1, "2"=counts_2))
}
#' Computes the means over replicates of two conditions
......@@ -299,12 +323,12 @@ get_counts <- function(exp_name, cond, sample_table, count_type="counts"){
#' @param exp_name The name of the experiment
#' @param out_dir The directory in which to write the data
#' @param sample_table The corresponding sample table
#' @param ref The reference condition
#' @param other The compared condition
#' @param ref_condition The reference condition
#' @param other_condition The compared condition
#' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2cosmid, wormid2name){
compute_means <- function(exp_name, out_dir, sample_table, ref_condition, other_condition, wormid2cosmid, wormid2name){
#mu_data <- assays(
# get(paste(exp_name, "dds", sep="_"),
# envir=parent.frame()
......@@ -315,13 +339,14 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
# Should we use the raw counts instead ("counts" instead of "mu")?
# We use "mu" because it may not make sense to compute an average between raw un-normalized counts.
ref_counts <- get_counts(exp_name, ref, sample_table, count_type="mu")
other_counts <- get_counts(exp_name, other, sample_table, count_type="mu")
ref_counts <- get_counts(exp_name, ref_condition, sample_table, count_type="mu")
other_counts <- get_counts(exp_name, other_condition, sample_table, count_type="mu")
mean_ref <- Reduce(`+`, ref_counts) / length(ref_counts)
mean_other <- Reduce(`+`, other_counts) / length(other_counts)
mean_ref_name <- paste(exp_name, "mean", paste(ref, collapse=""), "counts", sep="_")
mean_other_name <- paste(exp_name, "mean", paste(other, collapse=""), "counts", sep="_")
print(paste("Assigning", mean_ref_name, "and", mean_other_name))
#stopifnot(FALSE)
mean_ref_name <- paste(exp_name, "mean", paste(ref_condition, collapse=""), "counts", sep="_")
mean_other_name <- paste(exp_name, "mean", paste(other_condition, collapse=""), "counts", sep="_")
flog.debug(paste("Assigning", mean_ref_name, "and", mean_other_name))
assign(
mean_ref_name,
mean_ref,
......@@ -360,31 +385,33 @@ compute_means <- function(exp_name, out_dir, sample_table, ref, other, wormid2co
#'
#' @param exp_name The name of the experiment
#' @param out_dir The directory in which to write the data
#' @param ref The reference condition
#' @param other The compared condition
#' @param ref_condition The reference condition
#' @param other_condition The compared condition
#' @param wormid2cosmid A hashmap converting Wormbase ID to cosmid-derived ID
#' @param wormid2name A hashmap converting Wormbase ID to gene names
#' @export
make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wormid2name) {
make_results_table <- function(exp_name, out_dir, ref_condition, other_condition, wormid2cosmid, wormid2name) {
# To use select and %>%
#require(dplyr)
require(dtplyr)
# To use results
require(DESeq2)
print(paste("Computing results for", exp_name))
flog.debug(paste("Computing results for", exp_name))
results_table <- results(
get(
paste0(exp_name, "_dds"),
envir=parent.frame()
),
tidy=TRUE, alpha=0.05) %>% select(row, log2FoldChange, padj)
flog.debug(paste("Setting column names for", exp_name))
colnames(results_table) <- c(
"gene",
"log2FoldChange",
"padj"
)
mean_ref_name <- paste(exp_name, "mean", paste(ref, collapse=""), "counts", sep="_")
mean_other_name <- paste(exp_name, "mean", paste(other, collapse=""), "counts", sep="_")
flog.debug(paste("Retrieving means for", exp_name))
mean_ref_name <- paste(exp_name, "mean", paste(ref_condition, collapse=""), "counts", sep="_")
mean_other_name <- paste(exp_name, "mean", paste(other_condition, collapse=""), "counts", sep="_")
#mean_ref_name <- paste(exp_name, "mean", ref, "counts", sep="_")
#mean_other_name <- paste(exp_name, "mean", other, "counts", sep="_")
mean_ref <- get(
......@@ -395,6 +422,35 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
mean_other_name,
envir=parent.frame()
)
flog.debug(mean_ref)
flog.debug(mean_other)
flog.debug(paste("Adding means as extra columns for", exp_name))
flog.debug("dimensions")
flog.debug(dim(
results_table$gene))
flog.debug(dim(
wormid2cosmid$find(results_table$gene)))
flog.debug(dim(
wormid2name$find(results_table$gene)))
flog.debug(dim(
results_table[2:3]))
flog.debug(dim(
mean_ref))
flog.debug(dim(
mean_other))
flog.debug("lengths")
flog.debug(length(
results_table$gene))
flog.debug(length(
wormid2cosmid$find(results_table$gene)))
flog.debug(length(
wormid2name$find(results_table$gene)))
flog.debug(length(
results_table[2:3]))
flog.debug(length(
mean_ref))
flog.debug(length(
mean_other))
extended_results_table <- cbind(
results_table$gene,
wormid2cosmid$find(results_table$gene),
......@@ -411,6 +467,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
# envir=parent.frame()
# )
)
flog.debug(paste("Updating column names for", exp_name))
colnames(extended_results_table) <- c(
"gene", "cosmid", "name",
"log2FoldChange", "padj",
......@@ -418,7 +475,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
"mean_other_counts"
)
if (biotype %in% c("transposable_elements_wormbase", "transposable_elements_wormbase_reverse")){
print("Appending TE family names")
flog.debug("Appending TE family names")
results_table <- merge(extended_results_table, cosmid_wormid_transposon_family, by="gene")
}
table_filename <- paste(
......@@ -427,16 +484,19 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
"results.txt",
sep="_"
)
print(paste("Writing results to", table_filename))
write.table(
extended_results_table,
file.path(out_dir, table_filename)
)
print(paste("Assigning", paste0(exp_name, "_results")))
assign(
paste0(exp_name, "_results"),
extended_results_table,
envir=parent.frame()
#pos=1
)
print(paste("Assigning", paste0(exp_name, "_lfc_percentiles")))
assign(
paste0(exp_name, "_lfc_percentiles"),
quantile(
......@@ -447,6 +507,7 @@ make_results_table <- function(exp_name, out_dir, ref, other, wormid2cosmid, wor
#pos=1
)
#lfc_percentiles <- get(paste0(exp_name, "lfc_percentiles"))
print(paste("Assigning", paste0(exp_name, "_padj_percentiles")))
assign(
paste0(exp_name, "_padj_percentiles"),
quantile(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment