Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found
Select Git revision

Target

Select target project
  • mborry/coGSEA
  • vguillem/coGSEA
2 results
Select Git revision
Show changes
Commits on Source (9)
......@@ -16,5 +16,5 @@
comparResumPlot = function(preparedData, savePlot = TRUE, legend = TRUE, directoryPath = directoryPath){
print("Plotting Summary comparison Plot for all conditions")
generateSummaryPlots(comparisonSummaryData(preparedData), savePlot = savePlot, legend = legend, file.name = paste(directoryPath, "_comparison_sumplot", sep = ""), format = "pdf")
}
......@@ -18,10 +18,6 @@
generateSummaryPlots <- function(plot.data, file.name = "resumPlot", Xlab="-log10(p-value)",
Ylab="Average Absolute logFC", format = NULL, firstN = 10, savePlot = TRUE, legend = TRUE){
if(!require(ggplot2)){
install.packages("ggplot2")
require(ggplot2)
}
tryCatch({
plot.data.sig = plot.data[plot.data[, "rank"] <= firstN, ]
......@@ -44,23 +40,23 @@ generateSummaryPlots <- function(plot.data, file.name = "resumPlot", Xlab="-log1
# print(dim(plot.data))
if (savePlot == TRUE){
# plot rank-based coloured bubbles
p = qplot(x.data, y.data, data=plot.data, size=gsSize,asp=1,
p = ggplot2::qplot(x.data, y.data, data=plot.data, size=gsSize,asp=1,
colour=rank,
xlab = Xlab, ylab = Ylab,
xlim=xlimits,
ylim=ylimits)
# customize bubbles colour
p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7",
p = p + ggplot2::scale_colour_gradient(guide="colourbar", low="#56B1F7",
high="#000000",
limits=c(1,100), na.value="black", name="Rank")
# customize bubble size
p = p + scale_size("Cardinality", range=c(2,20))
p = p + ggplot2::scale_size("Gene set size", range=c(2,20))
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, ".rank.pdf"), width = 10, height = 7,
useDingbats = FALSE)
# label the bubbles of the top 10 gene sets
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
print(p + ggplot2::geom_text(size=5, mapping=ggplot2::aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
......@@ -68,7 +64,7 @@ generateSummaryPlots <- function(plot.data, file.name = "resumPlot", Xlab="-log1
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, ".rank.png"), width = 800, height = 700)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
print(p + ggplot2::geom_text(size=5, mapping=ggplot2::aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
......@@ -81,7 +77,7 @@ generateSummaryPlots <- function(plot.data, file.name = "resumPlot", Xlab="-log1
# plot direction-based coloured bubbles
top.10.ids = as.character(plot.data[plot.data[, "rank"] <= firstN,
"id"])
sig.ids = setdiff(plot.data[rank(-plot.data[,"sig"], na.last =
sig.ids = ggplot2::setdiff(plot.data[rank(-plot.data[,"sig"], na.last =
TRUE) <= 5, "id"], top.10.ids)
sig.cols = c(rep("black", length(top.10.ids)), rep("blue",
length(sig.ids)))
......@@ -92,18 +88,18 @@ plot.data[, "id"]), ]
xlab = Xlab, ylab = Ylab,
xlim=xlimits,
ylim=ylimits)
p = p + scale_colour_gradient(guide="colourbar", low="#56B1F7",
p = p + ggplot2::scale_colour_gradient(guide="colourbar", low="#56B1F7",
high="#E35F5F",
limits=c(-1,1), na.value="black",
name="Regulation Direction") # low="#5FE377"
p = p + scale_size("significance", range=c(2,20))
p = p + ggplot2::scale_size("significance", range=c(2,20))
if (savePlot == TRUE){
if (is.null(format) || tolower(format) == "pdf"){
pdf(paste0(file.name, "direction_.pdf"), width = 10, height = 7,
useDingbats = FALSE)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
print(p + ggplot2::geom_text(size=5, mapping=ggplot2::aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
......@@ -111,14 +107,14 @@ name="Regulation Direction") # low="#5FE377"
}
if (is.null(format) || tolower(format) == "png"){
png(paste0(file.name, "_direction.png"), width = 800, height = 700)
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
print(p + ggplot2::geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1) )
dev.off()
}
} else if (savePlot == FALSE && legend == TRUE){
print(p + geom_text(size=5, mapping=aes(x=x.data, y=y.data,
print(p + ggplot2::geom_text(size=5, mapping=aes(x=x.data, y=y.data,
label=id),
data=plot.data.sig,
colour=sig.cols, vjust=-1, hjust=1))
......
if(!require(parallel)){
install.packages("parallel")
require(parallel)
}
cGSEA = function(ElistObject,
contrastMatrix,
geneSetCollection = "H",
......@@ -42,10 +37,10 @@ cGSEAcore = function(ElistObject, contrast, geneset = "H", specie = "Mus musculu
if(camera == TRUE){
print("RUNNING CAMERA")
try({
camerastart = Sys.time()
camerastart = base::Sys.time()
res$camera = runcamera(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
camerastop = Sys.time()
time$camera = difftime(camerastop, camerastart, units = c("secs"))
camerastop = base::Sys.time()
time$camera = base::difftime(camerastop, camerastart, units = c("secs"))
})
......@@ -54,100 +49,100 @@ cGSEAcore = function(ElistObject, contrast, geneset = "H", specie = "Mus musculu
if (gage == TRUE){
print("RUNNING GAGE")
try({
gagestart = Sys.time()
gagestart = base::Sys.time()
res$gage = rungage(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
gagestop = Sys.time()
time$gage = difftime(gagestop,gagestart, units = c("secs"))
gagestop = base::Sys.time()
time$gage = base::difftime(gagestop,gagestart, units = c("secs"))
})
}
if (globaltest == TRUE){
print("RUNNING GLOBALTEST")
try({
globalteststart = Sys.time()
globalteststart = base::Sys.time()
res$globaltest = runglobaltest(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
globalteststop = Sys.time()
time$globaltest = difftime(globalteststop,globalteststart, units = c("secs"))
globalteststop = base::Sys.time()
time$globaltest = base::difftime(globalteststop,globalteststart, units = c("secs"))
})
}
if (gsva == TRUE){
print("RUNNING GSVA")
try({
gsvastart = Sys.time()
gsvastart = base::Sys.time()
res$gsva = rungsva(method = "gsva", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
gsvastop = Sys.time()
time$gsva = difftime(gsvastop,gsvastart, units = c("secs"))
gsvastop = base::Sys.time()
time$gsva = base::difftime(gsvastop,gsvastart, units = c("secs"))
})
}
if (ssgsea == TRUE){
print("RUNNING SSGSEA")
try({
ssgseastart = Sys.time()
ssgseastart = base::Sys.time()
res$ssgsea = rungsva(method = "ssgsea", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
ssgseastop = Sys.time()
time$ssgsea = difftime(ssgseastop,ssgseastart, units = c("secs"))
ssgseastop = base::Sys.time()
time$ssgsea = base::difftime(ssgseastop,ssgseastart, units = c("secs"))
})
}
if (zscore == TRUE){
print("RUNNING ZSCORE")
try({
zscorestart = Sys.time()
zscorestart = base::Sys.time()
res$zscore = rungsva(method = "zscore", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
zscorestop = Sys.time()
time$zscore = difftime(zscorestop,zscorestart, units = c("secs"))
zscorestop = base::Sys.time()
time$zscore = base::difftime(zscorestop,zscorestart, units = c("secs"))
})
}
if (plage == TRUE){
print("RUNNING PLAGE ")
try({
plagestart = Sys.time()
plagestart = base::Sys.time()
res$plage = rungsva(method = "plage", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
plagestop = Sys.time()
time$plage = difftime(plagestop,plagestart, units = c("secs"))
plagestop = base::Sys.time()
time$plage = base::difftime(plagestop,plagestart, units = c("secs"))
})
}
if (ora == TRUE){
print("RUNNING ORA")
try({
orastart = Sys.time()
orastart = base::Sys.time()
res$ora = runora(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
orastop = Sys.time()
time$ora = difftime(orastop, orastart, units = c("secs"))
orastop = base::Sys.time()
time$ora = base::difftime(orastop, orastart, units = c("secs"))
})
}
if (padog == TRUE){
print("RUNNING PADOG")
try({
padogstart = Sys.time()
padogstart = base::Sys.time()
res$padog = runpadog(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
padogstop = Sys.time()
time$padog = difftime(padogstop, padogstart, units = c("secs"))
padogstop = base::Sys.time()
time$padog = base::difftime(padogstop, padogstart, units = c("secs"))
})
}
if (roast == TRUE){
print("RUNNING ROAST")
try({
roaststart = Sys.time()
roaststart = base::Sys.time()
res$roast = runroast(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
roaststop = Sys.time()
time$roast = difftime(roaststop,roaststart, units = c("secs"))
roaststop = base::Sys.time()
time$roast = base::difftime(roaststop,roaststart, units = c("secs"))
})
}
if (safe == TRUE){
print("RUNNING SAFE")
try({
safestart = Sys.time()
safestart = base::Sys.time()
res$safe = runsafe(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
safestop = Sys.time()
time$safe = difftime(safestop, safestart, units = c("secs"))
safestop = base::Sys.time()
time$safe = base::difftime(safestop, safestart, units = c("secs"))
})
}
......
runTimePlot = function(preparedData, savePlot = TRUE, directoryPath = directoryPath){
print("Plotting runtime")
pdf(paste(directoryPath,"cGSEA_methods_runtime.pdf",sep = ""), width = 10, height = 7, useDingbats = FALSE)
barplot(unlist(preparedData$time), las = 2, ylab = "second", main = "RunTime")
graphics::barplot(unlist(preparedData$time), las = 2, ylab = "second", main = "RunTime")
dev.off()
}
......@@ -9,85 +9,66 @@ runTimePlot = function(preparedData, savePlot = TRUE, directoryPath = directoryP
clusteringPlot = function(preparedData, contrCondi, savePlot = TRUE, directoryPath = directoryPath){
print(paste("Plotting cluster Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_clustering.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
plot(preparedData$clustering[[contrCondi]], hang = -0.1)
graphics::plot(preparedData$clustering[[contrCondi]], hang = -0.1)
dev.off()
}
heatmapPlot = function(preparedData, contrCondi, savePlot = TRUE, directoryPath = directoryPath){
if(!require(pheatmap)){
install.packages("pheatmap")
require(pheatmap)
}
if(savePlot == TRUE){
print(paste("Plotting heatmap for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_pvalue_heatmap.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
pheatmap(preparedData$heatmap[[contrCondi]], fontsize_row = 5)
pheatmap::pheatmap(preparedData$heatmap[[contrCondi]], fontsize_row = 5)
dev.off()
} else {
pheatmap(preparedData$heatmap[[contrCondi]])
pheatmap::pheatmap(preparedData$heatmap[[contrCondi]])
}
}
pcaPlot = function(preparedData, contrCondi, savePlot = TRUE, directoryPath = directoryPath){
if(!require(FactoMineR)){
install.packages("FactoMineR")
require(FactoMineR)
}
res.pca = PCA(preparedData$PCA[[contrCondi]], scale.unit = T, axes = c(1,2), graph = FALSE)
res.pca = FactoMineR::PCA(preparedData$PCA[[contrCondi]], scale.unit = T, axes = c(1,2), graph = FALSE)
eigen = res.pca$eig$`percentage of variance`
names(eigen) = rownames(res.pca$eig)
base::names(eigen) = base::rownames(res.pca$eig)
print(paste("Plotting Eigen values Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_eigen_fall.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
barplot(eigen, las = 2, ylab = "%")
graphics::barplot(eigen, las = 2, ylab = "%")
dev.off()
print(paste("Plotting PCA Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_pca.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
plot(res.pca, choix = 'ind')
graphics::plot(res.pca, choix = 'ind')
dev.off()
}
corPlot = function(preparedData, contrCondi, savePlot = TRUE, directoryPath = directoryPath){
if(!require(corrplot)){
install.packages("corrplot")
require(corrplot)
}
print(paste("Plotting correlation Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_correlation.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
corrplot(preparedData$correlation[[contrCondi]], method = "circle", type = "full", order = "hclust",mar=c(10, 4, 4, 2) + 0.1)
corrplot::corrplot(preparedData$correlation[[contrCondi]], method = "circle", type = "full", order = "hclust",mar=c(10, 4, 4, 2) + 0.1)
dev.off()
}
upsetrPlot = function(preparedData, contrCondi, savePlot = TRUE, directoryPath = directoryPath){
if(!require(UpSetR)){
install.packages("UpSetR")
require(UpSetR)
}
print(paste("Plotting UpsetR Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_upsetr.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
upset(fromList(preparedData$snailPlot[[contrCondi]]),order.by = "freq")
UpSetR::upset(fromList(preparedData$snailPlot[[contrCondi]]),order.by = "freq")
dev.off()
}
snailPlot = function(preparedData, contrCondi, savePlot = TRUE, min.intersection.size = 1, directoryPath = directoryPath){
if(!require(SuperExactTest)){
install.packages("SuperExactTest")
require(SuperExactTest)
}
snail = supertest(preparedData$snailPlot[[contrCondi]], n = length(levels(preparedData$snailPlot[[contrCondi]][[1]])))
snail = SuperExactTest::supertest(preparedData$snailPlot[[contrCondi]], n = length(levels(preparedData$snailPlot[[contrCondi]][[1]])))
print(paste("Plotting Snail Plot for condition", contrCondi))
if (savePlot == TRUE){
pdf(paste(directoryPath, contrCondi, "_snailplot.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
plot.msets(snail, sort.by="size", keep.empty.intersections=FALSE, min.intersection.size = min.intersection.size)
SuperExactTest::plot.msets(snail, sort.by="size", keep.empty.intersections=FALSE, min.intersection.size = min.intersection.size)
dev.off()
} else {
plot.msets(snail, sort.by="size", keep.empty.intersections=FALSE, min.intersection.size = min.intersection.size)
SuperExactTest::plot.msets(snail, sort.by="size", keep.empty.intersections=FALSE, min.intersection.size = min.intersection.size)
}
}
......@@ -96,11 +77,11 @@ resumPlot1 = function(preparedData, contrCondi, savePlot = TRUE, directoryPath =
print(paste("Plotting simple Summary Plot for condition", contrCondi))
pdf(paste(directoryPath, contrCondi, "_simple_sumplot.pdf", sep = ""), width = 10, height = 7, useDingbats = FALSE)
plot(preparedData$resumPlot1[[contrCondi]][["x"]], preparedData$resumPlot1[[contrCondi]][["y"]], xlab = '-log10(pVal)', ylab = "avg logFC")
abline(h = 0)
abline(v = 0)
text(preparedData$resumPlot1[[contrCondi]][['x']], preparedData$resumPlot1[[contrCondi]][['y']], labels=preparedData$resumPlot1[[contrCondi]][['labels']], cex= 0.7)
abline(v = -log10(0.05), col = "red")
graphics::plot(preparedData$resumPlot1[[contrCondi]][["x"]], preparedData$resumPlot1[[contrCondi]][["y"]], xlab = '-log10(pVal)', ylab = "avg logFC")
graphics::abline(h = 0)
graphics::abline(v = 0)
graphics::text(preparedData$resumPlot1[[contrCondi]][['x']], preparedData$resumPlot1[[contrCondi]][['y']], labels=preparedData$resumPlot1[[contrCondi]][['labels']], cex= 0.7)
graphics::abline(v = -log10(0.05), col = "red")
dev.off()
}
......@@ -111,9 +92,9 @@ resumPlot2 = function(preparedData, contrCondi, savePlot = TRUE, directoryPath=
}
cGSEAMakePlots = function(preparedData, directoryPath){
dir.create(file.path(directoryPath,"/plots/"), showWarnings = FALSE)
base::dir.create(base::file.path(directoryPath,"/plots/"), showWarnings = FALSE)
for (condi in names(preparedData$result)){
dir.create(file.path(directoryPath,"/plots/", condi), showWarnings = FALSE)
base::dir.create(file.path(directoryPath,"/plots/", condi), showWarnings = FALSE)
clusteringPlot(preparedData = preparedData, contrCondi = condi, directoryPath = paste0(directoryPath,"/plots/",condi,"/"))
pcaPlot(preparedData = preparedData, contrCondi = condi, directoryPath = paste0(directoryPath,"/plots/",condi,"/"))
corPlot(preparedData = preparedData, contrCondi = condi, directoryPath = paste0(directoryPath,"/plots/",condi,"/"))
......
......@@ -5,24 +5,18 @@
runcamera <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!(require(limma))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
require(limma)
}
# run CAMERA and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
# camera.results = vector("list", ncol(contrast))
args.all = list()
for(i in 1:contr.num){
if (is.matrix(contrast))
if (base::is.matrix(contrast))
args.all[[i]] = list(voom.results = voom.results,
contrast.name = contr.names[i],
contrast = contrast[,i],
......@@ -38,11 +32,11 @@ runcamera <- function(voom.results, contrast, genesetIdx,
)
# print(args.all[[i]])
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
camera.results = lapply(args.all, runcamera.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
camera.results = base::lapply(args.all, runcamera.contrast)
else
camera.results = mclapply(args.all, runcamera.contrast,
camera.results = parallel::mclapply(args.all, runcamera.contrast,
mc.cores=num.workers)
# names(camera.results) = colnames(contrast)
return(camera.results)
......@@ -53,18 +47,18 @@ runcamera.contrast <- function(args){
print(paste0(" Running CAMERA for ", args$contrast.name))
else
cat(".")
camera.results = camera(y=args$voom.results,
camera.results = limma::camera(y=args$voom.results,
index=args$genesetIdx,
design=args$voom.results$design,
contrast=args$contrast) # , allow.neg.cor=TRUE, inter.gene.cor=NA
# print(head(camera.results[[i]]))
camera.results =
camera.results[order(camera.results[,"PValue"]),]
camera.results[base::order(camera.results[,"PValue"]),]
camera.results = cbind(Rank=seq(1,
nrow(camera.results)), camera.results)
colnames(camera.results)[which(colnames(camera.results) == "PValue")] = "p.value"
camera.results = base::cbind(Rank=base::seq(1,
base::nrow(camera.results)), camera.results)
base::colnames(camera.results)[base::which(base::colnames(camera.results) == "PValue")] = "p.value"
return(camera.results)
}
......@@ -75,24 +69,19 @@ runcamera.contrast <- function(args){
runfry <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!(require(limma))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
require(limma)
}
# run fry and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
# fry.results = vector("list", ncol(contrast))
args.all = list()
for(i in 1:contr.num){
if (is.matrix(contrast))
if (base::is.matrix(contrast))
args.all[[i]] = list(voom.results = voom.results,
contrast.name = contr.names[i],
contrast = contrast[,i],
......@@ -107,11 +96,11 @@ runfry <- function(voom.results, contrast, genesetIdx,
verbose = verbose
)
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
fry.results = lapply(args.all, runfry.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
fry.results = base::lapply(args.all, runfry.contrast)
else
fry.results = mclapply(args.all, runfry.contrast,
fry.results = parallel::mclapply(args.all, runfry.contrast,
mc.cores=num.workers)
# names(fry.results) = colnames(contrast)
return(fry.results)
......@@ -122,17 +111,17 @@ runfry.contrast <- function(args){
print(paste0(" Running FRY for ", args$contrast.name))
else
cat(".")
capture.output(fry.results <- fry(y=args$voom.results,
capture.output(fry.results <- limma::fry(y=args$voom.results,
index=args$genesetIdx,
design=args$voom.results$design,
contrast=args$contrast))
# returns PropDown/PropUp ==> proportion of genes that are
# down/up-regulated
fry.results = fry.results[order(fry.results[,
fry.results = fry.results[base::order(fry.results[,
"PValue"]),]
fry.results = cbind(Rank=seq(1,
nrow(fry.results)), fry.results)
colnames(fry.results)[which(colnames(fry.results) == "PValue")] = "p.value"
fry.results = base::cbind(Rank=base::seq(1,
base::nrow(fry.results)), fry.results)
base::colnames(fry.results)[base::which(base::colnames(fry.results) == "PValue")] = "p.value"
return(fry.results)
}
......@@ -142,18 +131,14 @@ runfry.contrast <- function(args){
rungage <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!require(gage)){
source("https://bioconductor.org/biocLite.R")
biocLite("gage")
require(gage)
}
# run GAGE and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
groupData = prepareTwoGroupsData(voom.results, contrast, genesetIdx,
......@@ -169,16 +154,16 @@ rungage <- function(voom.results, contrast, genesetIdx,
genesetIdx = genesetIdx,
verbose = verbose)
}
names(args.all) = contr.names
base::names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
gage.results = lapply(args.all, rungage.contrast)
gage.results = base::lapply(args.all, rungage.contrast)
else
gage.results = mclapply(args.all, rungage.contrast,
gage.results = parallel::mclapply(args.all, rungage.contrast,
mc.cores=num.workers)
#stop("here")
names(gage.results) = colnames(contrast)
for (i in length(names(gage.results))){
gage.results[[i]][which(gage.results[[i]]$set.size == 1),"p.value"] = 1
base::names(gage.results) = base::colnames(contrast)
for (i in base::length(base::names(gage.results))){
gage.results[[i]][base::which(gage.results[[i]]$set.size == 1),"p.value"] = 1
}
return(gage.results)
# return(tmp)
......@@ -191,7 +176,7 @@ rungage.contrast <- function(args){
cat(".")
groupData = args$groupData
# same.dir=FALSE ==> Two directional test
gage.results = gage(exprs=args$logCPM,
gage.results = gage::gage(exprs=args$logCPM,
gsets=args$gsets,
ref=args$group1,
samp=args$group2, set.size = c(0,2000), same.dir=FALSE,
......@@ -200,10 +185,10 @@ rungage.contrast <- function(args){
# down/up-regulated
gage.results = gage.results[ order (
gage.results[,"p.val"]),]
gage.results = cbind(Rank=seq(1, nrow(gage.results)),
gage.results = base::cbind(Rank=base::seq(1, base::nrow(gage.results)),
gage.results)
colnames(gage.results)[which(colnames(gage.results) == "p.val")] = "p.value"
return(as.data.frame(gage.results))
base::colnames(gage.results)[base::which(base::colnames(gage.results) == "p.val")] = "p.value"
return(base::as.data.frame(gage.results))
}
####################################################
......@@ -213,22 +198,17 @@ rungage.contrast <- function(args){
runglobaltest <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!require(globaltest)){
source("https://bioconductor.org/biocLite.R")
biocLite("globaltest")
require(globaltest)
}
# run globaltest and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
gt.options(transpose=TRUE)
globaltest::gt.options(transpose=TRUE)
groupData = prepareTwoGroupsData(voom.results, contrast, genesetIdx,
verbose = verbose)
# globaltest.results = vector("list", ncol(contrast))
......@@ -242,11 +222,11 @@ runglobaltest <- function(voom.results, contrast, genesetIdx,
genesetIdx = genesetIdx,
verbose = verbose)
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
globaltest.results = lapply(args.all, runglobaltest.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
globaltest.results = base::lapply(args.all, runglobaltest.contrast)
else
globaltest.results = mclapply(args.all, runglobaltest.contrast,
globaltest.results = parallel::mclapply(args.all, runglobaltest.contrast,
mc.cores=num.workers)
# names(globaltest.results) = colnames(contrast)
return(globaltest.results)
......@@ -259,25 +239,25 @@ runglobaltest.contrast <- function(args){
else
cat(".")
groupIndx = c(args$group2,
groupIndx = base::c(args$group2,
args$group1)
data.log.sel = args$logCPM[, groupIndx]
group = c(rep("d", length(args$group2)),
rep("c", length(args$group1)))
colnames(data.log.sel) = group
group = base::c(base::rep("d", base::length(args$group2)),
base::rep("c", base::length(args$group1)))
base::colnames(data.log.sel) = group
# perform the globaltest
globaltest.results = result(gt(factor(group),
globaltest.results = globaltest::result(globaltest::gt(factor(group),
data.log.sel, subsets=args$genesetIdx,
permutations=10000))
globaltest.results = globaltest.results[
order(globaltest.results[,"p-value"],
base::order(globaltest.results[,"p-value"],
-globaltest.results[,"Statistic"]),]
globaltest.results = cbind(
Rank=seq(1, nrow(globaltest.results)),
globaltest.results = base::cbind(
Rank=base::seq(1, base::nrow(globaltest.results)),
globaltest.results)
colnames(globaltest.results)[which(
colnames(globaltest.results) == "p-value")] = "p.value"
base::colnames(globaltest.results)[base::which(
base::colnames(globaltest.results) == "p-value")] = "p.value"
return(globaltest.results)
}
......@@ -287,73 +267,69 @@ runglobaltest.contrast <- function(args){
rungsva <- function(method, voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!require(GSVA)){
source("https://bioconductor.org/biocLite.R")
biocLite("GSVA")
require(GSVA)
}
# run gsva and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
gsets = list()
for (j in 1:length(genesetIdx)){
gsets[[j]] = as.character(genesetIdx[[j]])
for (j in 1:base::length(genesetIdx)){
gsets[[j]] = base::as.character(genesetIdx[[j]])
}
names(gsets) = names(genesetIdx)
base::names(gsets) = base::names(genesetIdx)
if (verbose)
print(paste0(" Calculating gene set-level stats using ",
toupper(method)))
base::toupper(method)))
else
cat(".")
# transform scores in gene set space using parallel computing
data.log = voom.results$E
rownames(data.log) = as.character(seq(1, nrow(data.log)))
base::rownames(data.log) = base::as.character(base::seq(1, base::nrow(data.log)))
gs.es = calculateSetScores.parallel(data.log, gsets, method, num.workers)
# fit the gene set scores and find DE gene sets
gs.fit = lmFit(gs.es, design=voom.results$design)
if (is.matrix(contrast)){
gs.fit = contrasts.fit(gs.fit, contrast)
gs.fit = limma::lmFit(gs.es, design=voom.results$design)
if (base::is.matrix(contrast)){
gs.fit = limma::contrasts.fit(gs.fit, contrast)
coefs = 1:contr.num
}else{
coefs = contrast
}
gs.fit = eBayes(gs.fit)
gsva.results = vector("list", contr.num)
gs.fit = limma::eBayes(gs.fit)
gsva.results = base::vector("list", contr.num)
for(i in 1:contr.num){
if (verbose)
print(paste0(" Running ", toupper(method)," for ",
print(paste0(" Running ", base::toupper(method)," for ",
contr.names[i]))
else
cat(".")
gsva.results[[i]] = topTable(gs.fit, coef=coefs[i], number=Inf, sort.by="p",
gsva.results[[i]] = limma::topTable(gs.fit, coef=coefs[i], number=Inf, sort.by="p",
adjust.method="BH")
gsva.results[[i]] = gsva.results[[i]][order(gsva.results[[i]][, "P.Value"],
gsva.results[[i]] = gsva.results[[i]][base::order(gsva.results[[i]][, "P.Value"],
-gsva.results[[i]][, "B"]),]
gsva.results[[i]] = cbind(Rank=seq(1, nrow(gsva.results[[i]])),
gsva.results[[i]] = base::cbind(Rank=base::seq(1, base::nrow(gsva.results[[i]])),
gsva.results[[i]])
colnames(gsva.results[[i]])[which(
colnames(gsva.results[[i]]) == "P.Value")] = "p.value"
base::colnames(gsva.results[[i]])[base::which(
base::colnames(gsva.results[[i]]) == "P.Value")] = "p.value"
}
names(gsva.results) = contr.names
base::names(gsva.results) = contr.names
return(gsva.results)
}
calculateSetScores.parallel <- function(data.log, gsets, method, num.workers){
args.all = list()
sets.per.task = 50
total.tasks = ceiling(length(gsets) / sets.per.task)
total.tasks = base::ceiling(length(gsets) / sets.per.task)
for (i in 1:total.tasks){
gsetsi = gsets[((i-1)*sets.per.task + 1):(i*sets.per.task)]
gsetsi = gsetsi[!sapply(gsetsi, is.null)]
gsetsi = gsetsi[!base::sapply(gsetsi, is.null)]
args.all[[paste0("task", i)]] = list(
data.log = data.log,
gsets = gsetsi,
......@@ -361,22 +337,22 @@ calculateSetScores.parallel <- function(data.log, gsets, method, num.workers){
)
}
# parallelize the calculation of gene set scores to speed up the algorithms
if (Sys.info()['sysname'] == "Windows")
gs.es.all = lapply(args.all, rungsva.subcollection)
if (base::Sys.info()['sysname'] == "Windows")
gs.es.all = base::lapply(args.all, rungsva.subcollection)
else
gs.es.all = mclapply(args.all, rungsva.subcollection,
gs.es.all = parallel::mclapply(args.all, rungsva.subcollection,
mc.cores=num.workers)
# collect gene set scores from differet workers
gs.es = c()
for (i in 1:total.tasks)
gs.es = rbind(gs.es, gs.es.all[[paste0("task", i)]])
rownames(gs.es) = names(gsets)
gs.es = base::rbind(gs.es, gs.es.all[[paste0("task", i)]])
base::rownames(gs.es) = base::names(gsets)
return(gs.es)
}
rungsva.subcollection <- function(args){
set.seed(519863)
gs.es = gsva(expr=args$data.log, gset.idx.list=args$gsets, mx.diff=TRUE,
base::set.seed(519863)
gs.es = GSVA::gsva(expr=args$data.log, gset.idx.list=args$gsets, mx.diff=TRUE,
min.sz=1,
method=args$method, parallel.sz=1,
verbose=FALSE, rnaseq=FALSE)#$es.obs
......@@ -402,49 +378,43 @@ runora <- function(voom.results, contrast, genesetIdx, num.workers=4, verbose =
# phyper(100, 3000, 12000, 400)
#
if(!(require(limma))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
require(limma)
}
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
if (!is.null(voom.results$E)){
vfit = lmFit(voom.results, voom.results$design)
if (is.matrix(contrast)){
vfit = contrasts.fit(vfit, contrast)
if (!base::is.null(voom.results$E)){
vfit = limma::lmFit(voom.results, voom.results$design)
if (base::is.matrix(contrast)){
vfit = limma::contrasts.fit(vfit, contrast)
coefs = 1:contr.num
}else{
coefs = contrast
}
vfit = eBayes(vfit)
vfit = limma::eBayes(vfit)
pvalue.cut=0.05
logfc.cut=1
universe = unlist(genesetIdx)
universe = base::unlist(genesetIdx)
}
else if (!is.null(voom.results$featureIDs)){ # ORA Analysis
else if (!base::is.null(voom.results$featureIDs)){ # ORA Analysis
universe = voom.results$featureIDs
}
ora.results = vector("list", contr.num)
ora.results = base::vector("list", contr.num)
for(i in 1:contr.num){
if (verbose)
print(paste0(" Running ORA for ", contr.names[i]))
else
cat(".")
if (!is.null(voom.results$E)){
deGenes = rownames(topTable(vfit, coef=coefs[i], number=Inf,
if (!base::is.null(voom.results$E)){
deGenes = base::rownames(limma::topTable(vfit, coef=coefs[i], number=Inf,
p.value=pvalue.cut,
lfc=logfc.cut))
if (length(deGenes) == 0){
deGenes = rownames(topTable(vfit, coef=i, number=Inf,
if (base::length(deGenes) == 0){
deGenes = base::rownames(limma::topTable(vfit, coef=i, number=Inf,
p.value=pvalue.cut,
lfc=0))
if (verbose)
......@@ -454,37 +424,37 @@ runora <- function(voom.results, contrast, genesetIdx, num.workers=4, verbose =
}else{
deGenes = voom.results$ids
}
deGenes = deGenes[which(deGenes %in% universe)]
deGenes = deGenes[base::which(deGenes %in% universe)]
ora.stats = runora.collection(deGenes, genesetIdx, universe)
ora.results[[i]] = ora.stats
# print(head(ora.results[[i]]))
ora.results[[i]] =
ora.results[[i]][order(ora.results[[i]][,"p.value"]),] # order by p.value
ora.results[[i]] = cbind(Rank=seq(1, nrow(ora.results[[i]])),
ora.results[[i]][base::order(ora.results[[i]][,"p.value"]),] # order by p.value
ora.results[[i]] = base::cbind(Rank=base::seq(1, base::nrow(ora.results[[i]])),
ora.results[[i]])
}
names(ora.results) = colnames(contrast)
base::names(ora.results) = base::colnames(contrast)
return(ora.results)
}
runora.collection <- function(deGenes, genesetIdx, universe){
tmp = rep(NA, length(genesetIdx))
tmp = base::rep(NA, base::length(genesetIdx))
ora.stats = data.frame(p.value=tmp, p.adj = tmp) # , NumGenes=tmp
totalDE = length(deGenes)
n = length(universe) - totalDE
for (j in 1:length(genesetIdx)){
totalDE = base::length(deGenes)
n = base::length(universe) - totalDE
for (j in 1:base::length(genesetIdx)){
gset = genesetIdx[[j]]
totalDEinS = length(intersect(gset, deGenes))
totalSinUniverse = length(intersect(gset, universe))
ora.stats[j, "p.value"] = phyper(q = totalDEinS- 0.5, m=
totalDEinS = base::length(base::intersect(gset, deGenes))
totalSinUniverse = base::length(base::intersect(gset, universe))
ora.stats[j, "p.value"] = stats::phyper(q = totalDEinS- 0.5, m=
totalDE, n = n,
k = totalSinUniverse, lower.tail = FALSE)
#ora.stats[j, "NumGenes"] = totalDEinS
}
ora.stats[, "p.adj"] = p.adjust(ora.stats[, "p.value"], method = "BH")
row.names(ora.stats) = names(genesetIdx)
ora.stats[, "p.adj"] = stats::p.adjust(ora.stats[, "p.value"], method = "BH")
base::row.names(ora.stats) = base::names(genesetIdx)
return ( ora.stats)
}
......@@ -494,19 +464,14 @@ totalDE, n = n,
runpadog <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!require(PADOG)){
source("http://www.bioconductor.org/biocLite.R")
biocLite("PADOG")
require(PADOG)
}
# run padog and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
groupData = prepareTwoGroupsData(voom.results, contrast, genesetIdx,
......@@ -522,11 +487,11 @@ runpadog <- function(voom.results, contrast, genesetIdx,
genesetIdx = genesetIdx,
verbose = verbose)
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
padog.results = lapply(args.all, runpadog.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
padog.results = base::lapply(args.all, runpadog.contrast)
else
padog.results = mclapply(args.all, runpadog.contrast,
padog.results = parallel::mclapply(args.all, runpadog.contrast,
mc.cores=num.workers)
# names(padog.results) = colnames(contrast)
return(padog.results)
......@@ -538,21 +503,21 @@ runpadog.contrast <- function(args){
else
cat(".")
group = c(rep("c", length(args$group1)),
rep("d", length(args$group2)))
group = base::c(base::rep("c", base::length(args$group1)),
base::rep("d", base::length(args$group2)))
padog.results = padog(esetm=args$logCPM,
padog.results = PADOG::padog(esetm=args$logCPM,
group=group, paired=FALSE,
gslist=args$gsets, NI=100, Nmin = 1,
verbose=FALSE)
padog.results = padog.results[order(
padog.results = padog.results[base::order(
padog.results[,"Ppadog"],
-padog.results[,"padog0"]),]
padog.results = cbind(
Rank = seq(1, nrow(padog.results)),
padog.results = base::cbind(
Rank = base::seq(1, base::nrow(padog.results)),
padog.results)
colnames(padog.results)[which(colnames(padog.results) == "Ppadog")] = "p.value"
base::colnames(padog.results)[base::which(base::colnames(padog.results) == "Ppadog")] = "p.value"
return(padog.results)
}
......@@ -562,25 +527,21 @@ runpadog.contrast <- function(args){
runroast <- function(voom.results, contrast, genesetIdx,
num.workers=4, nrot = 999, verbose = TRUE){
if(!(require(limma))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
require(limma)
}
# run ROAST and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
# roast.results = vector("list", ncol(contrast))
args.all = list()
for(i in 1:contr.num){
if (is.matrix(contrast))
if (base::is.matrix(contrast))
args.all[[i]] = list(voom.results = voom.results,
contrast.name = contr.names[i],
contrast = contrast[,i],
......@@ -595,11 +556,11 @@ runroast <- function(voom.results, contrast, genesetIdx,
verbose = verbose
)
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
roast.results = lapply(args.all, runroast.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
roast.results = base::lapply(args.all, runroast.contrast)
else
roast.results = mclapply(args.all, runroast.contrast,
roast.results = parallel::mclapply(args.all, runroast.contrast,
mc.cores=num.workers)
# names(roast.results) = colnames(contrast)
return(roast.results)
......@@ -610,17 +571,17 @@ runroast.contrast <- function(args){
print(paste0(" Running ROAST for ", args$contrast.name))
else
cat(".")
roast.results = mroast(y=args$voom.results,
roast.results = limma::mroast(y=args$voom.results,
index=args$genesetIdx,
design=args$voom.results$design,
contrast=args$contrast, nrot=999)
# returns PropDown/PropUp ==> proportion of genes that are
# down/up-regulated
roast.results = roast.results[order(roast.results[,
roast.results = roast.results[base::order(roast.results[,
"PValue"]),]
roast.results = cbind(Rank=seq(1,
nrow(roast.results)), roast.results)
colnames(roast.results)[which(colnames(roast.results) == "PValue")] = "p.value"
roast.results = base::cbind(Rank=base::seq(1,
base::nrow(roast.results)), roast.results)
base::colnames(roast.results)[base::which(base::colnames(roast.results) == "PValue")] = "p.value"
return(roast.results)
}
......@@ -631,25 +592,20 @@ runroast.contrast <- function(args){
runsafe <- function(voom.results, contrast, genesetIdx,
num.workers=4, verbose = TRUE){
if(!(require(safe))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("safe")
require(safe)
}
# run safe and write out ranked 'gene sets' for each 'contrast'
if (is.matrix(contrast)){
contr.names = colnames(contrast)
contr.num = ncol(contrast)
if (base::is.matrix(contrast)){
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
}
groupData = prepareTwoGroupsData(voom.results, contrast, genesetIdx,
min.samples = 3, verbose)
capture.output(C.mat <- getCmatrix(keyword.list=groupData$gsets, min.size = 1,
present.genes=rownames(groupData$data[[1]]$logCPM)))
utils::capture.output(C.mat <- safe::getCmatrix(keyword.list=groupData$gsets, min.size = 1,
present.genes=base::rownames(groupData$data[[1]]$logCPM)))
# safe.results = vector("list", ncol(contrast))
args.all = list()
for(i in 1:contr.num){
......@@ -662,11 +618,11 @@ runsafe <- function(voom.results, contrast, genesetIdx,
genesetIdx = genesetIdx,
verbose = verbose)
}
names(args.all) = contr.names
if (Sys.info()['sysname'] == "Windows" || contr.num <= 1)
safe.results = lapply(args.all, runsafe.contrast)
base::names(args.all) = contr.names
if (base::Sys.info()['sysname'] == "Windows" || contr.num <= 1)
safe.results = base::lapply(args.all, runsafe.contrast)
else
safe.results = mclapply(args.all, runsafe.contrast,
safe.results = parallel::mclapply(args.all, runsafe.contrast,
mc.cores=num.workers)
# names(safe.results) = colnames(contrast)
return(safe.results)
......@@ -677,29 +633,29 @@ runsafe.contrast <- function(args){
print(paste0(" Running SAFE for ", args$contrast))
else
cat(".")
group = c(rep("Ctr", length(args$group1)),
rep("Trt", length(args$group2)))
group = base::c(base::rep("Ctr", base::length(args$group1)),
base::rep("Trt", base::length(args$group2)))
capture.output(temp <- safe(X.mat=args$logCPM,
utils::capture.output(temp <- safe::safe(X.mat=args$logCPM,
y.vec=group, C.mat=args$C.mat,
print.it=FALSE, Pi.mat=100))#,
safe.results = safe.toptable(temp,
number=length(args$gsets),
safe.results = safe::safe.toptable(temp,
number=base::length(args$gsets),
description = FALSE)
# safe.results = na.omit(safe.results)
# rownames(safe.results) = names(args$genesetIdx)
safe.results[, "P.value"] = as.numeric(
safe.results[, "P.value"] = base::as.numeric(
safe.results[, "P.value"])
safe.results[, "Adj.p.value"] = as.numeric(
safe.results[, "Adj.p.value"] = base::as.numeric(
safe.results[, "Adj.p.value"])
rownames(safe.results) = safe.results[, "GenesetID"]
base::rownames(safe.results) = safe.results[, "GenesetID"]
safe.results = safe.results[, -c(1,6)]
safe.results = safe.results[order(
safe.results[,"P.value"],
-safe.results[,"Statistic"]),]
safe.results = cbind(Rank = seq(1,
nrow(safe.results)), safe.results)
colnames(safe.results)[which(colnames(safe.results) == "P.value")] = "p.value"
safe.results = base::cbind(Rank = base::seq(1,
base::nrow(safe.results)), safe.results)
base::colnames(safe.results)[base::which(base::colnames(safe.results) == "P.value")] = "p.value"
return(safe.results)
}
......@@ -707,85 +663,77 @@ runsafe.contrast <- function(args){
################ SetRank ############################
#####################################################
# runsetrank = function(voom.results, contrast, geneset = "H", specie = "Mus musculus", collection = NULL, num.workers=4, verbose = TRUE, lfcCuttof = 0, pCutoff = 1, fdrCutoff = 1, directory = getwd()){
# if(!(require(SetRank))){
# install.packages("SetRank")
# require(SetRank)
# }
#
# if(!(require(edgeR))){
# install.packages("edgeR")
# require(edgeR)
# }
# collection_names = c("maxSetSize","referenceSet","sets","g","bigSets","intersection.p.cutoff","intersections","iMatrix")
#
# options(mc.cores = num.workers)
#
# if (specie == "Mus musculus" && is.null(collection) == TRUE){
# if (geneset == "H"){
# # load(paste(directory,"/genesets/setRankMusCollection_H.RData", sep = ""))
# theCollection = MusCollectionH
# } else if (geneset == "C2_KEGG"){
# # load(paste(directory,"/genesets/setRankMusCollection_C2.RData", sep = ""))
# theCollection = MusCollectionC2Kegg
# } else if (geneset == "C2_REACTOME"){
# theCollection = MusCollectionC2Reactome
# } else {
# cat(paste(geneset , "is not a valid MSigDB geneset for SetRank. Available genesets : \n - H \n - C2"))
# return(NULL)
# }
# } else if (specie == "Homo sapiens"){
# if (geneset == "H"){
# # load(paste(directory,"genesets/setRankHomoCollection_H.RData", sep = ""))
# theCollection = HomoCollectionH
# } else if (geneset == "C2_KEGG"){
# # load(paste(directory,"genesets/setRankHomoCollection_C2.RData", sep = ""))
# theCollection = HomoCollectionC2Kegg
# } else if (geneset == "C2_REACTOME"){
# theCollection = HomoCollectionC2Reactome
# } else {
# cat(paste(geneset , "is not a valid MSigDB geneset for SetRank. Available genesets : \n - H \n - C2"))
# return(NULL)
# }
# } else if ( is.null(collection) == FALSE){
# if (names(collection) == collection_names){
# theCollection = collection
# } else {
# cat(paste(collection,"is not a valid SetRank collection object"))
# }
#
# }
#
# else {
# cat(paste(specie,"is not a valid specie for SetRank. Available species : \n - Mus musculus \n - Homo sapiens"))
# return(NULL)
# }
#
# dir.create(file.path(getwd(), "SetRankResults"), showWarnings = FALSE)
#
# voomFit = lmFit(voom.results, voom.results$design)
# voomContrast = contrasts.fit(voomFit, contrasts = contrast)
# voomTreat = treat(voomContrast, lfc = lfcCuttof)
#
# setrank.results = list()
#
# for (i in seq(1,length(colnames(contrast)))){
# if (verbose == TRUE){
# cat(paste(" Running SetRank for", colnames(contrast)[i], "contrast"))
# }
#
# network = setRankAnalysis(rownames(topTreat(voomTreat, coef = i, n = Inf)),setCollection = theCollection, use.ranks = TRUE, setPCutoff = pCutoff, fdrCutoff = fdrCutoff)
# exportSingleResult(network = network, selectedGenes = rownames(topTreat(voomTreat, coef = i, n = Inf)), collection = theCollection, networkName = colnames(contrast)[i], outputPath = "./SetRankResults/")
# fh = read.table(paste("./SetRankResults/",colnames(contrast)[i],"_pathways.txt", sep = ""), header = TRUE)
# fh$Rank = seq(1,dim(fh)[1])
#
# fh2 = cbind(fh$Rank,fh$size,fh$setRank,fh$pSetRank,fh$correctedPValue,fh$adjustedPValue)
# rownames(fh2) = fh$description
# colnames(fh2) = c("Rank","Size", "SetRank_Rank", "p.value", "Adj.p.value", "fdr")
# fh = fh2
# remove(fh2)
#
# setrank.results[[colnames(contrast)[i]]] = fh
# }
# return(setrank.results)
# }
runsetrank = function(voom.results, contrast, geneset = "H", specie = "Mus musculus", collection = NULL, num.workers=4, verbose = TRUE, lfcCuttof = 0, pCutoff = 1, fdrCutoff = 1, directory = getwd()){
collection_names = c("maxSetSize","referenceSet","sets","g","bigSets","intersection.p.cutoff","intersections","iMatrix")
base::options(mc.cores = num.workers)
if (specie == "Mus musculus" && base::is.null(collection) == TRUE){
if (geneset == "H"){
# load(paste(directory,"/genesets/setRankMusCollection_H.RData", sep = ""))
theCollection = MusCollectionH
} else if (geneset == "C2_KEGG"){
# load(paste(directory,"/genesets/setRankMusCollection_C2.RData", sep = ""))
theCollection = MusCollectionC2Kegg
} else if (geneset == "C2_REACTOME"){
theCollection = MusCollectionC2Reactome
} else {
cat(paste(geneset , "is not a valid MSigDB geneset for SetRank. Available genesets : \n - H \n - C2"))
return(NULL)
}
} else if (specie == "Homo sapiens"){
if (geneset == "H"){
# load(paste(directory,"genesets/setRankHomoCollection_H.RData", sep = ""))
theCollection = HomoCollectionH
} else if (geneset == "C2_KEGG"){
# load(paste(directory,"genesets/setRankHomoCollection_C2.RData", sep = ""))
theCollection = HomoCollectionC2Kegg
} else if (geneset == "C2_REACTOME"){
theCollection = HomoCollectionC2Reactome
} else {
cat(paste(geneset , "is not a valid MSigDB geneset for SetRank. Available genesets : \n - H \n - C2"))
return(NULL)
}
} else if ( base::is.null(collection) == FALSE){
if (base::names(collection) == collection_names){
theCollection = collection
} else {
cat(paste(collection,"is not a valid SetRank collection object"))
}
}
else {
cat(paste(specie,"is not a valid specie for SetRank. Available species : \n - Mus musculus \n - Homo sapiens"))
return(NULL)
}
base::dir.create(file.path(base::getwd(), "SetRankResults"), showWarnings = FALSE)
voomFit = limma::lmFit(voom.results, voom.results$design)
voomContrast = limma::contrasts.fit(voomFit, contrasts = contrast)
voomTreat = limma::treat(voomContrast, lfc = lfcCuttof)
setrank.results = list()
for (i in base::seq(1,base::length(base::colnames(contrast)))){
if (verbose == TRUE){
cat(paste(" Running SetRank for", base::colnames(contrast)[i], "contrast"))
}
network = SetRank::setRankAnalysis(base::rownames(limma::topTreat(voomTreat, coef = i, n = Inf)),setCollection = theCollection, use.ranks = TRUE, setPCutoff = pCutoff, fdrCutoff = fdrCutoff)
SetRank::exportSingleResult(network = network, selectedGenes = base::rownames(limma::topTreat(voomTreat, coef = i, n = Inf)), collection = theCollection, networkName = colnames(contrast)[i], outputPath = "./SetRankResults/")
fh = base::read.table(paste("./SetRankResults/",base::colnames(contrast)[i],"_pathways.txt", sep = ""), header = TRUE)
fh$Rank = base::seq(1,base::dim(fh)[1])
fh2 = base::cbind(fh$Rank,fh$size,fh$setRank,fh$pSetRank,fh$correctedPValue,fh$adjustedPValue)
base::rownames(fh2) = fh$description
base::colnames(fh2) = c("Rank","Size", "SetRank_Rank", "p.value", "Adj.p.value", "fdr")
fh = fh2
base::remove(fh2)
setrank.results[[base::colnames(contrast)[i]]] = fh
}
return(setrank.results)
}
method_checker <- function(method_vector){
if(length(method_vector) == 0){
print("Please select at least one GSEA method")
if(base::length(method_vector) == 0){
base::print("Please select at least one GSEA method")
}
methods <- list()
......@@ -22,55 +22,55 @@ method_checker <- function(method_vector){
prepareTwoGroupsData <- function(voom.results, contrast, genesetIdx,
min.samples = NULL, verbose = FALSE){
if (!is.matrix(contrast)){
if (!base::is.matrix(contrast)){
# find the reference samples
if (is.null(voom.results$targets) ||
is.null(voom.results$targets$group))
stop(paste0("The data frame 'targets' of the object 'voom.results' ",
if (base::is.null(voom.results$targets) ||
base::is.null(voom.results$targets$group))
base::stop(paste0("The data frame 'targets' of the object 'voom.results' ",
"must have a column named 'group'."))
ref.group = levels(factor(voom.results$targets$group))[1]
ref.group = base::levels(factor(voom.results$targets$group))[1]
if (verbose)
cat(paste0(" Reference group is identified as ", ref.group, "\n"))
cnt.sam.indx = which(voom.results$targets$group == ref.group)
cnt.sam.indx = base::which(voom.results$targets$group == ref.group)
}
# use gene indexes instead of gene IDs
groupData = list()
gsets = list()
for (j in 1:length(genesetIdx)){
gsets[[j]] = as.character(genesetIdx[[j]])
for (j in 1:base::length(genesetIdx)){
gsets[[j]] = base::as.character(genesetIdx[[j]])
}
names(gsets) = names(genesetIdx)
base::names(gsets) = base::names(genesetIdx)
groupData[["gsets"]] = gsets
# Extract a logCPM matrix for each contrast
data.log = voom.results$E
rownames(data.log) = as.character(seq(1, nrow(data.log)))
base::rownames(data.log) = base::as.character(base::seq(1, base::nrow(data.log)))
design = voom.results$design
sam.idx = 1:ncol(data.log)
sam.idx = 1:base::ncol(data.log)
groupData[["data"]] = list()
set.seed(05081986)
contr.num = ifelse(is.matrix(contrast), ncol(contrast), length(contrast))
base::set.seed(05081986)
contr.num = base::ifelse(base::is.matrix(contrast), base::ncol(contrast), base::length(contrast))
for(i in 1:contr.num){
if (is.matrix(contrast)){
if (base::is.matrix(contrast)){
# find the indexes of the treatment group samples
d = design[, contrast[,i] > 0]
if (is.null(ncol(d))){
if (base::is.null(base::ncol(d))){
tre.sam.indx = sam.idx[ d == 1]
}else if (ncol(d) > 1){
tre.sam.indx = c()
for (j in 1:ncol(d))
tre.sam.indx = c(tre.sam.indx, sam.idx[ d[,j]
}else if (base::ncol(d) > 1){
tre.sam.indx = base::c()
for (j in 1:base::ncol(d))
tre.sam.indx = base::c(tre.sam.indx, sam.idx[ d[,j]
== 1])
}
else
stop("Invalid contrasts selected.")
base::stop("Invalid contrasts selected.")
# find the indexes of the control group samples
d = design[, contrast[,i] < 0]
if (is.null(ncol(d))){
if (base::is.null(base::ncol(d))){
cnt.sam.indx = sam.idx[ d == 1]
}else if (ncol(d) > 1){
cnt.sam.indx = c()
for (j in 1:ncol(d))
cnt.sam.indx = c(cnt.sam.indx, sam.idx[ d[,j]
}else if (base::ncol(d) > 1){
cnt.sam.indx = base::c()
for (j in 1:base::ncol(d))
cnt.sam.indx = base::c(cnt.sam.indx, sam.idx[ d[,j]
== 1])
}
else
......@@ -79,98 +79,93 @@ prepareTwoGroupsData <- function(voom.results, contrast, genesetIdx,
tre.sam.indx = sam.idx[ design[, contrast[i]] == 1]
}
# Check if a minimum number of samples is required
if (! is.null(min.samples)){
if (length(tre.sam.indx) == 1)
tre.sam.indx = rep(tre.sam.indx, min.samples)
else if (length(tre.sam.indx) < min.samples)
tre.sam.indx = c(tre.sam.indx,
sample(tre.sam.indx, min.samples - length(tre.sam.indx)))
if (length(cnt.sam.indx) == 1)
cnt.sam.indx = rep(cnt.sam.indx, min.samples)
else if (length(cnt.sam.indx) < min.samples)
cnt.sam.indx = c(cnt.sam.indx,
sample(cnt.sam.indx, min.samples - length(cnt.sam.indx)))
if (! base::is.null(min.samples)){
if (base::length(tre.sam.indx) == 1)
tre.sam.indx = base::rep(tre.sam.indx, min.samples)
else if (base::length(tre.sam.indx) < min.samples)
tre.sam.indx = base::c(tre.sam.indx,
base::sample(tre.sam.indx, min.samples - base::length(tre.sam.indx)))
if (base::length(cnt.sam.indx) == 1)
cnt.sam.indx = base::rep(cnt.sam.indx, min.samples)
else if (base::length(cnt.sam.indx) < min.samples)
cnt.sam.indx = base::c(cnt.sam.indx,
base::sample(cnt.sam.indx, min.samples - base::length(cnt.sam.indx)))
}
# logCPM matrix has control samples then treatment samples
data.log.sel = data.log[, c(cnt.sam.indx, tre.sam.indx)]
data.log.sel = data.log[, base::c(cnt.sam.indx, tre.sam.indx)]
groupData$data[[i]] = list()
groupData$data[[i]][["logCPM"]] = data.log.sel
# group1 is control / reference
groupData$data[[i]][["group1"]] = seq(1,length(cnt.sam.indx))
groupData$data[[i]][["group2"]] = seq(length(cnt.sam.indx) +
1,ncol(data.log.sel))
groupData$data[[i]][["group1"]] = base::seq(1,base::length(cnt.sam.indx))
groupData$data[[i]][["group2"]] = base::seq(base::length(cnt.sam.indx) +
1,base::ncol(data.log.sel))
}
names(groupData$data) = colnames(contrast)
base::names(groupData$data) = base::colnames(contrast)
return(groupData)
}
getNumberofSamples <- function(voom.results, contrast){
if (is.null(voom.results$design)){
if (base::is.null(voom.results$design)){
return(0)
}
if (is.matrix(contrast)){
samples = c()
sam.idx = colnames(voom.results$E)
for(i in 1:ncol(contrast)){
if (base::is.matrix(contrast)){
samples = base::c()
sam.idx = base::colnames(voom.results$E)
for(i in 1:base::ncol(contrast)){
d = voom.results$design[, contrast[,i] != 0]
if (is.null(ncol(d))){
if (base::is.null(base::ncol(d))){
samples = sam.idx[ d == 1]
}else if (ncol(d) > 1){
for (j in 1:ncol(d))
samples = c(samples, sam.idx[ d[,j] == 1])
}else if (base::ncol(d) > 1){
for (j in 1:base::ncol(d))
samples = base::c(samples, sam.idx[ d[,j] == 1])
}
else
stop("Invalid contrasts selected.")
base::stop("Invalid contrasts selected.")
}
return(length(unique(samples)))
return(base::length(base::unique(samples)))
}else{
return(nrow(voom.results$design))
return(base::nrow(voom.results$design))
}
}
genesetToIdx <- function(geneset = "H", specie = "Mus musculus", entrezGenesIds){
if(!(require(limma))){
source("http://www.bioconductor.org/biocLite.R")
biocLite("limma")
require(limma)
}
if (specie == "Mus musculus"){
if (geneset == "H"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_H_v5p2.rdata"))
genesetToIdx = ids2indices(Mm.H, entrezGenesIds)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_H_v5p2.rdata"))
genesetToIdx = limma::ids2indices(Mm.H, entrezGenesIds)
} else if (geneset == "C2_KEGG"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_c2_v5p2.rdata"))
Mm.c2.subset = Mm.c2[grep("KEGG",attributes(Mm.c2)$names)]
genesetToIdx = ids2indices(Mm.c2.subset, entrezGenesIds)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_c2_v5p2.rdata"))
Mm.c2.subset = Mm.c2[base::grep("KEGG",base::attributes(Mm.c2)$names)]
genesetToIdx = limma::ids2indices(Mm.c2.subset, entrezGenesIds)
} else if(geneset == "C2_REACTOME"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_c2_v5p2.rdata"))
Mm.c2.subset = Mm.c2[grep("REACTOME",attributes(Mm.c2)$names)]
genesetToIdx = ids2indices(Mm.c2.subset, entrezGenesIds)
} else if (class(geneset) == "list"){
genesetToIdx = ids2indices(geneset, entrezGenesIds, remove.empty=TRUE)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/mouse_c2_v5p2.rdata"))
Mm.c2.subset = Mm.c2[base::grep("REACTOME",base::attributes(Mm.c2)$names)]
genesetToIdx = limma::ids2indices(Mm.c2.subset, entrezGenesIds)
} else if (base::class(geneset) == "list"){
genesetToIdx = limma::ids2indices(geneset, entrezGenesIds, remove.empty=TRUE)
} else {
cat(paste(geneset , "is not a valid MSigDB geneset. Available genesets : \n - H \n - C2"))
return(NULL)
}
} else if (specie == "Homo sapiens"){
if (geneset == "H"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/human_H_v5p2.rdata"))
genesetToIdx = ids2indices(Hs.H, entrezGenesIds)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/human_H_v5p2.rdata"))
genesetToIdx = limma::ids2indices(Hs.H, entrezGenesIds)
} else if (geneset == "C2_KEGG"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata"))
Hs.c2.subset = Hs.c2[grep("KEGG",attributes(Hs.c2)$names)]
genesetToIdx = ids2indices(Hs.c2.subset, entrezGenesIds)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata"))
Hs.c2.subset = Hs.c2[base::grep("KEGG",base::attributes(Hs.c2)$names)]
genesetToIdx = limma::ids2indices(Hs.c2.subset, entrezGenesIds)
} else if (geneset == "C2_REACTOME"){
load(url("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata"))
Hs.c2.subset = Hs.c2[grep("REACTOME",attributes(Hs.c2)$names)]
genesetToIdx = ids2indices(Hs.c2.subset, entrezGenesIds)
} else if (class(geneset) == "list"){
genesetToIdx = ids2indices(geneset, entrezGenesIds, remove.empty=TRUE)
base::load(base::url("http://bioinf.wehi.edu.au/software/MSigDB/human_c2_v5p2.rdata"))
Hs.c2.subset = Hs.c2[base::grep("REACTOME",base::attributes(Hs.c2)$names)]
genesetToIdx = limma::ids2indices(Hs.c2.subset, entrezGenesIds)
} else if (base::class(geneset) == "list"){
genesetToIdx = limma::ids2indices(geneset, entrezGenesIds, remove.empty=TRUE)
} else {
cat(paste(geneset , "is not a valid MSigDB geneset. Available genesets : \n - H \n - C2"))
return(NULL)
......@@ -189,9 +184,9 @@ msig_to_setrankDb = function(msigdb, orga, dbname){
# example : mouse_H = msig_to_setrankDb(Mm.H, "MOUSE", "H")
i = 1
df = data.frame(geneID = factor(), termID = factor(), termName = factor(), dbName = factor(), description = factor())
for (name in(names(msigdb))){
for (name in(base::names(msigdb))){
for (gene in msigdb[[name]]){
df = rbind(df, data.frame(geneID = factor(gene), termID = factor(paste(orga,"MSigDB",dbname,i,sep = "_")), termName = factor(name), dbName = factor("MSigDB"), description = factor("")))
df = base::rbind(df, data.frame(geneID = factor(gene), termID = factor(paste(orga,"MSigDB",dbname,i,sep = "_")), termName = factor(name), dbName = factor("MSigDB"), description = factor("")))
}
i = i+1
}
......@@ -199,13 +194,13 @@ msig_to_setrankDb = function(msigdb, orga, dbname){
}
dgeToElist = function(dgeobject){
tmp = new("EList")
for (i in names(dge)){
tmp = methods::new("EList")
for (i in base::names(dge)){
tmp[[i]] = dge[[i]]
}
if (is.null(tmp$E)){
if (base::is.null(tmp$E)){
tmp$E = tmp$counts
} else if (is.null(tmp$counts)){
} else if (base::is.null(tmp$counts)){
tmp$counts = tmp$E
} else {
cat("There is a problem with the count matrix")
......@@ -214,9 +209,9 @@ dgeToElist = function(dgeobject){
if(tmp$genes$ENTREZ){
tmp$genes = tmp$genes$ENTREZ
}
rownames(tmp$E) = tmp$genes
rownames(tmp$counts) = rownames(tmp$E)
if (is.null(tmp$common.dispersion)){
base::rownames(tmp$E) = tmp$genes
base::rownames(tmp$counts) = base::rownames(tmp$E)
if (base::is.null(tmp$common.dispersion)){
cat("Dispersion estimates needed.\nPlease run estimateDisp() on your dge Object.\nRun '?estimateDisp()' for help")
return()
}
......@@ -233,88 +228,88 @@ cGSEAcore = function(ElistObject, contrast, geneset = "H", specie = "Mus musculu
if(camera == TRUE){
print("RUNNING CAMERA")
camerastart = Sys.time()
camerastart = base::Sys.time()
res$camera = runcamera(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
camerastop = Sys.time()
time$camera = difftime(camerastop, camerastart, units = c("secs"))
camerastop = base::Sys.time()
time$camera = base::difftime(camerastop, camerastart, units = c("secs"))
}
if (gage == TRUE){
print("RUNNING GAGE")
gagestart = Sys.time()
gagestart = base::Sys.time()
res$gage = rungage(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
gagestop = Sys.time()
time$gage = difftime(gagestop,gagestart, units = c("secs"))
gagestop = base::Sys.time()
time$gage = base::difftime(gagestop,gagestart, units = c("secs"))
}
if (globaltest == TRUE){
print("RUNNING GLOBALTEST")
globalteststart = Sys.time()
globalteststart = base::Sys.time()
res$globaltest = runglobaltest(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
globalteststop = Sys.time()
time$globaltest = difftime(globalteststop,globalteststart, units = c("secs"))
globalteststop = base::Sys.time()
time$globaltest = base::difftime(globalteststop,globalteststart, units = c("secs"))
}
if (gsva == TRUE){
print("RUNNING GSVA")
gsvastart = Sys.time()
gsvastart = base::Sys.time()
res$gsva = rungsva(method = "gsva", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
gsvastop = Sys.time()
time$gsva = difftime(gsvastop,gsvastart, units = c("secs"))
gsvastop = base::Sys.time()
time$gsva = base::difftime(gsvastop,gsvastart, units = c("secs"))
}
if (ssgsea == TRUE){
print("RUNNING SSGSEA")
ssgseastart = Sys.time()
ssgseastart = base::Sys.time()
res$ssgsea = rungsva(method = "ssgsea", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
ssgseastop = Sys.time()
time$ssgsea = difftime(ssgseastop,ssgseastart, units = c("secs"))
ssgseastop = base::Sys.time()
time$ssgsea = base::difftime(ssgseastop,ssgseastart, units = c("secs"))
}
if (zscore == TRUE){
print("RUNNING ZSCORE")
zscorestart = Sys.time()
zscorestart = base::Sys.time()
res$zscore = rungsva(method = "zscore", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
zscorestop = Sys.time()
time$zscore = difftime(zscorestop,zscorestart, units = c("secs"))
zscorestop = base::Sys.time()
time$zscore = base::difftime(zscorestop,zscorestart, units = c("secs"))
}
if (plage == TRUE){
print("RUNNING PLAGE")
plagestart = Sys.time()
plagestart = base::Sys.time()
res$plage = rungsva(method = "plage", voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
plagestop = Sys.time()
time$plage = difftime(plagestop,plagestart, units = c("secs"))
plagestop = base::Sys.time()
time$plage = base::difftime(plagestop,plagestart, units = c("secs"))
}
if (ora == TRUE){
print("RUNNING ORA")
orastart = Sys.time()
orastart = base::Sys.time()
res$ora = runora(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
orastop = Sys.time()
time$ora = difftime(orastop, orastart, units = c("secs"))
orastop = base::Sys.time()
time$ora = base::difftime(orastop, orastart, units = c("secs"))
}
if (padog == TRUE){
print("RUNNING PADOG")
padogstart = Sys.time()
padogstart = base::Sys.time()
res$padog = runpadog(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
padogstop = Sys.time()
time$padog = difftime(padogstop, padogstart, units = c("secs"))
padogstop = base::Sys.time()
time$padog = base::difftime(padogstop, padogstart, units = c("secs"))
}
if (roast == TRUE){
print("RUNNING ROAST")
roaststart = Sys.time()
roaststart = base::Sys.time()
res$roast = runroast(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
roaststop = Sys.time()
time$roast = difftime(roaststop,roaststart, units = c("secs"))
roaststop = base::Sys.time()
time$roast = base::difftime(roaststop,roaststart, units = c("secs"))
}
if (safe == TRUE){
print("RUNNING SAFE")
safestart = Sys.time()
safestart = base::Sys.time()
res$safe = runsafe(voom.results = ElistObject, contrast = contrast, genesetIdx = genesetIdx, num.workers = num.workers, verbose = verbose)
safestop = Sys.time()
time$safe = difftime(safestop, safestart, units = c("secs"))
safestop = base::Sys.time()
time$safe = base::difftime(safestop, safestart, units = c("secs"))
}
if (setrank == TRUE){
print("RUNNING SETRANK")
setrankstart = Sys.time()
setrankstart = base::Sys.time()
res$setrank = runsetrank(voom.results = ElistObject, contrast = contrast, geneset = geneset, specie = specie, num.workers = num.workers, verbose = verbose)
setrankstop = Sys.time()
time$setrank = difftime(setrankstop, setrankstart, units = c("secs"))
setrankstop = base::Sys.time()
time$setrank = base::difftime(setrankstop, setrankstart, units = c("secs"))
}
output$time = time
output$res = res
......@@ -326,13 +321,13 @@ cGSEAcore = function(ElistObject, contrast, geneset = "H", specie = "Mus musculu
rank_getter = function(geneSetname, method_result){
out = list()
ranks = c()
pvals = c()
ranks = base::c()
pvals = base::c()
for (aset in geneSetname){
for (i in seq(1,nrow(method_result))){
if (rownames(method_result)[i] == aset){
ranks = append(ranks, as.data.frame(method_result)[["Rank"]][i])
pvals = append(pvals, as.data.frame(method_result)[["p.value"]][i])
for (i in base::seq(1,base::nrow(method_result))){
if (base::rownames(method_result)[i] == aset){
ranks = base::append(ranks, base::as.data.frame(method_result)[["Rank"]][i])
pvals = base::append(pvals, base::as.data.frame(method_result)[["p.value"]][i])
}
}
}
......@@ -344,64 +339,64 @@ rank_getter = function(geneSetname, method_result){
cGSEAOutputTable = function(cGSEAcoreOutput, contrastLevel = contrastLevel, geneSetLevels){
dt = list()
for (method in names(cGSEAcoreOutput$res)){
for (method in base::names(cGSEAcoreOutput$res)){
method_stats = rank_getter(geneSetname = geneSetLevels,method_result = cGSEAcoreOutput[["res"]][[method]][[contrastLevel]])
dt[[paste(method,"Rank",sep="_")]] = as.numeric(method_stats[["ranks"]])
dt[[paste(method,"Rank",sep="_")]] = base::as.numeric(method_stats[["ranks"]])
# ranks[[method]] = rank_getter(geneSetname = geneSetLevels,method_result = cGSEAOutput[["res"]][[method]][[contrastLevel]])
#p.values
dt[[paste(method,"p.value",sep="_")]] = as.numeric(method_stats[["pvals"]])
dt[[paste(method,"p.value",sep="_")]] = base::as.numeric(method_stats[["pvals"]])
}
# df = data.frame(matrix(unlist(dt), nrow = length(geneSetLevels), byrow = FALSE),stringsAsFactors=FALSE)
df = data.frame(matrix(unlist(dt), nrow = dim(cGSEAcoreOutput$res[[1]][[1]])[1], byrow = FALSE),stringsAsFactors=FALSE)
df = data.frame(matrix(base::unlist(dt), nrow = base::dim(cGSEAcoreOutput$res[[1]][[1]])[1], byrow = FALSE),stringsAsFactors=FALSE)
colnames(df) = names(dt)
rownames(df) = geneSetLevels
base::colnames(df) = base::names(dt)
base::rownames(df) = geneSetLevels
return(df)
}
getlogFCFromLMFit <- function(voom.results, contrast,
logFC.cutoff, fdr.cutoff){
# to be changed for gene symbols support
stopifnot(class(voom.results) == "EList")
base::stopifnot(base::class(voom.results) == "EList")
print("limma DE analysis is carried out ... ")
# fit linear model for each gene using limma package functions
vfit = lmFit(voom.results, design=voom.results$design) # Fit linear model
vfit = limma::lmFit(voom.results, design=voom.results$design) # Fit linear model
# for each gene given a series of arrays
if (is.matrix(contrast)){
vfit = contrasts.fit(vfit, contrast) # make all pair-wise comparisons
contr.names = colnames(contrast)
contr.num = ncol(contrast)
coefs = 1:ncol(contrast)
if (base::is.matrix(contrast)){
vfit = limma::contrasts.fit(vfit, contrast) # make all pair-wise comparisons
contr.names = base::colnames(contrast)
contr.num = base::ncol(contrast)
coefs = 1:base::ncol(contrast)
# between the groups
}else{
contr.names = names(contrast)
contr.num = length(contrast)
contr.names = base::names(contrast)
contr.num = base::length(contrast)
coefs = contrast
}
ebayes.results = eBayes(vfit) # compute moderated t-statistics, moderated
ebayes.results = limma::eBayes(vfit) # compute moderated t-statistics, moderated
#F-statistic, and log-odds of differential expression by empirical
# Bayes moderation of the standard errors towards a common value
logFC = matrix(0, nrow(ebayes.results), contr.num)
logFC = matrix(0, base::nrow(ebayes.results), contr.num)
limma.tops = list()
for (i in 1:length(coefs)){
top.table = topTable(ebayes.results, coef=coefs[i],
for (i in 1:base::length(coefs)){
top.table = limma::topTable(ebayes.results, coef=coefs[i],
number=Inf, sort.by="none")
limma.fc = top.table$logFC
names(limma.fc) = rownames(ebayes.results)
base::names(limma.fc) = base::rownames(ebayes.results)
logFC[, i] = limma.fc
rownames(top.table) = rownames(ebayes.results)
base::rownames(top.table) = base::rownames(ebayes.results)
limma.tops[[contr.names[i]]] = top.table
de.genes = top.table[top.table[, "adj.P.Val"] <= fdr.cutoff, ]
if (nrow(de.genes) == 0)
if (base::nrow(de.genes) == 0)
cat(paste0("WARNING: it seems the contrast ",
contr.names[i],
" has no DE genes at the selected 'fdr.cutoff'.\n",
"The 'fdr.cutoff' was ignored in the calculations.\n"))
if (nrow(de.genes) > 0){
de.genes = de.genes[abs(de.genes[, "logFC"]) >= logFC.cutoff, ]
if (nrow(de.genes) == 0)
if (base::nrow(de.genes) > 0){
de.genes = de.genes[base::abs(de.genes[, "logFC"]) >= logFC.cutoff, ]
if (base::nrow(de.genes) == 0)
cat(paste0("WARNING: it seems the contrast ",
contr.names[i],
" has no DE genes at the selected 'logFC.cutoff'.\n",
......@@ -409,8 +404,8 @@ getlogFCFromLMFit <- function(voom.results, contrast,
}
}
rownames(logFC) = rownames(ebayes.results)
colnames(logFC) = contr.names
base::rownames(logFC) = base::rownames(ebayes.results)
base::colnames(logFC) = contr.names
return(list(logFC=logFC, limma.results=ebayes.results, limma.tops=limma.tops))
}
......@@ -418,87 +413,78 @@ getlogFCFromLMFit <- function(voom.results, contrast,
signifCal = function(combiPval, avgLFC){
# combiPval : combined Pvalue after correction (correction : BH, combination : fisher)
#avgLFC : average logFC in geneset from edgerR/limma
signif = -log10(combiPval)*abs(avgLFC)
signif = -base::log10(combiPval)*base::abs(avgLFC)
return(signif)
}
geneSetsLogFC = function(logFcMatrix, genesetCollection, condition){
geneSetCollectionLogFC = c()
for (geneset in names(genesetCollection)){
setLogFC = c()
geneSetCollectionLogFC = base::c()
for (geneset in base::names(genesetCollection)){
setLogFC = base::c()
for (gene in genesetCollection[[geneset]]){
setLogFC = append(setLogFC, x = logFcMatrix$limma.tops[[condition]]$logFC[which(rownames(logFcMatrix$limma.tops[[condition]]) == gene)])
setLogFC = base::append(setLogFC, x = logFcMatrix$limma.tops[[condition]]$logFC[base::which(base::rownames(logFcMatrix$limma.tops[[condition]]) == gene)])
}
avgLogFC = mean(setLogFC)
geneSetCollectionLogFC = append(geneSetCollectionLogFC, avgLogFC)
avgLogFC = base::mean(setLogFC)
geneSetCollectionLogFC = base::append(geneSetCollectionLogFC, avgLogFC)
}
names(geneSetCollectionLogFC) = names(genesetCollection)
base::names(geneSetCollectionLogFC) = base::names(genesetCollection)
return(geneSetCollectionLogFC)
}
adjustPval = function(pvalTable, type){
switch(type,
holm = apply(pvalTable, 2, p.adjust, method = "holm"),
hochberg = apply(pvalTable, 2, p.adjust, method = "hochberg"),
hommel = apply(pvalTable, 2, p.adjust, method = "hommel"),
bonferroni = apply(pvalTable, 2, p.adjust, method = "bonferroni"),
BH = apply(pvalTable, 2, p.adjust, method = "BH"),
BY = apply(pvalTable, 2, p.adjust, method = "BY"),
fdr = apply(pvalTable, 2, p.adjust, method = "fdr"),
none = apply(pvalTable, 2, p.adjust, method = "none"))
holm = base::apply(pvalTable, 2, p.adjust, method = "holm"),
hochberg = base::apply(pvalTable, 2, p.adjust, method = "hochberg"),
hommel = base::apply(pvalTable, 2, p.adjust, method = "hommel"),
bonferroni = base::apply(pvalTable, 2, p.adjust, method = "bonferroni"),
BH = base::apply(pvalTable, 2, p.adjust, method = "BH"),
BY = base::apply(pvalTable, 2, p.adjust, method = "BY"),
fdr = base::apply(pvalTable, 2, p.adjust, method = "fdr"),
none = base::apply(pvalTable, 2, p.adjust, method = "none"))
}
combinePval = function(pvalAdjTable, type){
if(!require(metap)){
install.packages("metap")
require(metap)
}
switch(type,
sumz = apply(pvalAdjTable, 1, sumz),
votep = apply(pvalAdjTable, 1, votep),
minimump = apply(pvalAdjTable, 1, minimump),
sumlog = apply(pvalAdjTable, 1, sumlog),
sump = apply(pvalAdjTable, 1, sump),
logitp = apply(pvalAdjTable, 1, logitp),
meanp = apply(pvalAdjTable, 1, meanp),
maximump = apply(pvalAdjTable, 1, maximump))
sumz = base::apply(pvalAdjTable, 1, sumz),
votep = base::apply(pvalAdjTable, 1, votep),
minimump = base::apply(pvalAdjTable, 1, minimump),
sumlog = base::apply(pvalAdjTable, 1, sumlog),
sump = base::apply(pvalAdjTable, 1, sump),
logitp = base::apply(pvalAdjTable, 1, logitp),
meanp = base::apply(pvalAdjTable, 1, meanp),
maximump = base::apply(pvalAdjTable, 1, maximump))
}
comparisonSummaryData = function(preparedDataResumPlot2){
m = do.call(cbind, preparedDataResumPlot2)
colnames(m) = rep(c("x.data","y.data","dir","rank","sig","id","gsSize"),length(names(preparedDataResumPlot2)))
m = na.omit(m)
m.id = base::abbreviate(rownames(m), minlength = 6, use.classes = FALSE, dot = FALSE, named = FALSE)
m = m[,-grep("id", colnames(m))]
colnames(m) = rep(c("x.data","y.data","dir","rank","sig","gsSize"),length(names(preparedDataResumPlot2)))
nms = colnames(m)
m2 = sapply(unique(nms), function(x) rowMeans (m[, nms == x]))
m2 = as.data.frame(m2)
m2 = cbind(m2,m.id)
m2[,c("x.data","y.data","dir","rank","sig","gsSize")] = apply(m2[,c("x.data","y.data","dir","rank","sig","gsSize")],2,as.numeric)
colnames(m2)[ncol(m2)] = "id"
m = base::do.call(cbind, preparedDataResumPlot2)
base::colnames(m) = base::rep(base::c("x.data","y.data","dir","rank","sig","id","gsSize"),base::length(base::names(preparedDataResumPlot2)))
m = stats::na.omit(m)
m.id = base::abbreviate(base::rownames(m), minlength = 6, use.classes = FALSE, dot = FALSE, named = FALSE)
m = m[,-base::grep("id", colnames(m))]
base::colnames(m) = base::rep(base::c("x.data","y.data","dir","rank","sig","gsSize"),base::length(base::names(preparedDataResumPlot2)))
nms = base::colnames(m)
m2 = base::sapply(base::unique(nms), function(x) rowMeans (m[, nms == x]))
m2 = base::as.data.frame(m2)
m2 = base::cbind(m2,m.id)
m2[,base::c("x.data","y.data","dir","rank","sig","gsSize")] = base::apply(m2[,base::c("x.data","y.data","dir","rank","sig","gsSize")],2,as.numeric)
base::colnames(m2)[base::ncol(m2)] = "id"
return(m2)
}
combineRanks = function(resTable){
rankCol = grep("_Rank", colnames(resTable))
meanRank = apply(resTable[,rankCol],1,mean)
return(cbind(resTable, as.numeric(meanRank)))
rankCol = base::grep("_Rank", base::colnames(resTable))
meanRank = base::apply(resTable[,rankCol],1,mean)
return(base::cbind(resTable, base::as.numeric(meanRank)))
}
prepareData = function(cGSEAcoreOutput, alpha = 0.05, directoryPath, pvalAdjMethod = "BH", pvalCombMethod = "sumlog", min.intersection.size = 1, shinyMode = FALSE){
if(!require(SuperExactTest)){
install.packages("SuperExactTest")
require(SuperExactTest)
}
logFCTable = getlogFCFromLMFit(voom.results = cGSEAcoreOutput$Elist, contrast = cGSEAcoreOutput$contrast, logFC.cutoff = 0, fdr.cutoff = 1)
time = unlist(cGSEAcoreOutput$time)
names(time) = names(cGSEAcoreOutput$time)
time = base::unlist(cGSEAcoreOutput$time)
base::names(time) = base::names(cGSEAcoreOutput$time)
output = list()
......@@ -515,84 +501,84 @@ prepareData = function(cGSEAcoreOutput, alpha = 0.05, directoryPath, pvalAdjMeth
for (condi in colnames(cGSEAcoreOutput$contrast)){
for (condi in base::colnames(cGSEAcoreOutput$contrast)){
resTable = cGSEAOutputTable(cGSEAcoreOutput = cGSEAcoreOutput, contrastLevel = condi, geneSetLevels = names(cGSEAcoreOutput$collection))
resTable = na.omit(resTable)
resTable = stats::na.omit(resTable)
# pvalue adjustment and combination
pvalcol = grep("p.value", colnames(resTable))
pvalcol = base::grep("p.value", base::colnames(resTable))
pvalTable = resTable[,pvalcol]
#pvalAdjTable = apply(pvalTable, 2, p.adjust, method = "BH") #Benjamini Hochberg for pvalue adjustment
pvalAdjTable = adjustPval(pvalTable, type = pvalAdjMethod)
colnames(pvalAdjTable) = gsub("_p.value","_adj_p.value",colnames(pvalAdjTable))
base::colnames(pvalAdjTable) = base::gsub("_p.value","_adj_p.value",base::colnames(pvalAdjTable))
# pvalComb = apply(pvalAdjTable, 1, sumlog) #fisher method for pvalue combination
pvalComb = combinePval(pvalAdjTable, type = pvalCombMethod)
pvalComb = unlist(lapply(pvalComb, '[[',3))
pvalComb = base::unlist(base::lapply(pvalComb, '[[',3))
#result Table
resTable2 = cbind(resTable, pvalAdjTable)
resTable3 = cbind(resTable2, pvalComb)
colnames(resTable3)[ncol(resTable3)] = "combined_p.value"
result[[condi]] = na.omit(resTable3)
resTable2 = base::cbind(resTable, pvalAdjTable)
resTable3 = base::cbind(resTable2, pvalComb)
base::colnames(resTable3)[base::ncol(resTable3)] = "combined_p.value"
result[[condi]] = stats::na.omit(resTable3)
result[[condi]] = combineRanks(result[[condi]])
colnames(result[[condi]])[ncol(result[[condi]])] = "Avg_Rank"
base::colnames(result[[condi]])[base::ncol(result[[condi]])] = "Avg_Rank"
if (shinyMode == FALSE){
write.csv(result[[condi]], file = paste(directoryPath,"/result_",condi,".csv", sep = ""))
utils::write.csv(result[[condi]], file = paste(directoryPath,"/result_",condi,".csv", sep = ""))
}
#ranks
rankCol = grep("Rank",colnames(resTable))
rankCol = base::grep("Rank",base::colnames(resTable))
rankTable = resTable[,rankCol]
colnames(rankTable) = gsub("_Rank","", colnames(rankTable))
base::colnames(rankTable) = base::gsub("_Rank","", base::colnames(rankTable))
#data for pVal binary heatmap
heatmap[[condi]] = t(apply(pvalAdjTable,1, function(x) ifelse(x < alpha, 1, 0)))
colnames(heatmap[[condi]]) = gsub("_adj_p.value","",colnames(heatmap[[condi]]))
this.apply = apply(heatmap[[condi]], 1, sum)
max_methods = max(this.apply)
heatmap[[condi]] = heatmap[[condi]][c(which(this.apply == max_methods),which(this.apply == (max_methods-1))),]
heatmap[[condi]] = base::t(base::apply(pvalAdjTable,1, function(x) base::ifelse(x < alpha, 1, 0)))
base::colnames(heatmap[[condi]]) = base::gsub("_adj_p.value","",base::colnames(heatmap[[condi]]))
this.apply = base::apply(heatmap[[condi]], 1, sum)
max_methods = base::max(this.apply)
heatmap[[condi]] = heatmap[[condi]][base::c(base::which(this.apply == max_methods),base::which(this.apply == (max_methods-1))),]
#data for PCA
PCA[[condi]] = t(rankTable)
PCA[[condi]] = base::t(rankTable)
#data for clustering
clustering[[condi]] = hclust(dist(t(rankTable), method = "euclidean"), method = "ward.D2")
clustering[[condi]] = stats::hclust(stats::dist(base::t(rankTable), method = "euclidean"), method = "ward.D2")
#data for correlation plot
correlation[[condi]] = cor(rankTable)
correlation[[condi]] = stats::cor(rankTable)
#for SnailPlot
gsFactor = as.factor(names(cGSEAcoreOutput$collection))
listInput = apply(pvalAdjTable, 2, function(x) names(which(x < alpha)))
cond = sapply(listInput, function(x) length(x) > 0)
gsFactor = base::as.factor(base::names(cGSEAcoreOutput$collection))
listInput = base::apply(pvalAdjTable, 2, function(x) base::names(base::which(x < alpha)))
cond = base::sapply(listInput, function(x) base::length(x) > 0)
listInput = listInput[cond]
listInput = sapply(listInput, function(x) factor(x, levels = levels(gsFactor)))
names(listInput) = gsub("_adj_p.value","", names(listInput))
listInput = base::sapply(listInput, function(x) factor(x, levels = base::levels(gsFactor)))
base::names(listInput) = base::gsub("_adj_p.value","", base::names(listInput))
snailPlot[[condi]] = listInput
abrev_names_vector = base::abbreviate(gsub("HALLMARK_","H_",names(cGSEAcoreOutput$collection)), minlength = 6, use.classes = FALSE, dot = FALSE, named = FALSE, method = "left.kept")
abrev_names_mat = cbind(cGSEAcoreOutput$collection,abrev_names_vector)
colnames(abrev_names_mat) = c("original","abbreviation")
abrev[[condi]] = t(as.data.frame(abrev_names_mat[,2]))
abrev[[condi]] = cbind(rownames(abrev[[condi]]), abrev[[condi]])
colnames(abrev[[condi]]) = c("Gene Set Name","abbreviation")
abrev_names_vector = base::abbreviate(base::gsub("HALLMARK_","H_",base::names(cGSEAcoreOutput$collection)), minlength = 6, use.classes = FALSE, dot = FALSE, named = FALSE, method = "left.kept")
abrev_names_mat = base::cbind(cGSEAcoreOutput$collection,abrev_names_vector)
base::colnames(abrev_names_mat) = base::c("original","abbreviation")
abrev[[condi]] = base::t(base::as.data.frame(abrev_names_mat[,2]))
abrev[[condi]] = base::cbind(base::rownames(abrev[[condi]]), abrev[[condi]])
base::colnames(abrev[[condi]]) = base::c("Gene Set Name","abbreviation")
if (shinyMode == FALSE){
write.csv(abrev[[condi]], file = paste(directoryPath,"/abrreviations_",condi,".csv", sep = ""))
utils::write.csv(abrev[[condi]], file = paste(directoryPath,"/abrreviations_",condi,".csv", sep = ""))
}
#for resumPlot1 - simple summary plot
gsLogFC = geneSetsLogFC(logFcMatrix = logFCTable, genesetCollection = cGSEAcoreOutput$collection, condition = condi)
resumPlot1[[condi]]$x = -log10(pvalComb)
resumPlot1[[condi]]$x = -base::log10(pvalComb)
resumPlot1[[condi]]$y = gsLogFC
resumPlot1[[condi]]$labels = abrev_names_vector
#for resumPlot2 - advanced summary plot
signifScore = signifCal(combiPval = pvalComb, avgLFC = gsLogFC)
avgRank = apply(resTable[,rankCol],1,mean)
avgRank = base::apply(resTable[,rankCol],1,mean)
resumPlot2[[condi]] = cbind(as.numeric(-log10(pvalComb)), abs(as.numeric(gsLogFC)), as.numeric(gsLogFC/abs(gsLogFC)), as.numeric(avgRank), as.numeric(signifScore), abrev_names_vector, unlist(lapply(cGSEAcoreOutput$collection, length)))
colnames(resumPlot2[[condi]]) = c("x.data","y.data","dir","rank","sig","id", "gsSize")
rownames(resumPlot2[[condi]]) = names(cGSEAcoreOutput$collection)
resumPlot2[[condi]] = as.data.frame(resumPlot2[[condi]])
resumPlot2[[condi]][,c("x.data","y.data","dir","rank","sig", "gsSize")] = apply(resumPlot2[[condi]][,c("x.data","y.data","dir","rank","sig", "gsSize")],2, as.numeric)
resumPlot2[[condi]] = base::cbind(base::as.numeric(-base::log10(pvalComb)), base::abs(base::as.numeric(gsLogFC)), base::as.numeric(gsLogFC/base::abs(gsLogFC)), base::as.numeric(avgRank), base::as.numeric(signifScore), abrev_names_vector, base::unlist(base::lapply(cGSEAcoreOutput$collection, length)))
base::colnames(resumPlot2[[condi]]) = base::c("x.data","y.data","dir","rank","sig","id", "gsSize")
base::rownames(resumPlot2[[condi]]) = base::names(cGSEAcoreOutput$collection)
resumPlot2[[condi]] = base::as.data.frame(resumPlot2[[condi]])
resumPlot2[[condi]][,base::c("x.data","y.data","dir","rank","sig", "gsSize")] = base::apply(resumPlot2[[condi]][,base::c("x.data","y.data","dir","rank","sig", "gsSize")],2, as.numeric)
# resumPlot2Table[[condi]] = cbind(as.numeric(-log10(pvalComb)), abs(as.numeric(gsLogFC)), as.numeric(gsLogFC/abs(gsLogFC)), as.numeric(avgRank), as.numeric(signifScore), abrev_names_vector, unlist(lapply(cGSEAcoreOutput$collection, length)))
# colnames(resumPlot2Table[[condi]]) = c("x.data","y.data","dir","rank","sig","id", "gsSize")
......@@ -600,7 +586,7 @@ prepareData = function(cGSEAcoreOutput, alpha = 0.05, directoryPath, pvalAdjMeth
# resumPlot2Table[[condi]] = as.data.frame(resumPlot2[[condi]])
# resumPlot2Table[[condi]][,c("x.data","y.data","dir","rank","sig", "gsSize")] = apply(resumPlot2[[condi]][,c("x.data","y.data","dir","rank","sig", "gsSize")],2, as.numeric)
}
if (length(colnames(cGSEAcoreOutput$contrast)) > 1){
if (base::length(base::colnames(cGSEAcoreOutput$contrast)) > 1){
output$comparison = comparisonSummaryData(resumPlot2)
}
......
# --- WARNING -----
#### Permanantly moved to [GitHub](https://github.com/maxibor/coGSEA)
# coGSEA
**co**mparative **G**ene **S**et **E**nrichment **Analysis**
......@@ -8,8 +13,7 @@ This version doesn't include the *SetRank* method for reason of package installa
## How to install
```
require(devtools)
devtools::install_git('https://mborry@gitlab.pasteur.fr/mborry/coGSEA.git')
devtools::install_github("maxibor/coGSEA")
```
## Input files
......
---
title: "Preparing micro-array data for coGSEA analysis"
output: html_notebook
---
## Reading and preparing data for [coGSEA](link) analysis.
In this document, we will show you how to prepare micro-array data to perform a coGSEA analysis.
This example dataset is from a [article](https://www.ncbi.nlm.nih.gov/pubmed/21836821) about Astrocymotas tumors published in 2011 by Liu et al.
You can download it on the Gene Expression Omnibus database with the accession number [GSE19728](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE19728)
#
#### Loading the necessary packages for this analysis
```{r, eval}
library(affy)
library(hgu133plus2.db)
library(edgeR)
library(gage)
library(coGSEA)
```
#### Loading the CEL FILES and Normalizing them usin `rma`
```{r}
setwd("~/GitLab/GSE19728/data/")
celfiles = ReadAffy()
celfiles = rma(celfiles)
```
#### Getting the expression data, and the Probe Names
```{r}
intensity = exprs(celfiles)
intensity = cbind(rownames(intensity), intensity)
colnames(intensity)[1] = "PROBEID"
intensity = as.data.frame(intensity)
intensity$PROBEID = as.character(intensity$PROBEID)
```
#### Getting the annotations of Probes IDS to ENTREZ accession numbers
```{r}
annots = select(hgu133plus2.db, intensity$PROBEID, "ENTREZID", "PROBEID")
```
#### Merge expression matrix and annotation
```{r}
res = merge(intensity, annots, by= "PROBEID")
```
#### Getting rid of PROBE ids and type casting
```{r}
resmin = res[,2:ncol(res)]
cname = colnames(resmin)
resmin = apply(resmin, 2, as.numeric)
colnames(resmin)= cname
resmin = as.data.frame(resmin)
```
#### Aggregating the PROBES matching the same ENTREZ accession number by averaging them and applying a log transformation
```{r}
result = aggregate(. ~ ENTREZID, resmin, mean)
result$ENTREZID = levels(as.factor(as.character(resmin$ENTREZID)))
rownames(result) = result$ENTREZID
result = result[,-1]
result = log(result)
```
#### Visualizing in boxplot to check normalization
```{r}
boxplot(result, las = 2)
```
#### Changing column names to remove the `.CEL` filename extension
```{r}
colnames(result) = gsub(".CEL","", colnames(result))
print(colnames(result))
```
#### Selecting only the samples we are interested in
```{r}
result2 = cbind(result$GSM492649_Astrocytomas_N, result$GSM525014, result$GSM525015, result$GSM525016, result$`GSM492662_Astrocytomas_T4-1`, result$`GSM492663_Astrocytomas_T4-2` , result$`GSM492664_Astrocytomas_T4-3`, result$`GSM492665_Astrocytomas_T4-4`, result$`GSM492666_Astrocytomas_T4-5`)
colnames(result2) = c("GSM492649_Astrocytomas_N", "GSM525014", "GSM525015", "GSM525016","GSM492662_Astrocytomas_T4-1", "GSM492663_Astrocytomas_T4-2", "GSM492664_Astrocytomas_T4-3", "GSM492665_Astrocytomas_T4-4", "GSM492666_Astrocytomas_T4-5" )
rownames(result2) = rownames(result)
```
#### Preparing the design matrix
```{r}
Normal = c(rep(1,4),rep(0,5))
Tumor = c(rep(0,4),rep(1,5))
design = cbind(Normal, Tumor)
rownames(design) = colnames(result2)
```
#### Preparing the contrast matrix
```{r}
contr.matrix = makeContrasts(NormalVSTumor = Normal - Tumor, levels = design)
```
#### Preparing expression list object
```{r}
temp = new("EList")
temp$design = design
temp$E = as.matrix(result2)
rownames(temp$E) = as.numeric(rownames(temp$E))
temp$genes$ENTREZ = rownames(result2)
temp$common.dispersion = estimateDisp(temp$E, design = temp$design)$common.dispersion
temp$samples = colnames(result2)
```
#### Preparing gene set collection
```{r}
gs = gage::kegg.gsets(species = "hsa", id.type = "entrez")
geneset = gs$kg.sets
```
#### Function for simplifying gene sets names
```{r}
nameshorter = function(names){
namemod = c()
for (i in seq(1,length(names))){
namemod[i] = paste(strsplit(names[i], " ")[[1]][-1], sep = "", collapse = " ")
namemod[i] = gsub("/","", names[i])
namemod[i] = gsub(" ","_", names[i])
}
return(namemod)
}
```
#### Simplifying gene sets names
```{r}
names(geneset) = nameshorter(names(geneset))
names(geneset) = gsub("/","_",names(geneset))
```
#### Saving necessary objects to RDS files
```{r}
saveRDS(contr.matrix, "~/GitLab/GSE19728/contrast.rds")
saveRDS(temp, "~/GitLab/GSE19728/elist.rds")
saveRDS(geneset, "~/GitLab/GSE19728/geneset.rds")
```
#### Reading necessary objects (generated above) from RDS files
```{r}
elist = readRDS("~/GitLab/GSE19728/elist.rds")
contrast = readRDS("~/GitLab/GSE19728/contrast.rds")
geneset = readRDS("~/GitLab/GSE19728/geneset.rds")
```
#### Running coGSEA analysis
```{r}
coGSEA(ElistObject = elist, contrastMatrix = contrast, ENTREZGenesIds = elist$genes$ENTREZ, geneSetCollection = geneset,specie = "Homo sapiens", directoryPath = "~/GitLab/GSE19728/results", alpha = 0.05, pvalAdjMethod = "BH", pvalCombMethod = "sumlog",min.intersection.size = 1, GSEA.Methods = c("camera", "gage","globaltest", "gsva", "ssgsea", "zscore", "ora", "padog", "roast","safe"), num.workers = 4, shinyMode = FALSE)
```
\ No newline at end of file
Source diff could not be displayed: it is too large. Options to address this: view the blob.
---
title: "Preparing micro-array data for coGSEA analysis"
output: html_notebook
---
$$\log{FC} = \frac{log{E_{S4}}}{log{E_{normal}}}$$
## Reading and preparing data for [coGSEA](link) analysis.
In this document, we will show you how to prepare RNA-seq data to perform a coGSEA analysis.
You can download this example dataset on the Gene Expression Omnibus database with the accession number [GSE63310](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE63310)
This dataset was analyzed in a very detailed [article](https://f1000research.com/articles/5-1408/v2) on how to do differential expression analysis that we strongly advise you to read.
The file you're looking for is : `GSE63310_RAW.tar`
#### Loading necessary packages
```{r}
library(edgeR)
library(limma)
library(Mus.musculus)
library(coGSEA)
```
#### Reading files
```{r}
setwd("~/GitLab/GSE63310/data/")
files <- c(
"GSM1545535_10_6_5_11.txt",
"GSM1545536_9_6_5_11.txt",
"GSM1545538_purep53.txt",
"GSM1545539_JMS8-2.txt",
"GSM1545540_JMS8-3.txt",
"GSM1545541_JMS8-4.txt",
"GSM1545542_JMS8-5.txt",
"GSM1545544_JMS9-P7c.txt",
"GSM1545545_JMS9-P8c.txt")
x <- readDGE(files, columns=c(1,3))
```
#### Simplifying file names
```{r}
colnames(x) = substring(colnames(x), 12, nchar(colnames(x)))
```
#### Grouping by sample condition
```{r}
group <- as.factor(c("LP", "ML", "Basal", "Basal", "ML", "LP", "Basal", "ML", "LP"))
x$samples$group <- group
```
#### Grouping by lane
```{r}
lane <- as.factor(rep(c("L004","L006","L008"), c(3,4,2)))
x$samples$lane <- lane
x$samples
```
#### Annotation
```{r}
geneid <- rownames(x)
genes <- select(Mus.musculus, keys=geneid, columns=c("SYMBOL", "TXCHROM"),keytype="ENTREZID")
dim(genes)
head(genes)
```
#### Getting rid of duplicated annoations by keeping only the first one
```{r}
genes <- genes[!duplicated(genes$ENTREZID),]
```
#### Count per million of reads
```{r}
cpm <- cpm(x)
```
#### Removing genes lowly expressed (genes with 0 expression across all samples)
```{r}
#Removing genes lowly expressed (genes with 0 expression across all samples)
table(rowSums(x$counts==0)==9)
keep.exprs <- rowSums(cpm>1)>=3
x <- x[keep.exprs,, keep.lib.sizes=FALSE]
dim(x)
```
#### Normlization with edgeR
```{r}
x <- calcNormFactors(x, method = "TMM")
x$samples$norm.factors
```
#### Making the design matrix
```{r}
design <- model.matrix( ~ 0 + group + lane)
colnames(design) <- gsub("group", "", colnames(design))
rownames(design) = colnames(x)
```
#### Making the contrast matrix
```{r}
contr.matrix <- makeContrasts(
BasalvsLP = Basal-LP,
BasalvsML = Basal - ML,
LPvsML = LP - ML,
levels = colnames(design))
contr.matrix
```
#### Applying voom transformation
```{r}
v <- voom(x, design, plot=F)
v$genes$ENTREZ = rownames(v$E)
```
#### Saving objects
```{r}
saveRDS(v, "~/GitLab/GSE63310/elist.rds")
saveRDS(contr.matrix, "~/GitLab/GSE63310/contrast.rds")
```
#### Reading RDS objects (previously genereated above)
```{r}
elist = readRDS("~/GitLab/GSE63310/elist.rds")
contrast = readRDS("~/GitLab/GSE63310/contrast.rds")
```
#### Running coGSEA
```{r}
coGSEA(ElistObject = elist, contrastMatrix = contrast, ENTREZGenesIds = elist$genes$ENTREZ, geneSetCollection = "C2_KEGG",specie = "Mus musculus", directoryPath = "~/GitLab/GSE63310/results", alpha = 0.05, pvalAdjMethod = "BH", pvalCombMethod = "sumlog",min.intersection.size = 1, GSEA.Methods = c("camera", "gage","globaltest", "gsva", "ssgsea", "zscore", "ora", "padog", "roast","safe"), num.workers = 4, shinyMode = FALSE)
```
Source diff could not be displayed: it is too large. Options to address this: view the blob.