diff --git a/Wednesday/Descriptive/Descriptive_analysis.Rmd b/Wednesday/Descriptive/Descriptive_analysis.Rmd index 145b35f7008309ef85e6b0af03dc4980aae8cbe8..62fdff0a3742887168f761e429b596a6f7a33c8d 100644 --- a/Wednesday/Descriptive/Descriptive_analysis.Rmd +++ b/Wednesday/Descriptive/Descriptive_analysis.Rmd @@ -23,7 +23,6 @@ knitr::opts_knit$set(progress = FALSE, verbose = FALSE) ```{r loadLibraries} library("R.utils") library("DESeq2") -library("vsn") library("pheatmap") library("RColorBrewer") library("ggplot2") @@ -32,7 +31,7 @@ library("dplyr") library("knitr") library(cluster) -WDIR='./' +WDIR='/Users/claudia/work/courses/PHINDaccess/FUN-OMICs/Wednesday/Descriptive/' setwd(WDIR) ``` @@ -41,11 +40,11 @@ setwd(WDIR) ```{r loadData} #Load data fileName="./data/tablecounts_raw.csv" -countData=read.table(fileName,row.names=1,header=T,sep=",") +countData=read.table(fileName,row.names=1,header=T) fileName="./data/target.txt" -colData=read.table(fileName,row.names=4,header=T,sep="\t") -colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC","SFA")) +colData=read.table(fileName,row.names=1,header=T) +colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC")) colData$Rep=gsub(".*-","",colData$Sample) n=1 colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(x, nchar(x)-n+1, nchar(x))})) @@ -53,124 +52,6 @@ colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr( cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8,10)] ``` - -# Quality check - -The two functions, normTransform (log2) and varianceStabilizingTransformation, have an argument blind, for whether the transformation should be blind to the sample information specified by the design formula. When blind equals TRUE (the default), the functions will re-estimate the dispersions using only an intercept (design formula ~1). This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups. - - -```{r QA} -design = ~ Group -dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = design) -dds <- DESeq(dds) -sizeFactors(dds) - -#Effects of transformations on the variance -notAllZero<-(rowSums(counts(dds))>0) -ntd <- normTransform(dds) -vsd <- varianceStabilizingTransformation(dds) - -meanSdPlot(assay(ntd[notAllZero,])) -meanSdPlot(assay(vsd[notAllZero,])) -``` - -## PCA - -```{r PCA_all, message = FALSE, warning = FALSE, echo = FALSE} - -PCA_3PCS <- function(DataMtx,colDataPCA,color,shape,cols) { - # Computing PCA - pca=prcomp(t(na.omit(DataMtx)),scale = FALSE) - exp=ceiling(pca$sdev/sum(pca$sdev)*100) - PCA <- as.data.frame(prcomp(t(na.omit(DataMtx)),scale = FALSE)$x) - - PCA_Plot_group21 <- ggplot(data = PCA,aes(x = PC1,y = PC2)) + - geom_point(size=5,aes(color=color,shape=shape)) + - scale_color_manual(values = cols, name="Condition") + - scale_shape_discrete(name="") + - ylab(paste0("PC2 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[1]),"%")) + - theme(text = element_text(size=15)) + theme_bw() - theme_bw() - PCA_Plot_group13 <- ggplot(data = PCA,aes(x = PC1,y = PC3)) + - geom_point(size=5,aes(color=color,shape=shape)) + - scale_color_manual(values = cols, name="Condition") + - scale_shape_discrete(name="") + - ylab(paste0("PC3 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[3]),"%")) + - theme_bw() + theme(text = element_text(size=15)) + theme_bw() - - print(PCA_Plot_group21) - print(PCA_Plot_group13) -} - -DataMtx=assay(vsd) -colDataPCA=colData -shape=colData$Cell -color=colData$Cond -PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -#color=colData$Rep -#PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -``` - -# Filter datasets - -Filter out outlying sample (which one?) and uninteresting samples. - -```{r filter} -# Filter out bad sample an uninteresting samples -colDataFilt <- colData[-grep("SFA",colData$Sample),] -colDataFilt <- colDataFilt[-grep("^CD-R3",colDataFilt$Sample),] -colDataFilt$Cond <- factor(colDataFilt$Cond,levels=c("C","SC","FA","HC","SFA")) - -countDataFilt <- countData[,row.names(colDataFilt)] -``` - -# Quality check - -## PCA -```{r} - -dds <- DESeqDataSetFromMatrix(countData = countDataFilt, colData = colDataFilt, - design = ~ Group) -dds <- DESeq(dds) -vsd=varianceStabilizingTransformation(dds) - -# DataMtx=assay(vsd) -# colDataPCA=colDataFilt -# shape=ifelse(colDataFilt$Cell=="B","BRIGHT","DIM") -# color=ifelse(colDataFilt$Cond=="FA","FAR",ifelse(colDataFilt$Cond=="C","EXP", -# ifelse(colDataFilt$Cond=="SC","STP","HYP"))) -# cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8)] -# PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) -# -DataMtx=assay(vsd) -colDataPCA=colDataFilt -shape=colDataFilt$Cell -color=colDataFilt$Cond -PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -``` - -## Clustering complete dataset - -Clustering done on variance stabilized data. - -``` {r heatmap_all} -#Heatmap of the sample-to-sample distances -sampleDists <- dist(t(assay(vsd))) -sampleDistMatrix <- as.matrix(sampleDists) -rownames(sampleDistMatrix) <- vsd$Sample -colnames(sampleDistMatrix) <- NULL -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - clustering_distance_rows=sampleDists, - clustering_distance_cols=sampleDists, - col=colors,fontsize_row=9) -``` - - - # Differential analysis ```{r} @@ -183,6 +64,10 @@ exportResults <- function(results_table, comparaison, alpha=0.05){ return(list(rownames(down.name), rownames(up.name))) } +dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, + design = ~ Group) +dds <- DESeq(dds) + ``` ## Downregulated genes @@ -296,7 +181,7 @@ AnnotationCols=data.frame(Condition=colnames(DataMtx), row.names = colnames(DataMtx)) AnnotationRows=data.frame(Profile=factor(clust,levels = seq(1,nClust,1)), row.names = names(clust)) -colsC=brewer.pal(n = nClust, name = "Blues") +colsC=brewer.pal(n = nClust, name = "Greys") colsAnnot=list(Condition=c("C"=cols[1],"SC"=cols[2],"FA"=cols[3],"HC"=cols[4]), Profile=c("1"=colsC[1],"2"=colsC[2],"3"=colsC[3],"4"=colsC[4]))