Replace Descriptive_analysis.Rmd

a5ecc3e5 · Claudia CHICA · 33507334 · a5ecc3e5
Commit a5ecc3e5 authored 3 years ago by Claudia CHICA
--- a/Wednesday/Descriptive/Descriptive_analysis.Rmd
+++ b/Wednesday/Descriptive/Descriptive_analysis.Rmd
@@ -23,7 +23,6 @@ knitr::opts_knit$set(progress = FALSE, verbose = FALSE)
 ```{r loadLibraries}
 library("R.utils")
 library("DESeq2")
-library("vsn")
 library("pheatmap")
 library("RColorBrewer")
 library("ggplot2")
@@ -32,7 +31,7 @@ library("dplyr")
 library("knitr")
 library(cluster)
-WDIR='./'
+WDIR='/Users/claudia/work/courses/PHINDaccess/FUN-OMICs/Wednesday/Descriptive/'
 setwd(WDIR)
 ```
@@ -41,11 +40,11 @@ setwd(WDIR)
 ```{r loadData}
 #Load data
 fileName="./data/tablecounts_raw.csv"
-countData=read.table(fileName,row.names=1,header=T,sep=",")
+countData=read.table(fileName,row.names=1,header=T)
 fileName="./data/target.txt"
-colData=read.table(fileName,row.names=4,header=T,sep="\t")
+colData=read.table(fileName,row.names=1,header=T)
-colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC","SFA"))
+colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC"))
 colData$Rep=gsub(".*-","",colData$Sample)
 n=1
 colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(x, nchar(x)-n+1, nchar(x))}))
@@ -53,124 +52,6 @@ colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(
 cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8,10)]
 ```
-# Quality check
-The two functions, normTransform (log2) and varianceStabilizingTransformation, have an argument blind, for whether the  transformation  should  be  blind  to  the  sample  information  specified  by  the  design  formula.  When blind equals TRUE (the default), the functions will re-estimate the dispersions using only an intercept (design formula ~1).  This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups.
-```{r QA}
-design = ~ Group
-dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = design)
-dds <- DESeq(dds)
-sizeFactors(dds)
-#Effects of transformations on the variance
-notAllZero<-(rowSums(counts(dds))>0)
-ntd <- normTransform(dds)
-vsd <- varianceStabilizingTransformation(dds)
-meanSdPlot(assay(ntd[notAllZero,]))
-meanSdPlot(assay(vsd[notAllZero,]))
-```
-## PCA
-```{r PCA_all, message = FALSE, warning = FALSE, echo = FALSE}
-PCA_3PCS <- function(DataMtx,colDataPCA,color,shape,cols) {
-  # Computing PCA
-  pca=prcomp(t(na.omit(DataMtx)),scale = FALSE)
-  exp=ceiling(pca$sdev/sum(pca$sdev)*100)
-  PCA  <- as.data.frame(prcomp(t(na.omit(DataMtx)),scale = FALSE)$x) 
-  PCA_Plot_group21 <- ggplot(data = PCA,aes(x = PC1,y = PC2)) +
-    geom_point(size=5,aes(color=color,shape=shape)) + 
-    scale_color_manual(values = cols, name="Condition") +
-    scale_shape_discrete(name="") +
-    ylab(paste0("PC2 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[1]),"%")) + 
-    theme(text = element_text(size=15)) + theme_bw()
-    theme_bw()
-  PCA_Plot_group13 <- ggplot(data = PCA,aes(x = PC1,y = PC3)) +
-    geom_point(size=5,aes(color=color,shape=shape)) + 
-    scale_color_manual(values = cols, name="Condition") +
-    scale_shape_discrete(name="") +
-    ylab(paste0("PC3 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[3]),"%")) +
-    theme_bw() + theme(text = element_text(size=15)) + theme_bw()
-  print(PCA_Plot_group21)
-  print(PCA_Plot_group13)
-}
-DataMtx=assay(vsd)
-colDataPCA=colData
-shape=colData$Cell
-color=colData$Cond
-PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-#color=colData$Rep
-#PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-```
-# Filter datasets
-Filter out outlying sample (which one?) and uninteresting samples.
-```{r filter}
-# Filter out bad sample an uninteresting samples
-colDataFilt <- colData[-grep("SFA",colData$Sample),]
-colDataFilt <- colDataFilt[-grep("^CD-R3",colDataFilt$Sample),]
-colDataFilt$Cond <- factor(colDataFilt$Cond,levels=c("C","SC","FA","HC","SFA"))
-countDataFilt <- countData[,row.names(colDataFilt)]
-```
-# Quality check
-## PCA
-```{r}
-dds <- DESeqDataSetFromMatrix(countData = countDataFilt, colData = colDataFilt, 
-                              design = ~ Group)
-dds <- DESeq(dds)
-vsd=varianceStabilizingTransformation(dds)
-# DataMtx=assay(vsd)
-# colDataPCA=colDataFilt
-# shape=ifelse(colDataFilt$Cell=="B","BRIGHT","DIM")
-# color=ifelse(colDataFilt$Cond=="FA","FAR",ifelse(colDataFilt$Cond=="C","EXP",
-#              ifelse(colDataFilt$Cond=="SC","STP","HYP")))
-# cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8)]
-# PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-# 
-DataMtx=assay(vsd)
-colDataPCA=colDataFilt
-shape=colDataFilt$Cell
-color=colDataFilt$Cond
-PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-```
-## Clustering complete dataset
-Clustering done on variance stabilized data.
-``` {r heatmap_all}
-#Heatmap of the sample-to-sample distances
-sampleDists <- dist(t(assay(vsd)))
-sampleDistMatrix <- as.matrix(sampleDists)
-rownames(sampleDistMatrix) <- vsd$Sample
-colnames(sampleDistMatrix) <- NULL
-colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
-pheatmap(sampleDistMatrix,
-         clustering_distance_rows=sampleDists,
-         clustering_distance_cols=sampleDists,
-         col=colors,fontsize_row=9)
-```
 # Differential analysis
 ```{r}
@@ -183,6 +64,10 @@ exportResults <- function(results_table, comparaison, alpha=0.05){
  return(list(rownames(down.name), rownames(up.name)))
 }
+dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, 
+                              design = ~ Group)
+dds <- DESeq(dds)
 ```
 ## Downregulated genes
@@ -296,7 +181,7 @@ AnnotationCols=data.frame(Condition=colnames(DataMtx),
                          row.names = colnames(DataMtx))
 AnnotationRows=data.frame(Profile=factor(clust,levels = seq(1,nClust,1)),
                          row.names = names(clust))
-colsC=brewer.pal(n = nClust, name = "Blues")
+colsC=brewer.pal(n = nClust, name = "Greys")
 colsAnnot=list(Condition=c("C"=cols[1],"SC"=cols[2],"FA"=cols[3],"HC"=cols[4]),
               Profile=c("1"=colsC[1],"2"=colsC[2],"3"=colsC[3],"4"=colsC[4]))