From a5ecc3e542544ceaa3457a84752c21bd2c010c48 Mon Sep 17 00:00:00 2001 From: Claudia CHICA <claudia.chica@pasteur.fr> Date: Tue, 24 May 2022 00:48:47 +0200 Subject: [PATCH] Replace Descriptive_analysis.Rmd --- .../Descriptive/Descriptive_analysis.Rmd | 133 ++---------------- 1 file changed, 9 insertions(+), 124 deletions(-) diff --git a/Wednesday/Descriptive/Descriptive_analysis.Rmd b/Wednesday/Descriptive/Descriptive_analysis.Rmd index 145b35f..62fdff0 100644 --- a/Wednesday/Descriptive/Descriptive_analysis.Rmd +++ b/Wednesday/Descriptive/Descriptive_analysis.Rmd @@ -23,7 +23,6 @@ knitr::opts_knit$set(progress = FALSE, verbose = FALSE) ```{r loadLibraries} library("R.utils") library("DESeq2") -library("vsn") library("pheatmap") library("RColorBrewer") library("ggplot2") @@ -32,7 +31,7 @@ library("dplyr") library("knitr") library(cluster) -WDIR='./' +WDIR='/Users/claudia/work/courses/PHINDaccess/FUN-OMICs/Wednesday/Descriptive/' setwd(WDIR) ``` @@ -41,11 +40,11 @@ setwd(WDIR) ```{r loadData} #Load data fileName="./data/tablecounts_raw.csv" -countData=read.table(fileName,row.names=1,header=T,sep=",") +countData=read.table(fileName,row.names=1,header=T) fileName="./data/target.txt" -colData=read.table(fileName,row.names=4,header=T,sep="\t") -colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC","SFA")) +colData=read.table(fileName,row.names=1,header=T) +colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC")) colData$Rep=gsub(".*-","",colData$Sample) n=1 colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(x, nchar(x)-n+1, nchar(x))})) @@ -53,124 +52,6 @@ colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr( cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8,10)] ``` - -# Quality check - -The two functions, normTransform (log2) and varianceStabilizingTransformation, have an argument blind, for whether the transformation should be blind to the sample information specified by the design formula. When blind equals TRUE (the default), the functions will re-estimate the dispersions using only an intercept (design formula ~1). This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups. - - -```{r QA} -design = ~ Group -dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = design) -dds <- DESeq(dds) -sizeFactors(dds) - -#Effects of transformations on the variance -notAllZero<-(rowSums(counts(dds))>0) -ntd <- normTransform(dds) -vsd <- varianceStabilizingTransformation(dds) - -meanSdPlot(assay(ntd[notAllZero,])) -meanSdPlot(assay(vsd[notAllZero,])) -``` - -## PCA - -```{r PCA_all, message = FALSE, warning = FALSE, echo = FALSE} - -PCA_3PCS <- function(DataMtx,colDataPCA,color,shape,cols) { - # Computing PCA - pca=prcomp(t(na.omit(DataMtx)),scale = FALSE) - exp=ceiling(pca$sdev/sum(pca$sdev)*100) - PCA <- as.data.frame(prcomp(t(na.omit(DataMtx)),scale = FALSE)$x) - - PCA_Plot_group21 <- ggplot(data = PCA,aes(x = PC1,y = PC2)) + - geom_point(size=5,aes(color=color,shape=shape)) + - scale_color_manual(values = cols, name="Condition") + - scale_shape_discrete(name="") + - ylab(paste0("PC2 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[1]),"%")) + - theme(text = element_text(size=15)) + theme_bw() - theme_bw() - PCA_Plot_group13 <- ggplot(data = PCA,aes(x = PC1,y = PC3)) + - geom_point(size=5,aes(color=color,shape=shape)) + - scale_color_manual(values = cols, name="Condition") + - scale_shape_discrete(name="") + - ylab(paste0("PC3 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[3]),"%")) + - theme_bw() + theme(text = element_text(size=15)) + theme_bw() - - print(PCA_Plot_group21) - print(PCA_Plot_group13) -} - -DataMtx=assay(vsd) -colDataPCA=colData -shape=colData$Cell -color=colData$Cond -PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -#color=colData$Rep -#PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -``` - -# Filter datasets - -Filter out outlying sample (which one?) and uninteresting samples. - -```{r filter} -# Filter out bad sample an uninteresting samples -colDataFilt <- colData[-grep("SFA",colData$Sample),] -colDataFilt <- colDataFilt[-grep("^CD-R3",colDataFilt$Sample),] -colDataFilt$Cond <- factor(colDataFilt$Cond,levels=c("C","SC","FA","HC","SFA")) - -countDataFilt <- countData[,row.names(colDataFilt)] -``` - -# Quality check - -## PCA -```{r} - -dds <- DESeqDataSetFromMatrix(countData = countDataFilt, colData = colDataFilt, - design = ~ Group) -dds <- DESeq(dds) -vsd=varianceStabilizingTransformation(dds) - -# DataMtx=assay(vsd) -# colDataPCA=colDataFilt -# shape=ifelse(colDataFilt$Cell=="B","BRIGHT","DIM") -# color=ifelse(colDataFilt$Cond=="FA","FAR",ifelse(colDataFilt$Cond=="C","EXP", -# ifelse(colDataFilt$Cond=="SC","STP","HYP"))) -# cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8)] -# PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) -# -DataMtx=assay(vsd) -colDataPCA=colDataFilt -shape=colDataFilt$Cell -color=colDataFilt$Cond -PCA_3PCS(DataMtx,colDataPCA,color,shape,cols) - -``` - -## Clustering complete dataset - -Clustering done on variance stabilized data. - -``` {r heatmap_all} -#Heatmap of the sample-to-sample distances -sampleDists <- dist(t(assay(vsd))) -sampleDistMatrix <- as.matrix(sampleDists) -rownames(sampleDistMatrix) <- vsd$Sample -colnames(sampleDistMatrix) <- NULL -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - clustering_distance_rows=sampleDists, - clustering_distance_cols=sampleDists, - col=colors,fontsize_row=9) -``` - - - # Differential analysis ```{r} @@ -183,6 +64,10 @@ exportResults <- function(results_table, comparaison, alpha=0.05){ return(list(rownames(down.name), rownames(up.name))) } +dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, + design = ~ Group) +dds <- DESeq(dds) + ``` ## Downregulated genes @@ -296,7 +181,7 @@ AnnotationCols=data.frame(Condition=colnames(DataMtx), row.names = colnames(DataMtx)) AnnotationRows=data.frame(Profile=factor(clust,levels = seq(1,nClust,1)), row.names = names(clust)) -colsC=brewer.pal(n = nClust, name = "Blues") +colsC=brewer.pal(n = nClust, name = "Greys") colsAnnot=list(Condition=c("C"=cols[1],"SC"=cols[2],"FA"=cols[3],"HC"=cols[4]), Profile=c("1"=colsC[1],"2"=colsC[2],"3"=colsC[3],"4"=colsC[4])) -- GitLab