From a5ecc3e542544ceaa3457a84752c21bd2c010c48 Mon Sep 17 00:00:00 2001
From: Claudia  CHICA <claudia.chica@pasteur.fr>
Date: Tue, 24 May 2022 00:48:47 +0200
Subject: [PATCH] Replace Descriptive_analysis.Rmd

---
 .../Descriptive/Descriptive_analysis.Rmd      | 133 ++----------------
 1 file changed, 9 insertions(+), 124 deletions(-)

diff --git a/Wednesday/Descriptive/Descriptive_analysis.Rmd b/Wednesday/Descriptive/Descriptive_analysis.Rmd
index 145b35f..62fdff0 100644
--- a/Wednesday/Descriptive/Descriptive_analysis.Rmd
+++ b/Wednesday/Descriptive/Descriptive_analysis.Rmd
@@ -23,7 +23,6 @@ knitr::opts_knit$set(progress = FALSE, verbose = FALSE)
 ```{r loadLibraries}
 library("R.utils")
 library("DESeq2")
-library("vsn")
 library("pheatmap")
 library("RColorBrewer")
 library("ggplot2")
@@ -32,7 +31,7 @@ library("dplyr")
 library("knitr")
 library(cluster)
 
-WDIR='./'
+WDIR='/Users/claudia/work/courses/PHINDaccess/FUN-OMICs/Wednesday/Descriptive/'
 setwd(WDIR)
 ```
 
@@ -41,11 +40,11 @@ setwd(WDIR)
 ```{r loadData}
 #Load data
 fileName="./data/tablecounts_raw.csv"
-countData=read.table(fileName,row.names=1,header=T,sep=",")
+countData=read.table(fileName,row.names=1,header=T)
 
 fileName="./data/target.txt"
-colData=read.table(fileName,row.names=4,header=T,sep="\t")
-colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC","SFA"))
+colData=read.table(fileName,row.names=1,header=T)
+colData$Cond=factor(gsub("[BD]$","",colData$Group),levels=c("C","SC","FA","HC"))
 colData$Rep=gsub(".*-","",colData$Sample)
 n=1
 colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(x, nchar(x)-n+1, nchar(x))}))
@@ -53,124 +52,6 @@ colData$Cell=as.vector(apply(as.data.frame(colData$Group),2,function(x) {substr(
 cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8,10)]
 ```
 
-
-# Quality check
-
-The two functions, normTransform (log2) and varianceStabilizingTransformation, have an argument blind, for whether the  transformation  should  be  blind  to  the  sample  information  specified  by  the  design  formula.  When blind equals TRUE (the default), the functions will re-estimate the dispersions using only an intercept (design formula ~1).  This setting should be used in order to compare samples in a manner wholly unbiased by the information about experimental groups.
-
-
-```{r QA}
-design = ~ Group
-dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, design = design)
-dds <- DESeq(dds)
-sizeFactors(dds)
-
-#Effects of transformations on the variance
-notAllZero<-(rowSums(counts(dds))>0)
-ntd <- normTransform(dds)
-vsd <- varianceStabilizingTransformation(dds)
-
-meanSdPlot(assay(ntd[notAllZero,]))
-meanSdPlot(assay(vsd[notAllZero,]))
-```
-
-## PCA
-
-```{r PCA_all, message = FALSE, warning = FALSE, echo = FALSE}
-
-PCA_3PCS <- function(DataMtx,colDataPCA,color,shape,cols) {
-  # Computing PCA
-  pca=prcomp(t(na.omit(DataMtx)),scale = FALSE)
-  exp=ceiling(pca$sdev/sum(pca$sdev)*100)
-  PCA  <- as.data.frame(prcomp(t(na.omit(DataMtx)),scale = FALSE)$x) 
-  
-  PCA_Plot_group21 <- ggplot(data = PCA,aes(x = PC1,y = PC2)) +
-    geom_point(size=5,aes(color=color,shape=shape)) + 
-    scale_color_manual(values = cols, name="Condition") +
-    scale_shape_discrete(name="") +
-    ylab(paste0("PC2 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[1]),"%")) + 
-    theme(text = element_text(size=15)) + theme_bw()
-    theme_bw()
-  PCA_Plot_group13 <- ggplot(data = PCA,aes(x = PC1,y = PC3)) +
-    geom_point(size=5,aes(color=color,shape=shape)) + 
-    scale_color_manual(values = cols, name="Condition") +
-    scale_shape_discrete(name="") +
-    ylab(paste0("PC3 ",ceiling(pca$sdev[2]),"%")) + xlab(paste0("PC1 ",ceiling(pca$sdev[3]),"%")) +
-    theme_bw() + theme(text = element_text(size=15)) + theme_bw()
-  
-  print(PCA_Plot_group21)
-  print(PCA_Plot_group13)
-}
-
-DataMtx=assay(vsd)
-colDataPCA=colData
-shape=colData$Cell
-color=colData$Cond
-PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-
-#color=colData$Rep
-#PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-
-```
-
-# Filter datasets
-
-Filter out outlying sample (which one?) and uninteresting samples.
-
-```{r filter}
-# Filter out bad sample an uninteresting samples
-colDataFilt <- colData[-grep("SFA",colData$Sample),]
-colDataFilt <- colDataFilt[-grep("^CD-R3",colDataFilt$Sample),]
-colDataFilt$Cond <- factor(colDataFilt$Cond,levels=c("C","SC","FA","HC","SFA"))
-
-countDataFilt <- countData[,row.names(colDataFilt)]
-```
-
-# Quality check
-
-## PCA
-```{r}
-
-dds <- DESeqDataSetFromMatrix(countData = countDataFilt, colData = colDataFilt, 
-                              design = ~ Group)
-dds <- DESeq(dds)
-vsd=varianceStabilizingTransformation(dds)
-
-# DataMtx=assay(vsd)
-# colDataPCA=colDataFilt
-# shape=ifelse(colDataFilt$Cell=="B","BRIGHT","DIM")
-# color=ifelse(colDataFilt$Cond=="FA","FAR",ifelse(colDataFilt$Cond=="C","EXP",
-#              ifelse(colDataFilt$Cond=="SC","STP","HYP")))
-# cols=RColorBrewer::brewer.pal(12,name = "Paired")[c(2,4,6,8)]
-# PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-# 
-DataMtx=assay(vsd)
-colDataPCA=colDataFilt
-shape=colDataFilt$Cell
-color=colDataFilt$Cond
-PCA_3PCS(DataMtx,colDataPCA,color,shape,cols)
-
-```
-
-## Clustering complete dataset
-
-Clustering done on variance stabilized data.
-
-``` {r heatmap_all}
-#Heatmap of the sample-to-sample distances
-sampleDists <- dist(t(assay(vsd)))
-sampleDistMatrix <- as.matrix(sampleDists)
-rownames(sampleDistMatrix) <- vsd$Sample
-colnames(sampleDistMatrix) <- NULL
-colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
-pheatmap(sampleDistMatrix,
-         clustering_distance_rows=sampleDists,
-         clustering_distance_cols=sampleDists,
-         col=colors,fontsize_row=9)
-```
-
-
-
 # Differential analysis
 
 ```{r}
@@ -183,6 +64,10 @@ exportResults <- function(results_table, comparaison, alpha=0.05){
   return(list(rownames(down.name), rownames(up.name)))
 }
 
+dds <- DESeqDataSetFromMatrix(countData = countData, colData = colData, 
+                              design = ~ Group)
+dds <- DESeq(dds)
+
 ```
 
 ## Downregulated genes
@@ -296,7 +181,7 @@ AnnotationCols=data.frame(Condition=colnames(DataMtx),
                           row.names = colnames(DataMtx))
 AnnotationRows=data.frame(Profile=factor(clust,levels = seq(1,nClust,1)),
                           row.names = names(clust))
-colsC=brewer.pal(n = nClust, name = "Blues")
+colsC=brewer.pal(n = nClust, name = "Greys")
 colsAnnot=list(Condition=c("C"=cols[1],"SC"=cols[2],"FA"=cols[3],"HC"=cols[4]),
                Profile=c("1"=colsC[1],"2"=colsC[2],"3"=colsC[3],"4"=colsC[4]))
 
-- 
GitLab