Commit 7b9f963a authored by Anne  BITON's avatar Anne BITON
Browse files

making enrichment analysis run

parent 22a6becb
......@@ -47,7 +47,7 @@ dirdata <- "../../data/"
We load the data where `fastMNN` was used to merge the three different library runs by: first merging by run for each condition, then merging runs within each condition, and finally merging the two conditions.
```{r}
filesce <- paste0(dirdata, "derived/YS_library123/YS_sceall_seurat_fastMNN_bycondbyrun_withoutMitoGenes.rda")
filesce <- paste0(dirdata, "derived/YS/YS_sceall_seurat_fastMNN_bycondbyrun_withoutMitoGenes.rda")
base::load(filesce)
```
......@@ -181,7 +181,7 @@ multiplot(ggclusterk9, ggclusterk10, ggclusterk11, ggclusterk12, ggclusterk13, g
clusteringchoice <- 'snn_scran_k13'
filelabel <- "snnRandomWalksClustering_k13"
plottitle <- 'k=13'
dircluster <- paste0(dirdata,'derived/YS_library123/cluster_markers_withoutMitoGenes/')
dircluster <- paste0(dirdata,'derived/YS/cluster_markers_withoutMitoGenes/')
dir.create(dircluster)
......@@ -215,6 +215,7 @@ print(ggcluster)
```{r plot interactive UMAP plot of picked clustering, fig.width=9, fig.height=7, results=TRUE}
library(plotly)
sceall_seurat$cellID <- colnames(sceall_seurat)
g <- ggplot(as.data.table(cbind(sceall_seurat@reductions$umap@cell.embeddings, sceall_seurat@meta.data)), aes(x=UMAP_1, y=UMAP_2,color=clusterIdents)) + geom_point(aes(text=paste("cell:", cellID, "\nrun:", run, "\nFACS:", FACS)))+ scale_color_manual(values =cluscol)
ggplotly(g)
```
......@@ -241,7 +242,7 @@ Idents(sceall_seurat) <- "clusterIdents"
# save cluster annotations
cell2cluster_YS_run123 <- structure(.Names = colnames(sceall_seurat), .Data= as.character(Idents(sceall_seurat)))
save(cell2cluster_YS_run123, dcol, file = paste0(dirdata,'derived/YS_library123/cell2cluster_YS_run123.rda'))
save(cell2cluster_YS_run123, dcol, file = paste0(dirdata,'derived/YS/cell2cluster_YS_run123.rda'))
```
......@@ -290,7 +291,7 @@ summarizeMarkerSpecificity <- function(markers.out, fdr_cutoff) {
Markers of the clusters are obtained using the function `findMarkers` (which by default performs a t-test) and are written in the folder 'data/derived/YS_library123/cluster_markers/'.
Markers of the clusters are obtained using the function `findMarkers` (which by default performs a t-test) and are written in the folder 'data/derived/YS/cluster_markers/'.
In this version, we keep genes that are found to be differential in at least half of the pairwise comparisons between clusters, i.e. the combined p-value across comparisons is chosen to be the "middle-most" p-value of all tests (cluster comparisons) performed for a given gene. This is achieved using the option `pval.type="some"` in the `findMarkers` function.
......@@ -366,7 +367,7 @@ The top 5 enriched gene sets of a subset of collections of gene sets are shown b
The complete results are stored in the files `r paste0(dircluster,"/ORA_pathways_MsigDB/YS_fastMNN_markersDiffInHalfComparisons_ORA_", filelabel, '*')`.
```{r pathway analysis, eval = FALSE}
```{r pathway analysis, eval = TRUE}
library(EGSEA)
library(openxlsx)
......@@ -415,6 +416,8 @@ names(resora) = names(markers.out)
# write in excel tables
dir.create(paste0(dircluster,'/ORA_pathways_MsigDB'))
dir.create(paste0(dircluster,"/ORA_pathways_MsigDB/plots/"))
foreach( comp = names(resora)) %do% {
getres <- mapply(res = resora[[comp]]@results,
degenes = resora[[comp]]@gs.annots,
......@@ -586,7 +589,6 @@ readAllORA<- function(resora_files,
* The plots of the top `r maxGeneSetsShown` or all gene sets with adjusted p-value lower than `r padj_threshold` are saved in in `r paste0(dircluster,"/ORA_pathways_MsigDB/plots/")`.
```{r plot ORA results as circles, fig.width=9, fig.height=6}
......@@ -675,6 +677,8 @@ foreach (catsel=cat2plot) %do% {
```
* The plots of the top `r maxGeneSetsShown` or all gene sets with adjusted p-value lower than `r padj_threshold` are saved in in `r paste0(dircluster,"/ORA_pathways_MsigDB/plots/")`.
#### Distribution of conditions/genotypes and FACS annotations across clusters
......@@ -782,7 +786,7 @@ print(gridExtra::marrangeGrob(list(d12umap_cyclone),ncol = 1, nrow=1,
# Differential expression analysis between E9.5 and E10.5 within the MEP, My, EMP1, EMP2, EMP3, MEP, and pMk1 clusters
Results and plots are in `r paste0(dirdata,'derived/YS_library123/CLUSTERID_E10.5vsE9.5_withoutMitoGenes')`.
Results and plots are in `r paste0(dirdata,'derived/YS/CLUSTERID_E10.5vsE9.5_withoutMitoGenes')`.
On the MA-plots and heatmap plots, the names of the genes with FDR < 1% and |logFC| > 0.9 are shown.
......@@ -795,11 +799,11 @@ On the MA-plots, the genes with FDR < 1% are shown as black dots.
fdrcutoff <- .01
logFCcutoff <- .9
dir.create(paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5'))
dir.create(paste0(dirdata,'derived/YS/E10.5_vs_E9.5'))
ma_ph <- foreach (clustersel = c('My', 'EMP1', 'EMP2', 'EMP3', 'MEP', 'pMk1')) %do% {
dir.create(paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/'))
dir.create(paste0(dirdata,'derived/YS/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/'))
# write markers without using run as a blocking variable
clus_comparetp <- findMarkers(sceall[,sceall$clusterIdents == clustersel],
......@@ -816,9 +820,9 @@ ma_ph <- foreach (clustersel = c('My', 'EMP1', 'EMP2', 'EMP3', 'MEP', 'pMk1')) %
res$PValue <- res$p.value
# write results
dir.create(paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/'))
dir.create(paste0(dirdata,'derived/YS/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/'))
openxlsx::write.xlsx(cbind(res[, c('geneName', 'AveExpr', 'p.value', 'FDR', 'logFC')],geneAnnot[match(res$geneName,geneAnnot$mgi_symbol),]),
file = paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes.xlsx'))
file = paste0(dirdata,'derived/YS/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes.xlsx'))
# MA-plot
ggma <- ggplot(data = res, aes(x = AveExpr, y = logFC)) +
......@@ -832,7 +836,7 @@ ma_ph <- foreach (clustersel = c('My', 'EMP1', 'EMP2', 'EMP3', 'MEP', 'pMk1')) %
labs(title = paste0('E10.5 vs E9.5 within ',clustersel,' cluster'), x = "Average log UMI counts", y = "logFC", color = paste0("FDR < ",fdrcutoff*100,'%')) + theme_minimal() + coord_cartesian(ylim = c(-2.1,2.1))
pdf(paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes_MAplots.pdf'), 8, 8)
pdf(paste0(dirdata,'derived/YS/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes_MAplots.pdf'), 8, 8)
print(ggma)
dev.off()
......@@ -870,7 +874,7 @@ ma_ph <- foreach (clustersel = c('My', 'EMP1', 'EMP2', 'EMP3', 'MEP', 'pMk1')) %
color = viridis(length(mat_breaks) - 1))
do.call(pheatmap,
c(ph_par, filename = paste0(dirdata,'derived/YS_library123/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes_heatmap.pdf'),
c(ph_par, filename = paste0(dirdata,'derived/YS/E10.5_vs_E9.5/',clustersel,'_E10.5vsE9.5_withoutMitoGenes/',clustersel,'_E10.5vsE9.5_withoutMitoGenes_heatmap.pdf'),
width = 12, height = 6))
return(list(maplot=ggma, pheatmap=ph_par))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment