Commit 77960a81 authored by Anne  BITON's avatar Anne BITON
Browse files

Simplify file names and heatmap plos

parent 260e2207
......@@ -168,13 +168,13 @@ foreach (
}
# density
foreach (
indexLin=c(1,2,4),
labelLin=paste(names(SlingshotDataSet(crv2)@lineages), sapply(SlingshotDataSet(crv2)@lineages, paste0, collapse='-'), sep=': ')[c(1,2,4)]) %do% {
tibble(pseudotime=crv2[[paste0('slingPseudotime_',indexLin)]], FACS=crv2$FACS) %>% na.omit(.) %>% ggplot(.,aes(x=pseudotime, fill=FACS)) + geom_density(alpha=0.5) + ggtitle(labelLin) + scale_fill_manual(values = cols_facs)
}
# foreach (
# indexLin=c(1,2,4),
# labelLin=paste(names(SlingshotDataSet(crv2)@lineages), sapply(SlingshotDataSet(crv2)@lineages, paste0, collapse='-'), sep=': ')[c(1,2,4)]) %do% {
#
# tibble(pseudotime=crv2[[paste0('slingPseudotime_',indexLin)]], FACS=crv2$FACS) %>% na.omit(.) %>% ggplot(.,aes(x=pseudotime, fill=FACS)) + geom_density(alpha=0.5) + ggtitle(labelLin) + scale_fill_manual(values = cols_facs)
#
# }
```
......@@ -182,8 +182,8 @@ foreach (
* The genes whose expression is changing in a continuous manner over pseudotime are extracted by fitting a Generalized Additive Model with a LOESS term for pseudotime.
* The results are in `r paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset.xlsx')` and a summary of the markers associated with each combination of pseudotime is available in
The results are in `r paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset_summary.xlsx')`.
* The results are in `r paste0(dirslingshot,'YS_slingshot_markersLineages.xlsx')` and a summary of the markers associated with each combination of pseudotime is available in
The results are in `r paste0(dirslingshot,'YS_slingshot_markersLineages_summary.xlsx')`.
* The p-values are adjusted using the Bonferroni method.
......@@ -236,7 +236,7 @@ function(x,y) {
}, SIMPLIFY=FALSE)
res_postfiltering %>%
openxlsx::write.xlsx(., file = paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset.xlsx'), asTable = TRUE, row.names = TRUE)
openxlsx::write.xlsx(., file = paste0(dirslingshot,'YS_slingshot_markersLineages.xlsx'), asTable = TRUE, row.names = TRUE)
```
......@@ -248,95 +248,56 @@ res_postfiltering %>%
do.call(rbind,lapply(res_postfiltering, function(x) sum(x$adj.pval < .01))) %>% knitr::kable() %>% print()
```
* The heatmaps below show the top 30 genes (30 smallest p-values) for each lineage. The heatmaps of the top 100 markers are available in `r paste0(dirslingshot,'plots/')`.
* The heatmaps of the top 30 and 100 markers (smallest p-values) for each lineage trajectory are available in `r paste0(dirslingshot,'plots/')`.
```{r plot pdf heatmaps, fig.keep='all', cache.rebuild=FALSE}
```{r plot pdf heatmaps, results='asis'}
library(viridis)
dir.create(paste0(dirslingshot,'plots/'))
nbMarkers2show <- 30
foreach(x=res_postfiltering, tname=names(res_postfiltering)) %do% {
x <- x[x$adj.pval < .01,]
x <- x[order(x$pval),]
topgenes_ps <- as.character(x[1:min(nbMarkers2show,nrow(x)),]$gene)
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
ps <- crv2[[tname]]
names(ps) <- colnames(crv2)
selps <- ps[which(!is.na(ps))]
selps <- names(sort(ps))
mat <- t(apply(sceall@assays@data$logcounts[topgenes_ps,selps], 1, function(x) (x-mean(x))/sd(x)))
mat_breaks <- quantile_breaks(mat, n = 50)
ph_par <- list(mat, show_rownames = T, show_colnames = F,scale='none',
breaks = mat_breaks,
annotation_col = data.frame(pseudotime=ps[selps],
cluster=sceall[,selps]$clusterIdents,
FACS=sceall[,selps]$FACS,
condition=sceall[,selps]$condition,
id=selps, row.names = 'id') %>%
dplyr::rename(!!tname := pseudotime),
annotation_colors = list(cluster=cols[unique(as.character(sceall$clusterIdents))],
FACS=cols_facs[unique(as.character(sceall$FACS))],
condition=cols_condition),
cluster_cols = FALSE, cluster_rows = TRUE, fontsize_row = 9,
color = viridis(length(mat_breaks) - 1))
do.call(pheatmap, c(ph_par, filename = paste0(dirslingshot,'plots/YS_slingshot_top30Markers_',tname,'_usingPostFilteringDataset.pdf'),
width = 8, height = 5))
for (nbMarkers2show in c(30,100)) {
foreach(x=res_postfiltering, tname=names(res_postfiltering)) %do% {
x <- x[x$adj.pval < .01,]
x <- x[order(x$pval),]
topgenes_ps <- as.character(x[1:min(nbMarkers2show,nrow(x)),]$gene)
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
ps <- crv2[[tname]]
names(ps) <- colnames(crv2)
selps <- ps[which(!is.na(ps))]
selps <- names(sort(ps))
mat <- t(apply(sceall@assays@data$logcounts[topgenes_ps,selps], 1, function(x) (x-mean(x))/sd(x)))
mat_breaks <- quantile_breaks(mat, n = 50)
ph_par <- list(mat, show_rownames = T, show_colnames = F,scale='none',
breaks = mat_breaks,
annotation_col = data.frame(pseudotime=ps[selps],
cluster=sceall[,selps]$clusterIdents,
FACS=sceall[,selps]$FACS,
condition=sceall[,selps]$condition,
id=selps, row.names = 'id') %>%
dplyr::rename(!!tname := pseudotime),
annotation_colors = list(cluster=cols[unique(as.character(sceall$clusterIdents))],
FACS=cols_facs[unique(as.character(sceall$FACS))],
condition=cols_condition),
cluster_cols = FALSE, cluster_rows = TRUE, fontsize_row = if (nbMarkers2show == 30) 9 else 3,
color = viridis(length(mat_breaks) - 1))
do.call(pheatmap, c(ph_par, filename = paste0(dirslingshot,'plots/YS_slingshot_top', nbMarkers2show,'Markers_',tname,'.pdf'), width = 8, height = 5))
do.call(pheatmap, ph_par)
return(NULL)
}
nbMarkers2show <- 100
foreach(x=res_postfiltering, tname=names(res_postfiltering)) %do% {
x <- x[x$adj.pval < .01,]
x <- x[order(x$pval),]
topgenes_ps <- as.character(x[1:min(nbMarkers2show,nrow(x)),]$gene)
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
ps <- crv2[[tname]]
names(ps) <- colnames(crv2)
selps <- ps[which(!is.na(ps))]
selps <- names(sort(ps))
mat <- t(apply(sceall@assays@data$logcounts[topgenes_ps,selps], 1, function(x) (x-mean(x))/sd(x)))
mat_breaks <- quantile_breaks(mat, n = 50)
pheatmap(mat,
show_rownames = T, show_colnames = F,scale='none',
breaks = mat_breaks,
annotation_col = data.frame(pseudotime=ps[selps],
cluster=sceall_seurat[,selps]$clusterIdents,
FACS=sceall[,selps]$FACS,
condition=sceall[,selps]$condition,
id=selps, row.names = 'id') %>%
dplyr::rename(!!tname := pseudotime),
annotation_colors = list(cluster=cols[unique(as.character(sceall$clusterIdents))],
FACS=cols_facs[unique(as.character(sceall$FACS))],
condition=cols_condition),
cluster_cols = FALSE, cluster_rows = TRUE, fontsize_row = 9,
filename = paste0(dirslingshot,'plots/YS_slingshot_top100Markers_',tname,'_usingPostFilteringDataset.pdf'),
width = 12, height = 12,
color = viridis(length(mat_breaks) - 1))
}
}
```
......@@ -346,43 +307,44 @@ foreach(x=res_postfiltering, tname=names(res_postfiltering)) %do% {
# We show only the lineage starting at the first MEP cell belonging to the trajectory
x=res_postfiltering[[1]]; tname=names(res_postfiltering)[1]
x=res_postfiltering[[1]];
tname=names(res_postfiltering)[1]
x <- x[x$adj.pval < .01,]
x <- x[order(x$pval),]
topgenes_ps <- as.character(x[1:min(30,nrow(x)),]$gene)
x <- x[x$adj.pval < .01,]
x <- x[order(x$pval),]
topgenes_ps <- as.character(x[1:min(30,nrow(x)),]$gene)
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
quantile_breaks <- function(xs, n = 10) {
breaks <- quantile(xs, probs = seq(0, 1, length.out = n))
breaks[!duplicated(breaks)]
}
ps <- crv2[[tname]]
names(ps) <- colnames(crv2)
selps <- ps[which(!is.na(ps))]
selps <- names(sort(ps))
selps <- selps[(which(sceall[,selps]$clusterIdents == 'MEP'))[3]:length(selps)]
mat <- t(apply(sceall@assays@data$logcounts[topgenes_ps,selps], 1, function(x) (x-mean(x))/sd(x)))
mat_breaks <- quantile_breaks(mat, n = 100)
pheatmap(mat,
show_rownames = T, show_colnames = F,scale='none',
breaks = mat_breaks,
annotation_col = data.frame(pseudotime=ps[selps],
cluster=sceall[,selps]$clusterIdents,
FACS=sceall[,selps]$FACS,
condition=sceall[,selps]$condition,
id=selps, row.names = 'id') %>%
dplyr::rename(!!tname := pseudotime),
annotation_colors = list(cluster=cols[unique(as.character(sceall$clusterIdents))],
FACS=cols_facs[unique(as.character(sceall$FACS))],
condition=cols_condition),
cluster_cols = FALSE, cluster_rows = TRUE, fontsize_row = 9,
main = "Pseudotime 1 restricted to MEP start",
width = 12, height = 12,
color = viridis(length(mat_breaks) - 1))
ps <- crv2[[tname]]
names(ps) <- colnames(crv2)
selps <- ps[which(!is.na(ps))]
selps <- names(sort(ps))
selps <- selps[(which(sceall[,selps]$clusterIdents == 'MEP'))[3]:length(selps)]
mat <- t(apply(sceall@assays@data$logcounts[topgenes_ps,selps], 1, function(x) (x-mean(x))/sd(x)))
mat_breaks <- quantile_breaks(mat, n = 100)
pheatmap(mat,
show_rownames = T, show_colnames = F,scale='none',
breaks = mat_breaks,
annotation_col = data.frame(pseudotime=ps[selps],
cluster=sceall[,selps]$clusterIdents,
FACS=sceall[,selps]$FACS,
condition=sceall[,selps]$condition,
id=selps, row.names = 'id') %>%
dplyr::rename(!!tname := pseudotime),
annotation_colors = list(cluster=cols[unique(as.character(sceall$clusterIdents))],
FACS=cols_facs[unique(as.character(sceall$FACS))],
condition=cols_condition),
cluster_cols = FALSE, cluster_rows = TRUE, fontsize_row = 9,
main = "Pseudotime 1 restricted to MEP start",
width = 12, height = 12,
color = viridis(length(mat_breaks) - 1))
```
......@@ -428,10 +390,10 @@ summarizeMarkerSpecificity <- function(markers_sel, adj.pval_cutoff=0.01) {
```{r summarize association of markers with pseudotimes}
# read result files and extract significant genes
res_postfiltering_names <-
openxlsx::getSheetNames(paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset.xlsx'))
openxlsx::getSheetNames(paste0(dirslingshot,'YS_slingshot_markersLineages.xlsx'))
res_postfiltering <-
lapply(res_postfiltering_names, function(x) openxlsx::read.xlsx(xlsxFile = paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset.xlsx'), sheet=x))
lapply(res_postfiltering_names, function(x) openxlsx::read.xlsx(xlsxFile = paste0(dirslingshot,'YS_slingshot_markersLineages.xlsx'), sheet=x))
names(res_postfiltering) <- res_postfiltering_names
......@@ -444,7 +406,7 @@ res_postfiltering_sel <- lapply(res_postfiltering, function(x) {
# number of genes associated with pseudotime using Bonferroni-adjusted p-value < .01
lapply(res_postfiltering_sel, nrow)
summarizeMarkerSpecificity(markers_sel = res_postfiltering_sel, adj.pval_cutoff = 0.01) %>% cbind(., geneAnnot[match(.$gene,geneAnnot$mgi_symbol),]) %>% openxlsx::write.xlsx(paste0(dirslingshot,'YS_slingshot_markersLineages_usingPostFilteringDataset_summary.xlsx'))
summarizeMarkerSpecificity(markers_sel = res_postfiltering_sel, adj.pval_cutoff = 0.01) %>% cbind(., geneAnnot[match(.$gene,geneAnnot$mgi_symbol),]) %>% openxlsx::write.xlsx(paste0(dirslingshot,'YS_slingshot_markersLineages_summary.xlsx'))
```
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment