Commit 4a561acc authored by Anne  BITON's avatar Anne BITON
Browse files

load MassGeissann dataset

parent 6cd31d37
---
title: "Load an EMP/pMac/Mac dataset: GEO:GSE81760 "
author: "Anne Biton and Laina Freyer"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  prettydoc::html_pretty:
    highlight: github
    number_sections: yes
    theme: cayman
    toc: yes
---
```{r setup, eval=TRUE, echo=FALSE, warning = FALSE, message=FALSE, results=FALSE, prompt=FALSE}
# Default options for every later chunk (individual chunk headers can override them).
knitr::opts_chunk$set(
  echo = FALSE,
  cache = TRUE,
  warning = FALSE,
  message = FALSE,
  results = FALSE,
  prompt = FALSE,
  dpi = 150,
  # widgetframe options, see
  # https://cran.r-project.org/web/packages/widgetframe/vignettes/widgetframe_and_knitr.html
  widgetframe_self_contained = FALSE,
  widgetframe_isolate_widgets = TRUE,
  widgetframe_widgets_dir = 'widgets'
)
library(data.table)
library(foreach)
library(readr)
library(ggplot2)
library(foreach)
library(matrixStats)
library(Seurat)
library(Matrix)
library(scater)
library(scran)
library(dplyr)
library(DropletUtils)
library(scales)
library(tidyverse)
library(SingleCellExperiment)
library(matrixStats)
library(R.utils)
# Data directory, relative to this document. Paths below are built with
# paste0(dirdata, ...), so the trailing slash is required.
dirdata <- "../../data/"
```
# Load data
Single cells (CD45+) from E10.25 whole embryos, MARS-seq data.
Elvira Mass, Ivan Ballesteros, Matthias Farlik, Florian Halbritter, Patrick Gunther, Lucile Crozet, Christian E Jacome-Galarza, Johanna Klughammer, Yasuhiro Kobayashi, Elisa Gomez-Perdiguero, Joachim Schultze, Marc Beyer, Christoph Bock & Frederic Geissmann. Differentiation and specification of resident tissue macrophages. Science, 4 August 2016. DOI:10.1126/science.aaf4238, PubMed: 27492475
http://macrophage-development.computational-epigenetics.org
GSE81760
The supplementary file provided at http://macrophage-development.computational-epigenetics.org/ contains UMI counts that were already processed and log-normalized.
"""!Sample_description = Single-cells mouse EMPs preMac Macs from whole embryo at day E10.25
!Sample_data_processing = After de-multiplexing, Cell barcode and RMT were extracted from Read 2 and stored in Read 1. Reads with a Phred<27 for any RMT base were removed from further analysis. Next, we removed contamination, by alignment against the e.coli genome and de-multiplexed the unmapped reads by pool barcode and cell barcode. Reads were aligned using Bowtie v 1.1.1 (parameters:“-m 1 -t --best --chunkmbs 64 –strata) to the Ensembl mouse mm10 + ERCC pseudo genome assembly. We used HTSeq-count (v 0.6.0) to quantify gene count levels based on the ensembl exon-model for mm10. Following UMI sequencing errors were filtered nad a gene expression matrix was generated.
!Sample_data_processing = Raw molecule counts were loaded into R and all cells with a mito-to-endogenous gene molecule count of 0.15, with less than 320 unique molecule counts or 150 unique genes were removed from further analysis. In addition we filtered for mitichondrial, ribosomal and predicted genes."""
```{r}
# Read the log-normalised expression table as a plain data.frame.
umicounts <- data.table::fread(
  input = paste0(dirdata, "external/Geissmann/GSM2175164_processed_log_gene_expression.txt"),
  data.table = FALSE
)
# Use the first column as row names, then drop it so that only the
# expression columns remain.
rownames(umicounts) <- umicounts[[1]]
umicounts[[1]] <- NULL
```
## Cluster annotation
Cluster assignments from co-author Patrick Guenther.
```{r}
# Cluster annotation, one row per cell. read_xlsx() returns a tibble, and
# setting row names on a tibble is deprecated, so convert to a plain
# data.frame before using the cell IDs as row names.
annot <- as.data.frame(
  readxl::read_xlsx(paste0(dirdata, 'external/Geissmann/Clusters.xlsx'))
)
rownames(annot) <- annot$cellID
# Build the Seurat object from the expression table, attaching the
# annotation as cell-level metadata (matched through the row names).
sce_seurat_FG <- CreateSeuratObject(counts = umicounts, assay = 'RNA', meta.data = annot)
```
## Cell filtering
Cells have already been filtered. Those remaining with a low number of detected genes or UMIs were therefore not removed, to stay consistent with the number of cells published in the paper.
## Number of UMIs and number of detected genes
```{r nb reads and zeros, cache=FALSE}
# Per-cell (column-wise) summaries of the expression table.
# The identifier column was already moved into the row names when the table
# was loaded, so every remaining column is a cell: do NOT drop column 1 here,
# otherwise the first cell is silently excluded from all statistics.
# Note: the table holds log-normalised values, so nbUMIs is a sum of those
# values rather than a raw UMI count.
nbUMIs <- colSums(umicounts)
# Number of genes with a non-zero value in each cell.
nbnot0genes <- colSums(umicounts > 0)
# Number and proportion of genes with a zero value in each cell.
nb0genes <- colSums(umicounts == 0)
prop0genes <- nb0genes / nrow(umicounts)
# Aliases kept under their original names; each holds the same per-cell
# values as the vector it is copied from.
non0genes <- nbnot0genes
only0genes <- nb0genes
only0cells <- nb0genes
```
```{r, fig.width=4, fig.height=3}
# Distribution of the per-cell expression totals (nbUMIs), on a log2 scale.
hist(log2(nbUMIs), breaks = 60)
# Distribution of the number of genes with a non-zero value per cell.
hist(nbnot0genes, breaks = 60)
```
## Number of UMIs versus number of detected genes
blue line = 2,000 cutoff
pink line = 2,750 cutoff
red line = 3,000 cutoff
```{r scatter, fig.height=3, fig.width=3}
# Smoothed density scatter: detected genes per cell (x) against the per-cell
# expression total (y).
{
  smoothScatter(
    x = nbnot0genes,
    y = nbUMIs,
    xlab = '#detected genes',
    ylab = '#UMIs'
  ) # optional fixed axes: xlim = c(0,11000), ylim = c(0, 30000)
  # Dashed vertical guides at the three detected-gene cutoffs
  # (red = 3000, pink = 2750, blue = 2000).
  abline(v = c(3000, 2750, 2000), col = c('red', 'pink', 'blue'), lty = 2)
}
```
```{r, results=TRUE}
# For each cutoff, count the cells whose number of detected genes is
# strictly greater than the cutoff; the result is a named list.
lapply(
  c(
    nbCells_atLeast2000genes = 2000,
    nbCells_atLeast2750genes = 2750,
    nbCells_atLeast3000genes = 3000
  ),
  function(cutoff) sum(nbnot0genes > cutoff)
)
```
$nbCells_atLeast2000genes = 266
$nbCells_atLeast2750genes = 201
$nbCells_atLeast3000genes = 161
# Clustering
Variable feature selection using `Seurat::FindVariableFeatures`. The expression values were already log-normalized upstream (see "Load data"), so `Seurat::NormalizeData` is not run here.
```{r, fig.height=3, fig.width=5}
# Select 1000 variable features with the "vst" method.
# NOTE(review): Seurat documents dispersion.cutoff for the "mean.var.plot"
# method; presumably it has no effect with "vst" — confirm.
# NOTE(review): the input table is described as already log-normalised, while
# "vst" is documented for count data — confirm this is intended.
sce_seurat_FG <- FindVariableFeatures(sce_seurat_FG, selection.method = "vst", nfeatures = 1000, dispersion.cutoff = c(0.75, Inf))
# Per-cell totals and number of detected features, as stored by Seurat.
VlnPlot(object= sce_seurat_FG, features = c('nCount_RNA','nFeature_RNA'), ncol=2)#,'percent_mito'))
```
```{r}
# Constant annotations describing where the cells of this dataset come from.
sce_seurat_FG$tech <- "MARSSeq"
sce_seurat_FG$dataset <- "Geissmann"
sce_seurat_FG$condition <- "E10.25_Embryo"

# Copy the cluster ids into a character column and make it the active identity.
sce_seurat_FG$cell_type <- paste(sce_seurat_FG$Cluster_FG)
Idents(sce_seurat_FG) <- "cell_type"
# Replace cluster ids '1', '2', '3' by cell-type labels, then write the
# labels back into the metadata column.
sce_seurat_FG <- RenameIdents(
  sce_seurat_FG,
  '1' = "EMP_FG",
  '2' = "pMac_FG",
  '3' = "Mac_FG"
)
sce_seurat_FG$cell_type <- paste(Idents(sce_seurat_FG))

# Cluster colours chosen to match the EGP dataset in a different shade:
# base colours with alpha 0.8 applied, names kept.
cols_celltype <- c('EMP_FG' = "#5e7535", 'pMac_FG' = "#cf3a29", 'Mac_FG' = "#383838")
cols_celltype <- setNames(alpha(cols_celltype, .8), names(cols_celltype))

# Persist the palette, the Seurat object and its SingleCellExperiment version.
saveRDS(cols_celltype, file = paste0(dirdata, 'external/Geissmann/cols_celltype_FG.rds'))
saveRDS(sce_seurat_FG, file = paste0(dirdata, 'external/Geissmann/sce_seurat_FG.rds'))
sce_FG <- as.SingleCellExperiment(sce_seurat_FG)
saveRDS(sce_FG, file = paste0(dirdata, 'external/Geissmann/sce_FG.rds'))
```
```{r, fig.height=3, fig.width=4}
# Scale all genes, not only the variable features.
all.genes <- rownames(sce_seurat_FG)
sce_seurat_FG <- ScaleData(sce_seurat_FG, features = all.genes)
# PCA restricted to the variable features, keeping 20 components.
sce_seurat_FG <- RunPCA(sce_seurat_FG, features = VariableFeatures(object = sce_seurat_FG), npcs=20)
# Elbow plot of the PCA result.
ElbowPlot(sce_seurat_FG)
```
## tSNE and UMAP
```{r, fig.height=3, fig.width=4}
# Neighbour graph and both 2-D embeddings, all computed on the first 7 PCs.
sce_seurat_FG <- FindNeighbors(sce_seurat_FG, dims = 1:7)
sce_seurat_FG <- RunTSNE(object = sce_seurat_FG, dims = 1:7)
sce_seurat_FG <- RunUMAP(object = sce_seurat_FG, dims = 1:7)

# Embeddings coloured by the cluster column, then by the renamed cell types.
DimPlot(object = sce_seurat_FG, reduction = "tsne", group.by = "Cluster_FG", pt.size = 1, dims = c(1,2), label = FALSE)
DimPlot(object = sce_seurat_FG, reduction = "umap", group.by = "Cluster_FG", pt.size = 1, dims = c(1,2), label = FALSE)
DimPlot(object = sce_seurat_FG, reduction = "tsne", group.by = "cell_type", cols = cols_celltype, pt.size = 1, dims = c(1,2), label = FALSE)

# One tSNE expression map per gene; print() is needed because plots built
# inside a loop are not auto-printed.
for (marker in c('Kit', 'Ptprc', 'Plac8', 'Maf', 'Adgre1')) {
  print(
    FeaturePlot(object = sce_seurat_FG, reduction = "tsne", features = marker, pt.size = 1, dims = c(1,2), label = FALSE)
  )
}
```
## Cluster Signatures
```{r, fig.height=3, fig.width=12}
# Ordered factor so the violins appear as EMP, pMac, Mac.
sce_seurat_FG$groups <- factor(sce_seurat_FG$cell_type, levels = c('EMP_FG', 'pMac_FG', 'Mac_FG'))

# Marker genes per population; one row of four violin plots per panel.
marker_panels <- list(
  EMP = c("Gata1", "Gata2", "Kit", "Itga2b"),   # markers for EMP
  pMac = c("Ptprc", "Plac8", "Csf1r", "Irf8"),  # markers for pMac
  Mac = c("Maf", "Adgre1", "Cd68", "Cx3cr1")    # markers for Mac
)
for (panel in marker_panels) {
  # print() is needed because plots built inside a loop are not auto-printed.
  print(VlnPlot(sce_seurat_FG, panel, group.by = 'groups', ncol = 4))
}
```
# Data Storage
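A SingleCellExperiment object built from the pre-filtered, log-normalized expression table is saved with `saveRDS` as `external/Geissmann/sce_FG.rds` under the data directory (`dirdata`). The Seurat object (`sce_seurat_FG.rds`) and the cell-type colours (`cols_celltype_FG.rds`) are saved alongside it.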
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment