FL_02.merge.Rmd

title: "Merge E10.5 and E12.5 Fetal Liver samples from the three different library runs"
author: "Anne Biton"
date: "`r format(Sys.time(), '%d %B, %Y')`"

output:
  prettydoc::html_pretty:
    highlight: github
    number_sections: yes
    theme: cayman
    toc: yes
knitr::opts_chunk$set(echo = FALSE, cache = TRUE, warning = FALSE, message=FALSE, results=FALSE, prompt=FALSE, dpi=150)

library(data.table)
library(ggplot2)
library(foreach)
library(BiocParallel)

library(Biobase)
library(dplyr)
library(tidyr)
library(Seurat)
library(Matrix)


library(Seurat)
library(Matrix)
library(scater)
library(scran)
library(dplyr)
library(scales)
library(batchelor)

dirdata <- "../../data/"

sce <- readRDS(paste0(dirdata,'derived/FL/FL_sce.rds'))
sce$run <- paste0('Run',sce$nextseq500_run)

sce_bycond <- SplitObject(as.Seurat(sce), split.by = 'condition')
sce_bycond_filter <- lapply(sce_bycond, function(x) {
                                      x <- CreateSeuratObject(count=x@assays$RNA@counts,
                                                                  min.cells=round(ncol(x)*5/100),
                                                                  min.features =3000)
                                    })

intgenes <- Reduce(intersect,(lapply(sce_bycond_filter,rownames)))
intgenes <- intgenes[grep('^ERCC-', intgenes, invert=TRUE)]
length(intgenes)
sce_filter <- lapply(sce_bycond, function(x) x[intgenes,])

sce_filter <- Reduce(merge, sce_filter)
sce_merge <- lapply(SplitObject(sce_filter, split.by = 'run'), as.SingleCellExperiment)


# normalization by condition using deconvolution
#https://osca.bioconductor.org/normalization.html#normalization-by-deconvolution
clusters <- lapply(sce_merge, quickCluster, min.size = 50, min.mean=0.1)#, use.ranks=FALSE)
sce_merge <- mapply(sce = sce_merge, cluster = clusters, function(sce, cluster) computeSumFactors(sce, clusters=cluster, min.mean=0.1))
sce_merge <- lapply(sce_merge, scater::logNormCounts)
# multiBatchNorm adjusts for differences in sequencing depth between plates by downscaling all batches to match the coverage of the least-sequenced batch.
sce_merge <- do.call(batchelor::multiBatchNorm,sce_merge)


decs <- lapply(sce_merge, function(x) scran::modelGeneVar(x, block=x$condition_plate))#,min.mean=0.05, loess.args=list(span=0.05))) # ,
decs <- lapply(decs, function(x) x[order(x$bio, decreasing=TRUE),])

par(mfrow=c(4,3))
lapply(decs,function(dec.block.416b) {
blocked.stats <- dec.block.416b$per.block
for (i in colnames(blocked.stats)) {
    current <- blocked.stats[[i]]
    plot(current$mean, current$total, main=i, pch=16, cex=0.5,
        xlab="Mean of log-expression", ylab="Variance of log-expression")
    curfit <- metadata(current)
    points(curfit$mean, curfit$var, col="red", pch=16)
    curve(curfit$trend(x), col='dodgerblue', add=TRUE, lwd=2)
}}
)


combined.decs <- do.call(combineVar, lapply(decs, function(x) x[intgenes,]))
bio <- combined.decs[order(combined.decs$bio, decreasing = TRUE),'bio']
chosen <- names(bio[1:3000])

sceall <- lapply(sce_merge, function(x) x[chosen,])
sceall <- do.call(cbind,sceall)

# keep all genes and not only the variable genes in the count matrices
sce_allgenes <- do.call(cbind,sce_merge)
sceall$condition_run <- paste(sceall$condition, sceall$run, sep='_')
sceall$condition_run_plate <- paste(sceall$condition, sceall$run, sceall$plate, sep='_')


pcs <- batchelor::multiBatchPCA(sceall, batch=sceall$condition_run_plate, BPPARAM= BiocParallel::MulticoreParam(3))
names(pcs)

reducedDim(sceall, "multiBatchPCA") <- do.call(rbind,pcs)[colnames(sceall),]
reducedDim(sce_allgenes, "multiBatchPCA") <-  do.call(rbind,pcs)[colnames(sce_allgenes),]
sceall_seurat <- as.Seurat(sce_allgenes)
VariableFeatures(sceall_seurat) <- chosen

sce_merge <- lapply(SplitObject(as.Seurat(sce), split.by = 'run'), as.SingleCellExperiment)

# normalization by condition using deconvolution
#https://osca.bioconductor.org/normalization.html#normalization-by-deconvolution
clusters <- lapply(sce_merge, quickCluster, min.size = 50, min.mean=0.1)#, use.ranks=FALSE)
sce_merge <- mapply(sce = sce_merge, cluster = clusters, function(sce, cluster) computeSumFactors(sce, clusters=cluster, min.mean=0.1))
sce_merge <- lapply(sce_merge, scater::logNormCounts)
# multiBatchNorm adjusts for differences in sequencing depth between plates by downscaling all batches to match the coverage of the least-sequenced batch.
sce_merge <- do.call(batchelor::multiBatchNorm,sce_merge)
sceallbis <- do.call(cbind,sce_merge)

sceall_seurat[['RNA_raw']] <- Seurat::CreateAssayObject(#counts = counts(sceallbis)[, colnames(sceall_seurat)],
                                                        data = logcounts(sceallbis)[, colnames(sceall_seurat)])
sceall_seurat[['RNA_raw']]@counts <- counts(sceallbis)[, colnames(sceall_seurat)]


mnn.e10.5 <- do.call(batchelor::reducedMNN, c(pcs[grep('E10.5_Fetal_Liver',names(pcs))], args = list(k = 20, auto.merge = TRUE)))
mnn.e12.5 <- do.call(batchelor::reducedMNN, c(pcs[grep('E12.5_Fetal_Liver',names(pcs))], args = list(k = 20, auto.merge = TRUE)))
mnn.final <- batchelor::reducedMNN(mnn.e12.5$corrected,
                                   mnn.e10.5$corrected)


# add fastMNN projection/correction as a new DimReduc object
sceall_seurat[['fastMNN']] <- CreateDimReducObject(embeddings = mnn.final$corrected[colnames(sceall_seurat),], key = 'fastMNN')

# save raw count data
#sceall_seurat[['RNA_raw']] <- Seurat::CreateAssayObject(counts = sce_filter@assays$RNA@counts[, colnames(sceall_seurat)])

sceall_seurat <- RunTSNE(object = sceall_seurat, dims = 1:50, reduction = 'fastMNN', verbose=FALSE)
sceall_seurat <- RunUMAP(object = sceall_seurat, dims = 1:50, reduction = 'fastMNN', verbose=FALSE)

#gridExtra::marrangeGrob(list(
DimPlot(object = sceall_seurat , reduction = "umap", group.by = 'run', pt.size = 0.75, dims = c(1,2), label = FALSE)#,
DimPlot(object = sceall_seurat , reduction = "umap", group.by = 'plate', pt.size = 0.75, dims = c(1,2), label = FALSE)#,
DimPlot(object = sceall_seurat , reduction = "umap", group.by = 'condition', pt.size = 0.75, dims = c(1,2), label = FALSE)#,
FeaturePlot(object = sceall_seurat , reduction = "umap", features =  'nFeature_RNA', pt.size = 1, dims = c(1,2), label = FALSE)#,
FeaturePlot(object = sceall_seurat , reduction = "umap", features =  'percent.mito', pt.size = 1, dims = c(1,2), label = FALSE)#,


ggs <- sapply(c("CD41_Ab", "CD61_Ab", "CD71_Ab",  "AA4_1_Ab", "CD16_32_Ab", "CD34_Ab", "Mk_gate", "pMac_gate", "CD45_Ab"),
              function(i, sceall_seurat) {
                sceall_seurat@meta.data[[i]] <- factor(sceall_seurat@meta.data[[i]], levels = c('1','0', 'NA'))
                gg <- DimPlot(object = sceall_seurat , reduction = "umap",
                              #cells.highlight =which(sceall_seurat[[i]] == '1'),
                              #cols.highlight = 'black',
                              group.by  = i,  pt.size = .5, dims = c(1,2), label = FALSE,
                              cols = c('1'='black', '0'=alpha('grey',.5), 'NA'=alpha('grey',.5))) + ggtitle(i)
                return(gg)
              }, sceall_seurat , simplify = FALSE)

gridExtra::marrangeGrob(ggs, ncol = 3, nrow=3)

ggfs <- sapply(c("FSC_A_MFI", "SSC_A_MFI", "Kit_MFI", "CD41_MFI", "CD61_MFI", "CD71_MFI", "AA4_1_MFI", "CD16_32_MFI", "CD34_MFI", "CD45_MFI"),
              function(i, sceall_seurat) {
                sceall_seurat@meta.data[[i]] <- as.numeric(sceall_seurat@meta.data[[i]])
                gg <- FeaturePlot(object = sceall_seurat , reduction = "umap",
                              #cells.highlight =which(sceall_seurat[[i]] == '1'),
                              #cols.highlight = 'black',
                              features  = i,  pt.size = .5, min.cutoff="q10", max.cutoff="q95", dims = c(1,2), label = FALSE) + ggtitle(i)
                return(gg)
              }, sceall_seurat , simplify = FALSE)

gridExtra::marrangeGrob(ggfs, ncol = 4, nrow=3)


# applying biexponential transormation
# from https://haoeric.com/cytometry-transformation-visual-comparation/
#biexp <- function(x, a = 0.5, b = 1, c = 0.5, d = 1, f = 0, w = 0,
#                  tol = .Machine$double.eps^0.25,
#                  maxit = as.integer(5000)){

#    flowCore:::biexponential_transform(input = x,
#                                       A = a,
#                                       B = b,
#                                       C = c,
#                                       D = d,
#                                       F = f,
#                                       W = w,
#                                       tol = tol,
#                                       maxIt = maxit)
#}


#ggfs <- sapply(c("CD41_MFI", "CD61_MFI", "CD71_MFI",  "AA4_1_MFI", "CD16_32_MFI", "CD34_MFI", "CD45_MFI"),
#              function(i, sceall_seurat) {
#                sceall_seurat@meta.data[[i]] <- biexp(as.numeric(sceall_seurat@meta.data[[i]]))
#                gg <- FeaturePlot(object = sceall_seurat , reduction = "umap",
                              #cells.highlight =which(sceall_seurat[[i]] == '1'),
                              #cols.highlight = 'black',
#                              features  = i,  pt.size = .5, dims = c(1,2), label = FALSE) +
#                  ggtitle(paste(i, 'biexp trans'))
#                return(gg)
#              }, sceall_seurat , simplify = FALSE)

#gridExtra::marrangeGrob(ggfs, ncol = 2, nrow=2)
mca_result <- scMCA::scMCA(scdata = sceall_seurat@assays$RNA@data, numbers_plot = 3)
# assignments to top correlations are in
#mca_result$scMCA

mca_result$scMCA <- structure(.Names = names(mca_result$scMCA),
                              .Data = paste0(mca_result$scMCA, ' (', table(mca_result$scMCA)[mca_result$scMCA], ')'))
sceall_seurat$scMCA <- mca_result$scMCA[colnames(sceall_seurat)]

sceall_seurat$scMCA[grep('\\(1\\)|\\(2\\)|\\(3\\)|\\(4\\)|\\(5\\)|\\(6\\)|\\(7\\)|\\(8\\)|\\(9\\)|\\(10\\)', sceall_seurat$scMCA, perl = FALSE)] <- NA
sceall_seurat$scMCA_score <- apply(mca_result$cors_matrix, 2, max)


DimPlot(object = sceall_seurat , reduction = "umap", group.by = "scMCA", cells = colnames(sceall_seurat)[which(sceall_seurat$scMCA != 'NA')], pt.size = 1, dims = c(1,2))


FeaturePlot(object = sceall_seurat , reduction = "umap", features = "scMCA_score", pt.size = 1, dims = c(1,2))


saveRDS(sceall_seurat, file = paste0(dirdata,'derived/FL/FL_sceall_seurat_fastMNN_bycond.rds'))

saveRDS(sceall_seurat, file = paste0(dirdata,'derived/FL/FL_sceall_seurat_fastMNN_bycond_precluster.rds'))