Commit c34119b5 authored by Anne  BITON's avatar Anne BITON
Browse files

Replace YS_01.load.Rmd

parent cc48c64d
......@@ -40,76 +40,13 @@ dirdata <- "../../data/"
## All runs
```{r}
# Load the UMI count tables of all runs and merge them into one gene x cell
# data.table whose first column is `gene`.
files_ys <- list.files(paste0(dirdata, 'raw/libraryRun123/umi.tab.YS/'), full.names = TRUE)
umicounts <- lapply(files_ys, function(x) {
  # The first row is read on its own and used as the cell column names;
  # 'gene' is prepended for the leading column of the data rows
  # (presumably the header row has no label for the gene column -- verify
  # against the raw files).
  cn <- unlist(fread(x, nrows = 1, stringsAsFactors = FALSE, header = FALSE)[1, ])
  cn <- na.omit(cn)
  dt <- fread(x, header = FALSE, skip = 1)
  setnames(dt, c('gene', cn))
  dt
})
names(umicounts) <- gsub('.*__|.txt', '', basename(files_ys))

# Keep only the genes available in all plates, in one common row order, and
# drop the per-file gene column before binding the tables side by side.
allgenes <- Reduce(intersect, lapply(umicounts, function(x) x$gene))
umicounts <- lapply(umicounts, function(x) {
  x <- x[match(allgenes, gene), ]
  x[, -1, with = FALSE]
})
umicounts <- do.call(cbind, umicounts)
umicounts <- cbind(data.table(gene = allgenes, umicounts))

# Shorten the cell column names.
colnames(umicounts) <- gsub('aMCE2020_._200206_|aMCE2020_.._200206_', '', colnames(umicounts))
colnames(umicounts) <- gsub('_5_Yolk', '.5_Yolk', colnames(umicounts))
colnames(umicounts) <- gsub('_Yolk_Sac', '_YolkSac', colnames(umicounts))

# Zero counts were assigned to the few cells that came from the Fetal Liver for
# mixed plates from the sort (Plates 15 and 16 Run2), so we remove them now.
# colSums() is computed without the gene column, so the positions returned by
# which() are relative to the cell columns only: shift them by one and always
# keep column 1 (gene). Using them unshifted would select the wrong columns.
nonzero_cells <- unname(which(colSums(umicounts[, -1]) > 0))
umicounts <- umicounts[, c(1L, nonzero_cells + 1L), with = FALSE]
```
```{r}
# Read the cell summary sheet and normalise its header names
# (lower case, spaces replaced by underscores).
annot_ys <- readxl::read_xlsx(paste0(dirdata, 'raw/libraryRun123/Summary_Cells.xlsx'))
colnames(annot_ys) <- gsub(' ', '_', tolower(colnames(annot_ys)))

# Clean the labels and derive the plate-level identifiers in one pass.
# `plate` is `halfplate` without its last character.
annot_ys <- annot_ys %>%
  mutate(
    condition = gsub(' ', '_', condition),
    tissue = gsub(' ', '_', tissue),
    plate = gsub('.$', '', halfplate),
    condition_plate = paste(condition, plate, sep = '_'),
    condition_halfplate = paste(condition, halfplate, sep = '_')
  )

# Build one annotation row per cell column of `umicounts`: the part of the
# cell name before '.P' is looked up among the file names (without '.txt').
cell_ids <- colnames(umicounts)[-1]
file_keys <- gsub('.txt', '', annot_ys$filename)
annot <- annot_ys[match(gsub('\\.P.*', '', cell_ids), file_keys), ]
annot <- annot %>%
  mutate(cellID = cell_ids) %>%
  mutate(run = nextseq500_run)
```
```{r load FACS}
# Read every sheet of the index-sort workbook into a list of tables.
facsfile <- paste0(dirdata, '/raw/libraryRun123/FACS/Summary_IndexSort_YS.xlsx')
sheets <- readxl::excel_sheets(facsfile)
facs <- lapply(sheets, function(sheet) readxl::read_xlsx(facsfile, sheet = sheet))

# Name each table after its sheet: 'YS' becomes '_Yolk_Sac', then spaces
# become underscores.
names(facs) <- gsub(' ', '_', gsub('YS', '_Yolk_Sac', sheets))

# Prefix each well id with the name of the table it comes from, then stack
# all tables into a single data.table.
facs <- Map(function(tab, prefix) {
  tab$well_id <- paste0(prefix, '.', tab$well_id)
  tab
}, facs, names(facs))
facs <- as.data.table(do.call(rbind, facs))

# Collapse the per-gate indicator columns into one FACS label. The statements
# run in order, so a row flagged for several gates keeps the last matching one.
facs[A == 1, FACS := 'A']
facs[B == 1, FACS := 'B']
facs[C == 1, FACS := 'C']
facs[D == 1, FACS := 'D']
facs[Mk == 1, FACS := 'Mk_cells']
facs[Mf == 1, FACS := 'Mf_cells']
```{r read saved data }
# Restore the saved cell annotation.
annot <- readRDS(paste0(dirdata,'raw/YS_annot.rds'))
# The UMI counts are stored in two files. The first column of the second
# part is dropped before binding the two parts side by side; reading them
# inline leaves no temporary objects to remove.
umicounts <- cbind(
  readRDS(paste0(dirdata,'raw/YS_umicounts1.rds')),
  readRDS(paste0(dirdata,'raw/YS_umicounts2.rds'))[,-1]
)
```
We loaded UMI count data from `r length(unique(annot$run))` library/sequencing runs and `r length(unique(annot$plate))` plates, covering `r length(unique(annot$condition))` conditions.
......@@ -422,7 +359,7 @@ The number of cells selected per condition and genotype is: `r table(sceall$cond
```{r}
colData(sce) <- cbind(colData(sce), facs[match(paste0(sce$condition, '_Run', sce$nextseq500_run, '_P', sce$plate, '.', gsub('.*\\.', '', sce$cellID)), facs$well_id),])
colData(sce) <- annot[match(colnames(sce), annot$cellID),]
save(sce, file=paste0(dirdata,'derived/YS_library123/YS_library123_sce.rda'))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment