diff --git a/src/R/venns.R b/src/R/venns.R index f2d14cb8e2182c2625b292595a3ff21c16b5fde6..92395d711a28f12e0d397a29311b397323538c9d 100644 --- a/src/R/venns.R +++ b/src/R/venns.R @@ -1,113 +1,288 @@ -library(GenomicFeatures) library(data.table) +library(GenomicRanges) +library(foreach) +library(gplots) -plotVenns <- function(exp1='PeakDiffExpression/Cecum_all_MaxMaxValues_3/Cecum_all_MaxMaxValues_3.RData', - exp2='PeakDiffExpression/Liver_all_MaxMaxValues_3/Liver_all_MaxMaxValues_3.RData', - bed_ref="Genome/pc_ep_mouse_mm10.bed", - filename="venn_LiverAll_CecumAll_ref.pdf", - select_counts=FALSE) { - load(exp1) + +#' Compute overlaps between peaks and MetDB sites +#' @param exp1 RData file output of m6amaker pipeline for the first MeRIPSeq experiment +#' @param exp2 RData file output of m6amaker pipeline for the second MeRIPSeq experiment +#' @param bed_ref BED file containing list of regions contained in the MetDB database for mm10 +#' @param bed_ref_lifted BED file containing list of regions contained in the MetDB database for hg38 lifted over mm10 +#' @param dir directory where to save files +#' @param filename suffix of pdf file where to save plots +#' @param refexp MetDB reference file containing description of each dataset stored in the database +#' @param draw_venn_allMetDb TRUE if the venn diagramm summarizing the overlaps of the two experiments with MetDB should be drawn +#' @param draw_int_perCond TRUE if the barplot showing overlaps between each dataset type/condition available in MetDB and each MeRIPSeq experiment should be drawn +#' +plotIntersectionMetDB <- function(exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData', + exp2='PeakDiffExpression/Liver_allWithoutEc_T13_MaxMaxValues_3/Liver_allWithoutEc_T13_MaxMaxValues_3.RData', + bed_ref="Genome/pc_ep_mouse_mm10.bed", + bed_ref_lifted="Genome/pc_ep_human_hg38_liftOver2mm10_conversions.bed", + dir='.', + filename="LiverAll_CecumAll_ref.pdf", + refexp="Genome/metdb_v2_exp.csv", + select_counts=FALSE, + draw_venn_allMetDb=TRUE, + draw_int_perCond=FALSE +) { + + # load metdb_v2 experiment description + # reformat conditions such that identical conditions get identical names + refexp <- fread("Genome/metdb_v2_exp.csv") + setnames(refexp, c('id', 'id2', 'species', 'cellline', 'tissue', 'note')) + refexp[, condition := paste0(tissue, ': ', cellline)] + refexp[, condition := gsub('^: |: $', '', condition)] + #table(refexp$condition) + refexp[condition == 'A549', condition := 'Lung cancer: A549'] + refexp[, condition := tolower(condition)] + + # select human data + # reannotate conditions such that we can merge samples more homogeneously + refexphuman <- refexp[species %in% c('Homo sapiens'),] + refexphuman[grep('hepg2', condition, ignore.case = TRUE), condition := 'liver: HepG2'] + refexphuman[grep('h1a|h1b', condition, ignore.case = TRUE), condition := 'h1a/h1b endoderm or embryonic stem'] + refexphuman[grep('hek293t', condition, ignore.case = TRUE), condition := 'embryonic kidney: hek293t'] + + # select mouse data + refexp <- refexp[species %in% c('Mus musculus'),] + # reannotate conditions such that we can merge samples more homogeneously + refexp[condition %in% c("adult brain", "embyronic brain") , condition := 'brain'] + refexp[grep('hek293', condition), condition := 'HEK293 derivative, HEK293T embryonic kidney'] + refexp[grep('bvsc', condition), condition := "bvsc embryonic stem cells, bvsc embryonic bodies"] + refexp[grep('mef', condition), condition := "MEF cells"] + refexp[grep('hepg2', condition), condition := 'liver: HepG2'] + + + # load peak annotations of exp2 + load(exp2) + peaksgr <- copy(peaks) + setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) + peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") + if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)] + names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver)) + peaksgr_liveror <- peaksgr_liver + peaksgr_liverde <- peaksgr_liver[paste0('Liver_', diff_peaks)] + peaksgr_liverde_unique <- reduce(peaksgr_liverde) + # load peak annotations of exp1 + exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData' + load(exp1) peaksgr <- copy(peaks) setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) peaksgr_cecum <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") if (select_counts) peaksgr_cecum <- peaksgr_cecum[rownames(counts_epi)] names(peaksgr_cecum) <- paste0('Cecum_',names(peaksgr_cecum)) + peaksgr_cecumor <- peaksgr_cecum + peaksgr_cecumde <- peaksgr_cecum[paste0('Cecum_', diff_peaks)] + peaksgr_cecumde_unique <- reduce(peaksgr_cecumde) - # load MERIP reference sites + # load MERIP reference sites / MOUSE m6aref <- fread(bed_ref)#sb_m6a_mouse_mm10.bed") setnames(m6aref, c('chr', 'start', 'end', 'type', 'id', 'strand')) m6arefgr <- makeGRangesFromDataFrame(as.data.frame(m6aref), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr") - m6arefgr <- reduce(m6arefgr) + m6arefgr$dataset <- sub('_[0-9]+$','', as.character(m6arefgr$id)) names(m6arefgr) <- paste0('ref_',1:length(m6arefgr)) + m6arefgror <- m6arefgr + # length selection + #m6arefgror<- m6arefgr <- m6arefgr[width(m6arefgr) < 5000, ] + + # load MERIP reference sites / HUMAN LIFTED OVER TO MOUSE + m6arefhuman <- fread(bed_ref_lifted)#sb_m6a_mouse_mm10.bed") + setnames(m6arefhuman, c('chr', 'start', 'end', 'id', 'chr_mm10_jesaispas', 'strand', 'start_mm10', 'end_mm10', 'supp')) + m6arefhumangr <- makeGRangesFromDataFrame(as.data.frame(m6arefhuman), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr") + m6arefhumangr$dataset <- sub('_[0-9]+$','', as.character(m6arefhumangr$id)) + names(m6arefhumangr) <- paste0('ref_',1:length(m6arefhumangr)) + m6arefhumangror <- m6arefhumangr + # length selection + #m6arefhumangror<- m6arefhumangr <- m6arefhumangr[width(m6arefhumangr) < 5000, ] + + + #### compute venn for all peaks present in MetDB: MM10 ### + cond <- unique(refexp$condition) + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, + datasetids = refexp[condition %in% cond, ]$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + # there might be a few peaks that overlap two or more peaks from the other tissue or in the reference, + # when this happens, the overlap numbers may differ by a few peaks between Cecum and Liver + # in that case, we keep the number of minimum overlap and we move the additional peak(s) to the down intersection of the corresponding tissue + dfint1["None",] <- dfint1["None",] + (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",])) + dfint1["Other_tissue_only",] <- dfint1["Other_tissue_only",] - (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",])) + dfint1["Ref_only",] <- dfint1["Ref_only",] + (dfint1["Both",]-min(dfint1["Both",])) + dfint1["Both",] <- dfint1["Both",] - (dfint1["Both",]-min(dfint1["Both",])) + + + #number of MetDB regions that have no overlap with our peaks + #dfint2['None',] + #one of our peak can overlap several Med + + if (draw_venn_allMetDb) { + #dev.off() + if (!missing(filename) | !is.null(filename)) pdf(paste0(dir,'/venn_allCond_',filename), 7,7) + venn.plot <- VennDiagram::draw.triple.venn( + area1 = dfint1["Total", "Cecum"], area2 = dfint1["Total", "Liver"], + area3 = dfint2["None", "Ref"] + dfint1["Ref_only", "Cecum"]+ dfint1["Ref_only", "Liver"]+ dfint1["Both", "Cecum"], + n12 = dfint1["Other_tissue_only", "Cecum"] + dfint1["Both", "Cecum"], + n13 = dfint1["Ref_only", "Cecum"] + dfint1["Both", "Cecum"], + n23 = dfint1["Ref_only", "Liver"] + dfint1["Both", "Liver"], + n123 = dfint1["Both", "Cecum"], + category = c("Cecum", "Liver", "Ref"), + #fill = c("dodgerblue", "goldenrod1", "darkorange1"), + cat.col = c("dodgerblue", "goldenrod1", "darkorange1"), + cat.cex = 2, + margin = 0.05, + cex = 1.5, + ind = TRUE); + if (!missing(filename) | !is.null(filename)) dev.off() + } + + + + if (draw_int_perCond) { + #### count overlap for each dataset available in MetDB : MM10 #### + conditions <- unique(refexp$condition) + + expsel <- refexp[condition %in% cond, ] + + intbycond <- foreach (cond = conditions, .combine = rbind) %do% { + + expsel <- refexp[condition %in% cond, ] + + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, datasetids = expsel$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + nbref <- dfs[[3]] + + # nb/percentage in ref + nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2]) + #percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2]) + # nb/percentage in other tissue + nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2]) + #percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2]) + #peaksgr_cecum <- peaksgr_cecumor + #peaksgr_liver <- peaksgr_liveror + + return(c(nbinref, nbref, paste(expsel$id, collapse = ','))) + } + colnames(intbycond) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition') + intbycond <- cbind(metdb_condition = conditions, intbycond, species = 'Mus musculus') + + + #### count overlap for each dataset available in MetDB : HG38 lifted over MM10 #### + conditions <- unique(refexphuman$condition) + + intbycond_human <- foreach (cond = conditions, .combine = rbind) %do% { + + expsel <- refexphuman[condition %in% cond, ] + + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefhumangr, datasetids = expsel$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + nbref <- dfs[[3]] + + # percentage in ref + nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2]) + # percentage in other tissue + nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2]) + + return(c(nbinref, nbref, paste(expsel$id, collapse = ','))) + } + colnames(intbycond_human) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition') + intbycond_human <- cbind(metdb_condition = conditions, intbycond_human, species = 'Homo sapiens (hg38 lifted over mm10)') + + + intbycondall1 <- data.frame(rbind(intbycond, intbycond_human), stringsAsFactors = FALSE) + intbycondall1$Cecum_perc <- as.numeric(as.character(intbycondall1$Cecum))/length(peaksgr_cecum) + intbycondall1$Liver_perc <- as.numeric(as.character(intbycondall1$Liver))/length(peaksgr_liver) + #write.table(intbycondall1, file = 'doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.xls', sep = '\t', quote = FALSE, row.names = FALSE) + + intbycondall <- as.data.frame(rbind(intbycond, intbycond_human)) + intbycondall <- rbind(cbind( intbycondall[,-3] %>% dplyr::rename(.,nbOverlap = Cecum), Tissue = 'Cecum'), + cbind( intbycondall[,-2] %>% dplyr::rename(.,nbOverlap = Liver), Tissue = 'Liver')) + + intbycondall$percOverlap[intbycondall$Tissue == 'Cecum'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Cecum']))/length(peaksgr_cecum) + intbycondall$percOverlap[intbycondall$Tissue == 'Liver'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Liver']))/length(peaksgr_liver) + + intbycondall$species <- factor(intbycondall$species, levels = c("Mus musculus", "Homo sapiens (hg38 lifted over mm10)")) + + + ggnb <- ggplot(intbycondall, aes(x = metdb_condition, y = as.numeric(as.character(nbOverlap)), fill = Tissue)) + + geom_bar(stat = 'identity', position = 'dodge') + facet_wrap(~species, scales = 'free_x') + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('nb of peaks overlapping a reference') + xlab('') + + + ggperc <- ggplot(intbycondall, aes(x = metdb_condition, y = percOverlap, fill = Tissue)) + + geom_bar(stat = 'identity', position = 'dodge', width = 0.8) + facet_wrap(~species, scales = 'free_x') + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('% of peaks overlapping a reference') + + xlab('') + scale_y_continuous(labels=scales::percent) + + #pdf('doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.pdf', width = 8, height = 12) + pdf(paste0(dir,'/barplot_byCond_',filename), width = 8, height = 12) + print(gridExtra::marrangeGrob(list(ggnb, ggperc),# + ggtitle('tSNE')), ,d12tsne1,d12tsne + ncol = 1, nrow = 2, top = paste("Overlap between the detected peaks \n and Met-db v2 peaks"))) + + dev.off() + + } +} + + + + +countOverlaps <- function(peaksgr_cecum, peaksgr_liver, m6arefgr, datasetids) { + + m6arefgrsel <- reduce(m6arefgr[m6arefgr$dataset %in% datasetids,]) + names(m6arefgrsel) <- paste0('ref_',1:length(m6arefgrsel)) # compute overlap between exp1 peaks and ref - ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgr, minoverlap = 1) + ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgrsel, minoverlap = 1) ov <- as.data.table(ov) - ov[, peak := names(peaksgr_cecum)[queryHits]] - ov[, ref := names(m6arefgr)[subjectHits]] - ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgr[ref])), by = .I] - # assign name of the ref peak that has the biggest overlap to the exp1 peak - peaks_to_ref_cecum <- ov[, ref[which.max(ovWidth)], by = peak] - names(peaksgr_cecum)[match(peaks_to_ref_cecum$peak,names(peaksgr_cecum))] <- paste0(peaks_to_ref_cecum$peak, '_', peaks_to_ref_cecum$V1) - #ov[, refm6a := names(m6arefgr)[subjectHits]] - - # load peak annotations of exp2 - load(exp2) - peaksgr <- copy(peaks) - setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) - peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") - if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)] - names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver)) + if (nrow(ov) > 0 ) { + ov[, peak := names(peaksgr_cecum)[queryHits]] + ov[, ref := names(m6arefgrsel)[subjectHits]] + ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgrsel[ref])), by = .I] + } else { + ov <- data.table(peaks = NA, ref = NA, ovWidth = 0) + } # compute overlap between exp2 peaks and ref - ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgr, minoverlap = 1) + ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgrsel, minoverlap = 1) ov_liverref <- as.data.table(ov_liverref) ov_liverref[, peak :=names(peaksgr_liver)[queryHits]] - ov_liverref[, ref := names(m6arefgr)[subjectHits]] - ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgr[ref])), by = .I] - # assign name of the ref peak that has the biggest overlap to the exp2 peak - peaks_to_ref_liver <- ov_liverref[, ref[which.max(ovWidth)], by = peak] - names(peaksgr_liver)[match(peaks_to_ref_liver$peak,names(peaksgr_liver))] <- peaks_to_ref_liver$V1 - + ov_liverref[, ref := names(m6arefgrsel)[subjectHits]] + ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgrsel[ref])), by = .I] # compute overlap between exp1 and exp2 ov_cecum_liver <- as.data.table(findOverlaps(query = peaksgr_liver, subject = peaksgr_cecum, minoverlap = 1)) ov_cecum_liver[, peakCecum := names(peaksgr_cecum)[subjectHits]] ov_cecum_liver[, peakLiver := names(peaksgr_liver)[queryHits]] ov_cecum_liver[, ovWidth := width(pintersect(peaksgr_liver[peakLiver],peaksgr_cecum[peakCecum])), by = .I] - # assign same name to the peak overlapping between exp1 and exp2, using the max overlap only - peaks_to_cecum_liver <- ov_cecum_liver[, peakCecum[which.max(ovWidth)], by = peakLiver] - names(peaksgr_liver)[match(peaks_to_cecum_liver$peakLiver,names(peaksgr_liver))] <- peaks_to_cecum_liver$V1 - # peaks_to_cecum_liver <- ov_cecum_liver[, peakLiver[which.max(ovWidth)], by = peakCecum] - # names(peaksgr_cecum)[match(peaks_to_cecum_liver$peakCecum,names(peaksgr_cecum))] <- peaks_to_cecum_liver$V1 - - if (!missing(filename) | !is.null(filename)) pdf(filename, 7,7) - venn(data = list(Liver=names(peaksgr_liver), Cecum=names(peaksgr_cecum), "reference peaks"=names(m6arefgr))) - if (!missing(filename) | !is.null(filename)) dev.off() - - #length(unique(ov$peak))/length(peaksgr_cecum) - #length(unique(ov$peak));length(peaksgr_cecum) - #length(unique(ov_liverref$peak))/length(peaksgr_liver) - #length(unique(ov_cecum_liver$peakCecum))/length(peaksgr_cecum) - #length(unique(ov_cecum_liver$peakLiver))/length(peaksgr_liver) - #summary(as.numeric(table(ov_liverref$ref))) dfint1 <- data.frame(Cecum = c(Ref_only = length(setdiff(unique(ov$peak), unique(ov_cecum_liver$peakCecum))), - Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), - Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), - None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))), - Total = length(peaksgr_cecum)), - Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))), - Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), - Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), - None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))), - Total = length(peaksgr_liver))) + Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), + Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), + None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))), + Total = length(peaksgr_cecum)), + Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))), + Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), + Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), + None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))), + Total = length(peaksgr_liver))) dfint2 <- data.frame(Ref = c(Cecum_only =length(setdiff(unique(ov$ref), unique(ov_liverref$ref))), - Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))), - Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))), - None = length(setdiff(unique(names(m6arefgr)), c(unique(ov$ref), unique(ov_liverref$ref)))), - Total = length(m6arefgr))) - - dfint1 - dfint2 - #as.matrix(dfint1)/c(length(peaksgr_cecum), length(peaksgr_liver)) + Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))), + Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))), + None = length(setdiff(unique(names(m6arefgrsel)), c(unique(ov$ref), unique(ov_liverref$ref)))), + Total = length(m6arefgrsel))) + dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE) - #as.matrix(dfint2)/length(m6arefgr) - dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgr)), center = FALSE) - - - # percentage in ref - percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2]) - # percentage in other tissue - percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2]) - #length(subsetByOverlaps(peaksgr_liver, m6arefgr))/length(peaksgr_liver) - return(data.frame(Cecum = c(perc_inothertissue = percinothertissue[1], perc_inref = percinref[1]), - Liver = c(perc_inothertissue = percinothertissue[2], perc_inref = percinref[2]))) - #return(list(dfint1, dfint2, dfint1_perc, dfint2_perc )) + dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgrsel)), center = FALSE) + + + return(list(dfint1, dfint2, length(m6arefgrsel))) } diff --git a/src/m6aAnalysis/R/venns.R b/src/m6aAnalysis/R/venns.R index 59772c931d93fbf407a4926fffaf3b510b6c4a33..92395d711a28f12e0d397a29311b397323538c9d 100644 --- a/src/m6aAnalysis/R/venns.R +++ b/src/m6aAnalysis/R/venns.R @@ -1,116 +1,288 @@ -library(GenomicFeatures) library(data.table) +library(GenomicRanges) +library(foreach) +library(gplots) -plotVenns <- function(exp1='PeakDiffExpression/Cecum_all_MaxMaxValues_3/Cecum_all_MaxMaxValues_3.RData', - exp2='PeakDiffExpression/Liver_all_MaxMaxValues_3/Liver_all_MaxMaxValues_3.RData', - bed_ref="Genome/pc_ep_mouse_mm10.bed", - filename="venn_LiverAll_CecumAll_ref.pdf", - select_counts=FALSE) { - load(exp1) + +#' Compute overlaps between peaks and MetDB sites +#' @param exp1 RData file output of m6amaker pipeline for the first MeRIPSeq experiment +#' @param exp2 RData file output of m6amaker pipeline for the second MeRIPSeq experiment +#' @param bed_ref BED file containing list of regions contained in the MetDB database for mm10 +#' @param bed_ref_lifted BED file containing list of regions contained in the MetDB database for hg38 lifted over mm10 +#' @param dir directory where to save files +#' @param filename suffix of pdf file where to save plots +#' @param refexp MetDB reference file containing description of each dataset stored in the database +#' @param draw_venn_allMetDb TRUE if the venn diagramm summarizing the overlaps of the two experiments with MetDB should be drawn +#' @param draw_int_perCond TRUE if the barplot showing overlaps between each dataset type/condition available in MetDB and each MeRIPSeq experiment should be drawn +#' +plotIntersectionMetDB <- function(exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData', + exp2='PeakDiffExpression/Liver_allWithoutEc_T13_MaxMaxValues_3/Liver_allWithoutEc_T13_MaxMaxValues_3.RData', + bed_ref="Genome/pc_ep_mouse_mm10.bed", + bed_ref_lifted="Genome/pc_ep_human_hg38_liftOver2mm10_conversions.bed", + dir='.', + filename="LiverAll_CecumAll_ref.pdf", + refexp="Genome/metdb_v2_exp.csv", + select_counts=FALSE, + draw_venn_allMetDb=TRUE, + draw_int_perCond=FALSE +) { + + # load metdb_v2 experiment description + # reformat conditions such that identical conditions get identical names + refexp <- fread("Genome/metdb_v2_exp.csv") + setnames(refexp, c('id', 'id2', 'species', 'cellline', 'tissue', 'note')) + refexp[, condition := paste0(tissue, ': ', cellline)] + refexp[, condition := gsub('^: |: $', '', condition)] + #table(refexp$condition) + refexp[condition == 'A549', condition := 'Lung cancer: A549'] + refexp[, condition := tolower(condition)] + + # select human data + # reannotate conditions such that we can merge samples more homogeneously + refexphuman <- refexp[species %in% c('Homo sapiens'),] + refexphuman[grep('hepg2', condition, ignore.case = TRUE), condition := 'liver: HepG2'] + refexphuman[grep('h1a|h1b', condition, ignore.case = TRUE), condition := 'h1a/h1b endoderm or embryonic stem'] + refexphuman[grep('hek293t', condition, ignore.case = TRUE), condition := 'embryonic kidney: hek293t'] + + # select mouse data + refexp <- refexp[species %in% c('Mus musculus'),] + # reannotate conditions such that we can merge samples more homogeneously + refexp[condition %in% c("adult brain", "embyronic brain") , condition := 'brain'] + refexp[grep('hek293', condition), condition := 'HEK293 derivative, HEK293T embryonic kidney'] + refexp[grep('bvsc', condition), condition := "bvsc embryonic stem cells, bvsc embryonic bodies"] + refexp[grep('mef', condition), condition := "MEF cells"] + refexp[grep('hepg2', condition), condition := 'liver: HepG2'] + + + # load peak annotations of exp2 + load(exp2) + peaksgr <- copy(peaks) + setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) + peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") + if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)] + names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver)) + peaksgr_liveror <- peaksgr_liver + peaksgr_liverde <- peaksgr_liver[paste0('Liver_', diff_peaks)] + peaksgr_liverde_unique <- reduce(peaksgr_liverde) + # load peak annotations of exp1 + exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData' + load(exp1) peaksgr <- copy(peaks) setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) peaksgr_cecum <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") if (select_counts) peaksgr_cecum <- peaksgr_cecum[rownames(counts_epi)] names(peaksgr_cecum) <- paste0('Cecum_',names(peaksgr_cecum)) + peaksgr_cecumor <- peaksgr_cecum + peaksgr_cecumde <- peaksgr_cecum[paste0('Cecum_', diff_peaks)] + peaksgr_cecumde_unique <- reduce(peaksgr_cecumde) - # load MERIP reference sites + # load MERIP reference sites / MOUSE m6aref <- fread(bed_ref)#sb_m6a_mouse_mm10.bed") setnames(m6aref, c('chr', 'start', 'end', 'type', 'id', 'strand')) m6arefgr <- makeGRangesFromDataFrame(as.data.frame(m6aref), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr") - m6arefgr <- reduce(m6arefgr) + m6arefgr$dataset <- sub('_[0-9]+$','', as.character(m6arefgr$id)) names(m6arefgr) <- paste0('ref_',1:length(m6arefgr)) + m6arefgror <- m6arefgr + # length selection + #m6arefgror<- m6arefgr <- m6arefgr[width(m6arefgr) < 5000, ] + + # load MERIP reference sites / HUMAN LIFTED OVER TO MOUSE + m6arefhuman <- fread(bed_ref_lifted)#sb_m6a_mouse_mm10.bed") + setnames(m6arefhuman, c('chr', 'start', 'end', 'id', 'chr_mm10_jesaispas', 'strand', 'start_mm10', 'end_mm10', 'supp')) + m6arefhumangr <- makeGRangesFromDataFrame(as.data.frame(m6arefhuman), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr") + m6arefhumangr$dataset <- sub('_[0-9]+$','', as.character(m6arefhumangr$id)) + names(m6arefhumangr) <- paste0('ref_',1:length(m6arefhumangr)) + m6arefhumangror <- m6arefhumangr + # length selection + #m6arefhumangror<- m6arefhumangr <- m6arefhumangr[width(m6arefhumangr) < 5000, ] + + + #### compute venn for all peaks present in MetDB: MM10 ### + cond <- unique(refexp$condition) + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, + datasetids = refexp[condition %in% cond, ]$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + # there might be a few peaks that overlap two or more peaks from the other tissue or in the reference, + # when this happens, the overlap numbers may differ by a few peaks between Cecum and Liver + # in that case, we keep the number of minimum overlap and we move the additional peak(s) to the down intersection of the corresponding tissue + dfint1["None",] <- dfint1["None",] + (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",])) + dfint1["Other_tissue_only",] <- dfint1["Other_tissue_only",] - (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",])) + dfint1["Ref_only",] <- dfint1["Ref_only",] + (dfint1["Both",]-min(dfint1["Both",])) + dfint1["Both",] <- dfint1["Both",] - (dfint1["Both",]-min(dfint1["Both",])) + + + #number of MetDB regions that have no overlap with our peaks + #dfint2['None',] + #one of our peak can overlap several Med + + if (draw_venn_allMetDb) { + #dev.off() + if (!missing(filename) | !is.null(filename)) pdf(paste0(dir,'/venn_allCond_',filename), 7,7) + venn.plot <- VennDiagram::draw.triple.venn( + area1 = dfint1["Total", "Cecum"], area2 = dfint1["Total", "Liver"], + area3 = dfint2["None", "Ref"] + dfint1["Ref_only", "Cecum"]+ dfint1["Ref_only", "Liver"]+ dfint1["Both", "Cecum"], + n12 = dfint1["Other_tissue_only", "Cecum"] + dfint1["Both", "Cecum"], + n13 = dfint1["Ref_only", "Cecum"] + dfint1["Both", "Cecum"], + n23 = dfint1["Ref_only", "Liver"] + dfint1["Both", "Liver"], + n123 = dfint1["Both", "Cecum"], + category = c("Cecum", "Liver", "Ref"), + #fill = c("dodgerblue", "goldenrod1", "darkorange1"), + cat.col = c("dodgerblue", "goldenrod1", "darkorange1"), + cat.cex = 2, + margin = 0.05, + cex = 1.5, + ind = TRUE); + if (!missing(filename) | !is.null(filename)) dev.off() + } + + + + if (draw_int_perCond) { + #### count overlap for each dataset available in MetDB : MM10 #### + conditions <- unique(refexp$condition) + + expsel <- refexp[condition %in% cond, ] + + intbycond <- foreach (cond = conditions, .combine = rbind) %do% { + + expsel <- refexp[condition %in% cond, ] + + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, datasetids = expsel$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + nbref <- dfs[[3]] + + # nb/percentage in ref + nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2]) + #percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2]) + # nb/percentage in other tissue + nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2]) + #percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2]) + #peaksgr_cecum <- peaksgr_cecumor + #peaksgr_liver <- peaksgr_liveror + + return(c(nbinref, nbref, paste(expsel$id, collapse = ','))) + } + colnames(intbycond) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition') + intbycond <- cbind(metdb_condition = conditions, intbycond, species = 'Mus musculus') + + + #### count overlap for each dataset available in MetDB : HG38 lifted over MM10 #### + conditions <- unique(refexphuman$condition) + + intbycond_human <- foreach (cond = conditions, .combine = rbind) %do% { + + expsel <- refexphuman[condition %in% cond, ] + + dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefhumangr, datasetids = expsel$id) + dfint1 <- dfs[[1]] + dfint2 <- dfs[[2]] + nbref <- dfs[[3]] + + # percentage in ref + nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2]) + # percentage in other tissue + nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2]) + + return(c(nbinref, nbref, paste(expsel$id, collapse = ','))) + } + colnames(intbycond_human) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition') + intbycond_human <- cbind(metdb_condition = conditions, intbycond_human, species = 'Homo sapiens (hg38 lifted over mm10)') + + + intbycondall1 <- data.frame(rbind(intbycond, intbycond_human), stringsAsFactors = FALSE) + intbycondall1$Cecum_perc <- as.numeric(as.character(intbycondall1$Cecum))/length(peaksgr_cecum) + intbycondall1$Liver_perc <- as.numeric(as.character(intbycondall1$Liver))/length(peaksgr_liver) + #write.table(intbycondall1, file = 'doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.xls', sep = '\t', quote = FALSE, row.names = FALSE) + + intbycondall <- as.data.frame(rbind(intbycond, intbycond_human)) + intbycondall <- rbind(cbind( intbycondall[,-3] %>% dplyr::rename(.,nbOverlap = Cecum), Tissue = 'Cecum'), + cbind( intbycondall[,-2] %>% dplyr::rename(.,nbOverlap = Liver), Tissue = 'Liver')) + + intbycondall$percOverlap[intbycondall$Tissue == 'Cecum'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Cecum']))/length(peaksgr_cecum) + intbycondall$percOverlap[intbycondall$Tissue == 'Liver'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Liver']))/length(peaksgr_liver) + + intbycondall$species <- factor(intbycondall$species, levels = c("Mus musculus", "Homo sapiens (hg38 lifted over mm10)")) + + + ggnb <- ggplot(intbycondall, aes(x = metdb_condition, y = as.numeric(as.character(nbOverlap)), fill = Tissue)) + + geom_bar(stat = 'identity', position = 'dodge') + facet_wrap(~species, scales = 'free_x') + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('nb of peaks overlapping a reference') + xlab('') + + + ggperc <- ggplot(intbycondall, aes(x = metdb_condition, y = percOverlap, fill = Tissue)) + + geom_bar(stat = 'identity', position = 'dodge', width = 0.8) + facet_wrap(~species, scales = 'free_x') + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('% of peaks overlapping a reference') + + xlab('') + scale_y_continuous(labels=scales::percent) + + #pdf('doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.pdf', width = 8, height = 12) + pdf(paste0(dir,'/barplot_byCond_',filename), width = 8, height = 12) + print(gridExtra::marrangeGrob(list(ggnb, ggperc),# + ggtitle('tSNE')), ,d12tsne1,d12tsne + ncol = 1, nrow = 2, top = paste("Overlap between the detected peaks \n and Met-db v2 peaks"))) + + dev.off() + + } +} + + + + +countOverlaps <- function(peaksgr_cecum, peaksgr_liver, m6arefgr, datasetids) { + + m6arefgrsel <- reduce(m6arefgr[m6arefgr$dataset %in% datasetids,]) + names(m6arefgrsel) <- paste0('ref_',1:length(m6arefgrsel)) # compute overlap between exp1 peaks and ref - ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgr, minoverlap = 1) + ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgrsel, minoverlap = 1) ov <- as.data.table(ov) - ov[, peak := names(peaksgr_cecum)[queryHits]] - ov[, ref := names(m6arefgr)[subjectHits]] - ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgr[ref])), by = .I] - # assign name of the ref peak that has the biggest overlap to the exp1 peak - peaks_to_ref_cecum <- ov[, ref[which.max(ovWidth)], by = peak] - names(peaksgr_cecum)[match(peaks_to_ref_cecum$peak,names(peaksgr_cecum))] <- paste0(peaks_to_ref_cecum$peak, '_', peaks_to_ref_cecum$V1) - #ov[, refm6a := names(m6arefgr)[subjectHits]] - - # load peak annotations of exp2 - load(exp2) - peaksgr <- copy(peaks) - setnames(peaksgr, c("chromo_peak", "begin_peak", "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene')) - peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr") - if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)] - names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver)) + if (nrow(ov) > 0 ) { + ov[, peak := names(peaksgr_cecum)[queryHits]] + ov[, ref := names(m6arefgrsel)[subjectHits]] + ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgrsel[ref])), by = .I] + } else { + ov <- data.table(peaks = NA, ref = NA, ovWidth = 0) + } # compute overlap between exp2 peaks and ref - ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgr, minoverlap = 1) + ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgrsel, minoverlap = 1) ov_liverref <- as.data.table(ov_liverref) ov_liverref[, peak :=names(peaksgr_liver)[queryHits]] - ov_liverref[, ref := names(m6arefgr)[subjectHits]] - ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgr[ref])), by = .I] - # assign name of the ref peak that has the biggest overlap to the exp2 peak - peaks_to_ref_liver <- ov_liverref[, ref[which.max(ovWidth)], by = peak] - names(peaksgr_liver)[match(peaks_to_ref_liver$peak,names(peaksgr_liver))] <- peaks_to_ref_liver$V1 - + ov_liverref[, ref := names(m6arefgrsel)[subjectHits]] + ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgrsel[ref])), by = .I] # compute overlap between exp1 and exp2 ov_cecum_liver <- as.data.table(findOverlaps(query = peaksgr_liver, subject = peaksgr_cecum, minoverlap = 1)) ov_cecum_liver[, peakCecum := names(peaksgr_cecum)[subjectHits]] ov_cecum_liver[, peakLiver := names(peaksgr_liver)[queryHits]] ov_cecum_liver[, ovWidth := width(pintersect(peaksgr_liver[peakLiver],peaksgr_cecum[peakCecum])), by = .I] - # assign same name to the peak overlapping between exp1 and exp2, using the max overlap only - peaks_to_cecum_liver <- ov_cecum_liver[, peakCecum[which.max(ovWidth)], by = peakLiver] - names(peaksgr_liver)[match(peaks_to_cecum_liver$peakLiver,names(peaksgr_liver))] <- peaks_to_cecum_liver$V1 - # peaks_to_cecum_liver <- ov_cecum_liver[, peakLiver[which.max(ovWidth)], by = peakCecum] - # names(peaksgr_cecum)[match(peaks_to_cecum_liver$peakCecum,names(peaksgr_cecum))] <- peaks_to_cecum_liver$V1 - - if (!missing(filename) | !is.null(filename)) pdf(filename, 7,7) - venn(data = list(Liver=names(peaksgr_liver), Cecum=names(peaksgr_cecum), "reference peaks"=names(m6arefgr))) - if (!missing(filename) | !is.null(filename)) dev.off() - - #length(unique(ov$peak))/length(peaksgr_cecum) - #length(unique(ov$peak));length(peaksgr_cecum) - #length(unique(ov_liverref$peak))/length(peaksgr_liver) - #length(unique(ov_cecum_liver$peakCecum))/length(peaksgr_cecum) - #length(unique(ov_cecum_liver$peakLiver))/length(peaksgr_liver) - #summary(as.numeric(table(ov_liverref$ref))) dfint1 <- data.frame(Cecum = c(Ref_only = length(setdiff(unique(ov$peak), unique(ov_cecum_liver$peakCecum))), - Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), - Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), - None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))), - Total = length(peaksgr_cecum)), - Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))), - Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), - Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), - None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))), - Total = length(peaksgr_liver))) + Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), + Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))), + None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))), + Total = length(peaksgr_cecum)), + Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))), + Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), + Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))), + None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))), + Total = length(peaksgr_liver))) dfint2 <- data.frame(Ref = c(Cecum_only =length(setdiff(unique(ov$ref), unique(ov_liverref$ref))), - Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))), - Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))), - None = length(setdiff(unique(names(m6arefgr)), c(unique(ov$ref), unique(ov_liverref$ref)))), - Total = length(m6arefgr))) - - dfint1 - dfint2 - #as.matrix(dfint1)/c(length(peaksgr_cecum), length(peaksgr_liver)) - dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE) - #as.matrix(dfint2)/length(m6arefgr) - dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgr)), center = FALSE) - - - # percentage in ref - percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2]) - # percentage in other tissue - percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2]) - #length(subsetByOverlaps(peaksgr_liver, m6arefgr))/length(peaksgr_liver) - return(data.frame(Cecum = c(perc_inothertissue = percinothertissue[1], perc_inref = percinref[1]), - Liver = c(perc_inothertissue = percinothertissue[2], perc_inref = percinref[2]))) - #return(list(dfint1, dfint2, dfint1_perc, dfint2_perc )) -} + Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))), + Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))), + None = length(setdiff(unique(names(m6arefgrsel)), c(unique(ov$ref), unique(ov_liverref$ref)))), + Total = length(m6arefgrsel))) + dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE) + dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgrsel)), center = FALSE) + return(list(dfint1, dfint2, length(m6arefgrsel))) +}