diff --git a/src/R/venns.R b/src/R/venns.R
index f2d14cb8e2182c2625b292595a3ff21c16b5fde6..92395d711a28f12e0d397a29311b397323538c9d 100644
--- a/src/R/venns.R
+++ b/src/R/venns.R
@@ -1,113 +1,288 @@
-library(GenomicFeatures)
 library(data.table)
+library(GenomicRanges)
+library(foreach)
+library(gplots)
 
 
-plotVenns <- function(exp1='PeakDiffExpression/Cecum_all_MaxMaxValues_3/Cecum_all_MaxMaxValues_3.RData',
-                      exp2='PeakDiffExpression/Liver_all_MaxMaxValues_3/Liver_all_MaxMaxValues_3.RData',
-                      bed_ref="Genome/pc_ep_mouse_mm10.bed",
-                      filename="venn_LiverAll_CecumAll_ref.pdf",
-                      select_counts=FALSE) {
 
 
-  load(exp1)
+
+#' Compute overlaps between peaks and MetDB sites
+#' @param exp1 RData file output of m6amaker pipeline for the first MeRIPSeq experiment
+#' @param exp2 RData file output of m6amaker pipeline for the second MeRIPSeq experiment
+#' @param bed_ref BED file containing list of regions contained in the MetDB database for mm10
+#' @param bed_ref_lifted BED file containing list of regions contained in the MetDB database for hg38 lifted over mm10
+#' @param dir directory where to save files
+#' @param filename suffix of pdf file where to save plots
+#' @param refexp MetDB reference file containing description of each dataset stored in the database
+#' @param draw_venn_allMetDb TRUE if the venn diagramm summarizing the overlaps of the two experiments with MetDB should be drawn
+#' @param draw_int_perCond TRUE  if the barplot showing overlaps between each dataset type/condition available in MetDB and each MeRIPSeq experiment should be drawn
+#'
+plotIntersectionMetDB <- function(exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData',
+                                  exp2='PeakDiffExpression/Liver_allWithoutEc_T13_MaxMaxValues_3/Liver_allWithoutEc_T13_MaxMaxValues_3.RData',
+                                  bed_ref="Genome/pc_ep_mouse_mm10.bed",
+                                  bed_ref_lifted="Genome/pc_ep_human_hg38_liftOver2mm10_conversions.bed",
+                                  dir='.',
+                                  filename="LiverAll_CecumAll_ref.pdf",
+                                  refexp="Genome/metdb_v2_exp.csv",
+                                  select_counts=FALSE,
+                                  draw_venn_allMetDb=TRUE,
+                                  draw_int_perCond=FALSE
+) {
+
+  # load metdb_v2 experiment description
+  # reformat conditions such that identical conditions get identical names
+  refexp <- fread("Genome/metdb_v2_exp.csv")
+  setnames(refexp, c('id', 'id2', 'species', 'cellline', 'tissue', 'note'))
+  refexp[, condition := paste0(tissue, ': ', cellline)]
+  refexp[, condition := gsub('^: |: $', '', condition)]
+  #table(refexp$condition)
+  refexp[condition == 'A549', condition := 'Lung cancer: A549']
+  refexp[, condition := tolower(condition)]
+
+  # select human data
+  # reannotate conditions such that we can merge samples more homogeneously
+  refexphuman <- refexp[species %in% c('Homo sapiens'),]
+  refexphuman[grep('hepg2', condition, ignore.case = TRUE), condition := 'liver: HepG2']
+  refexphuman[grep('h1a|h1b', condition, ignore.case = TRUE), condition := 'h1a/h1b endoderm or embryonic stem']
+  refexphuman[grep('hek293t', condition, ignore.case = TRUE), condition := 'embryonic kidney: hek293t']
+
+  # select mouse data
+  refexp <- refexp[species %in% c('Mus musculus'),]
+  # reannotate conditions such that we can merge samples more homogeneously
+  refexp[condition %in% c("adult brain", "embyronic brain") , condition := 'brain']
+  refexp[grep('hek293', condition), condition := 'HEK293 derivative, HEK293T embryonic kidney']
+  refexp[grep('bvsc', condition), condition := "bvsc embryonic stem cells, bvsc embryonic bodies"]
+  refexp[grep('mef', condition), condition := "MEF cells"]
+  refexp[grep('hepg2', condition), condition := 'liver: HepG2']
+
+
+  # load peak annotations of exp2
+  load(exp2)
+  peaksgr <- copy(peaks)
+  setnames(peaksgr, c("chromo_peak",   "begin_peak",  "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
+  peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
+  if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)]
+  names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver))
+  peaksgr_liveror <- peaksgr_liver
+  peaksgr_liverde <- peaksgr_liver[paste0('Liver_', diff_peaks)]
+  peaksgr_liverde_unique <- reduce(peaksgr_liverde)
+
 
   # load peak annotations of exp1
+  exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData'
+  load(exp1)
   peaksgr <- copy(peaks)
   setnames(peaksgr, c("chromo_peak",   "begin_peak",    "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
   peaksgr_cecum <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
   if (select_counts) peaksgr_cecum <- peaksgr_cecum[rownames(counts_epi)]
   names(peaksgr_cecum) <- paste0('Cecum_',names(peaksgr_cecum))
+  peaksgr_cecumor <- peaksgr_cecum
+  peaksgr_cecumde <- peaksgr_cecum[paste0('Cecum_', diff_peaks)]
+  peaksgr_cecumde_unique <- reduce(peaksgr_cecumde)
 
-  # load MERIP reference sites
+  # load MERIP reference sites / MOUSE
   m6aref <- fread(bed_ref)#sb_m6a_mouse_mm10.bed")
   setnames(m6aref, c('chr', 'start', 'end', 'type', 'id', 'strand'))
   m6arefgr <- makeGRangesFromDataFrame(as.data.frame(m6aref), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr")
-  m6arefgr <- reduce(m6arefgr)
+  m6arefgr$dataset <- sub('_[0-9]+$','', as.character(m6arefgr$id))
   names(m6arefgr) <- paste0('ref_',1:length(m6arefgr))
+  m6arefgror <- m6arefgr
+  # length selection
+  #m6arefgror<- m6arefgr <-  m6arefgr[width(m6arefgr) < 5000, ]
+
+  # load MERIP reference sites / HUMAN LIFTED OVER TO MOUSE
+  m6arefhuman <- fread(bed_ref_lifted)#sb_m6a_mouse_mm10.bed")
+  setnames(m6arefhuman, c('chr', 'start', 'end', 'id', 'chr_mm10_jesaispas', 'strand', 'start_mm10', 'end_mm10', 'supp'))
+  m6arefhumangr <- makeGRangesFromDataFrame(as.data.frame(m6arefhuman), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr")
+  m6arefhumangr$dataset <- sub('_[0-9]+$','', as.character(m6arefhumangr$id))
+  names(m6arefhumangr) <- paste0('ref_',1:length(m6arefhumangr))
+  m6arefhumangror <- m6arefhumangr
+  # length selection
+  #m6arefhumangror<- m6arefhumangr <-  m6arefhumangr[width(m6arefhumangr) < 5000, ]
+
+
+  #### compute venn for all peaks present in MetDB: MM10 ###
+  cond <- unique(refexp$condition)
+  dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr,
+                       datasetids = refexp[condition %in% cond, ]$id)
+  dfint1 <- dfs[[1]]
+  dfint2 <- dfs[[2]]
+  # there might be a few peaks that overlap two or more peaks from the other tissue or in the reference,
+  # when this happens, the overlap numbers may differ by a few peaks between Cecum and Liver
+  # in that case, we keep the number of minimum overlap and we move the additional peak(s) to the down intersection of the corresponding tissue
+  dfint1["None",] <-   dfint1["None",] + (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",]))
+  dfint1["Other_tissue_only",] <-   dfint1["Other_tissue_only",] - (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",]))
+  dfint1["Ref_only",] <-   dfint1["Ref_only",] + (dfint1["Both",]-min(dfint1["Both",]))
+  dfint1["Both",] <-   dfint1["Both",] - (dfint1["Both",]-min(dfint1["Both",]))
+
+
+  #number of MetDB regions that have no overlap with our peaks
+  #dfint2['None',]
+  #one of our peak can overlap several Med
+
+  if (draw_venn_allMetDb) {
+    #dev.off()
+    if (!missing(filename) | !is.null(filename)) pdf(paste0(dir,'/venn_allCond_',filename), 7,7)
+    venn.plot <- VennDiagram::draw.triple.venn(
+      area1 = dfint1["Total", "Cecum"], area2 = dfint1["Total", "Liver"],
+      area3 = dfint2["None", "Ref"] + dfint1["Ref_only", "Cecum"]+ dfint1["Ref_only", "Liver"]+ dfint1["Both", "Cecum"],
+      n12 =  dfint1["Other_tissue_only", "Cecum"] + dfint1["Both", "Cecum"],
+      n13 =  dfint1["Ref_only", "Cecum"] + dfint1["Both", "Cecum"],
+      n23 = dfint1["Ref_only", "Liver"] + dfint1["Both", "Liver"],
+      n123 = dfint1["Both", "Cecum"],
+      category = c("Cecum", "Liver", "Ref"),
+      #fill = c("dodgerblue", "goldenrod1", "darkorange1"),
+      cat.col = c("dodgerblue", "goldenrod1", "darkorange1"),
+      cat.cex = 2,
+      margin = 0.05,
+      cex = 1.5,
+      ind = TRUE);
+    if (!missing(filename) | !is.null(filename)) dev.off()
+  }
+
+
+
+  if (draw_int_perCond) {
+    #### count overlap for each dataset available in MetDB : MM10 ####
+    conditions <- unique(refexp$condition)
+
+    expsel <- refexp[condition %in% cond, ]
+
+    intbycond <- foreach (cond = conditions, .combine = rbind) %do% {
+
+      expsel <- refexp[condition %in% cond, ]
+
+      dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, datasetids = expsel$id)
+      dfint1 <- dfs[[1]]
+      dfint2 <- dfs[[2]]
+      nbref <- dfs[[3]]
+
+      # nb/percentage in ref
+      nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2])
+      #percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2])
+      # nb/percentage in other tissue
+      nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2])
+      #percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2])
+      #peaksgr_cecum <- peaksgr_cecumor
+      #peaksgr_liver <- peaksgr_liveror
+
+      return(c(nbinref, nbref, paste(expsel$id, collapse = ',')))
+    }
+    colnames(intbycond) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition')
+    intbycond <- cbind(metdb_condition = conditions, intbycond, species = 'Mus musculus')
+
+
+    #### count overlap for each dataset available in MetDB : HG38 lifted over MM10 ####
+    conditions <- unique(refexphuman$condition)
+
+    intbycond_human <- foreach (cond = conditions, .combine = rbind) %do% {
+
+      expsel <- refexphuman[condition %in% cond, ]
+
+      dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefhumangr, datasetids = expsel$id)
+      dfint1 <- dfs[[1]]
+      dfint2 <- dfs[[2]]
+      nbref <- dfs[[3]]
+
+      # percentage in ref
+      nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2])
+      # percentage in other tissue
+      nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2])
+
+      return(c(nbinref, nbref, paste(expsel$id, collapse = ',')))
+    }
+    colnames(intbycond_human) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition')
+    intbycond_human <- cbind(metdb_condition = conditions, intbycond_human, species = 'Homo sapiens (hg38 lifted over mm10)')
+
+
+    intbycondall1 <- data.frame(rbind(intbycond, intbycond_human), stringsAsFactors = FALSE)
+    intbycondall1$Cecum_perc <- as.numeric(as.character(intbycondall1$Cecum))/length(peaksgr_cecum)
+    intbycondall1$Liver_perc <- as.numeric(as.character(intbycondall1$Liver))/length(peaksgr_liver)
+    #write.table(intbycondall1, file = 'doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.xls', sep = '\t', quote = FALSE, row.names = FALSE)
+
+    intbycondall <- as.data.frame(rbind(intbycond, intbycond_human))
+    intbycondall <- rbind(cbind(  intbycondall[,-3] %>% dplyr::rename(.,nbOverlap = Cecum), Tissue = 'Cecum'),
+                          cbind(  intbycondall[,-2]  %>% dplyr::rename(.,nbOverlap = Liver), Tissue = 'Liver'))
+
+    intbycondall$percOverlap[intbycondall$Tissue == 'Cecum'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Cecum']))/length(peaksgr_cecum)
+    intbycondall$percOverlap[intbycondall$Tissue == 'Liver'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Liver']))/length(peaksgr_liver)
+
+    intbycondall$species <- factor(intbycondall$species, levels = c("Mus musculus", "Homo sapiens (hg38 lifted over mm10)"))
+
+
+    ggnb <- ggplot(intbycondall, aes(x = metdb_condition, y = as.numeric(as.character(nbOverlap)), fill = Tissue)) +
+      geom_bar(stat = 'identity', position = 'dodge') + facet_wrap(~species, scales = 'free_x') +
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('nb of peaks overlapping a reference') + xlab('')
+
+
+    ggperc <- ggplot(intbycondall, aes(x = metdb_condition, y = percOverlap, fill = Tissue)) +
+      geom_bar(stat = 'identity', position = 'dodge', width = 0.8) + facet_wrap(~species, scales = 'free_x') +
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('% of peaks overlapping a reference') +
+      xlab('') + scale_y_continuous(labels=scales::percent)
+
+    #pdf('doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.pdf', width = 8, height = 12)
+    pdf(paste0(dir,'/barplot_byCond_',filename), width = 8, height = 12)
+    print(gridExtra::marrangeGrob(list(ggnb, ggperc),# + ggtitle('tSNE')), ,d12tsne1,d12tsne
+                                  ncol = 1, nrow = 2, top = paste("Overlap between the detected peaks \n and Met-db v2 peaks")))
+
+    dev.off()
+
+  }
+}
+
+
+
+
+countOverlaps <- function(peaksgr_cecum, peaksgr_liver, m6arefgr, datasetids) {
+
+  m6arefgrsel <- reduce(m6arefgr[m6arefgr$dataset %in% datasetids,])
+  names(m6arefgrsel) <- paste0('ref_',1:length(m6arefgrsel))
 
   # compute overlap between exp1 peaks and ref
-  ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgr, minoverlap = 1)
+  ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgrsel, minoverlap = 1)
   ov <- as.data.table(ov)
-  ov[, peak := names(peaksgr_cecum)[queryHits]]
-  ov[, ref := names(m6arefgr)[subjectHits]]
-  ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgr[ref])), by = .I]
-  # assign name of the ref peak that has the biggest overlap to the exp1 peak
-  peaks_to_ref_cecum <- ov[, ref[which.max(ovWidth)], by = peak]
-  names(peaksgr_cecum)[match(peaks_to_ref_cecum$peak,names(peaksgr_cecum))] <- paste0(peaks_to_ref_cecum$peak, '_', peaks_to_ref_cecum$V1)
-  #ov[, refm6a := names(m6arefgr)[subjectHits]]
-
-  # load peak annotations of exp2
-  load(exp2)
-  peaksgr <- copy(peaks)
-  setnames(peaksgr, c("chromo_peak",   "begin_peak",  "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
-  peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
-  if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)]
-  names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver))
+  if (nrow(ov) > 0 ) {
+    ov[, peak := names(peaksgr_cecum)[queryHits]]
+    ov[, ref := names(m6arefgrsel)[subjectHits]]
+    ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgrsel[ref])), by = .I]
+  } else {
+    ov <- data.table(peaks = NA, ref = NA, ovWidth = 0)
+  }
 
   # compute overlap between exp2 peaks and ref
-  ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgr, minoverlap = 1)
+  ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgrsel, minoverlap = 1)
   ov_liverref <- as.data.table(ov_liverref)
   ov_liverref[, peak :=names(peaksgr_liver)[queryHits]]
-  ov_liverref[, ref := names(m6arefgr)[subjectHits]]
-  ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgr[ref])), by = .I]
-  # assign name of the ref peak that has the biggest overlap to the exp2 peak
-  peaks_to_ref_liver <- ov_liverref[, ref[which.max(ovWidth)], by = peak]
-  names(peaksgr_liver)[match(peaks_to_ref_liver$peak,names(peaksgr_liver))] <- peaks_to_ref_liver$V1
-
+  ov_liverref[, ref := names(m6arefgrsel)[subjectHits]]
+  ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgrsel[ref])), by = .I]
 
   # compute overlap between exp1 and exp2
   ov_cecum_liver <- as.data.table(findOverlaps(query = peaksgr_liver, subject = peaksgr_cecum, minoverlap = 1))
   ov_cecum_liver[, peakCecum := names(peaksgr_cecum)[subjectHits]]
   ov_cecum_liver[, peakLiver := names(peaksgr_liver)[queryHits]]
   ov_cecum_liver[, ovWidth := width(pintersect(peaksgr_liver[peakLiver],peaksgr_cecum[peakCecum])), by = .I]
-  # assign same name to the peak overlapping between exp1 and exp2, using the max overlap only
-  peaks_to_cecum_liver <- ov_cecum_liver[, peakCecum[which.max(ovWidth)], by = peakLiver]
-  names(peaksgr_liver)[match(peaks_to_cecum_liver$peakLiver,names(peaksgr_liver))] <- peaks_to_cecum_liver$V1
-  # peaks_to_cecum_liver <- ov_cecum_liver[, peakLiver[which.max(ovWidth)], by = peakCecum]
-  # names(peaksgr_cecum)[match(peaks_to_cecum_liver$peakCecum,names(peaksgr_cecum))] <- peaks_to_cecum_liver$V1
-
-  if (!missing(filename) | !is.null(filename)) pdf(filename, 7,7)
-  venn(data = list(Liver=names(peaksgr_liver), Cecum=names(peaksgr_cecum), "reference peaks"=names(m6arefgr)))
-  if (!missing(filename) | !is.null(filename)) dev.off()
-
-  #length(unique(ov$peak))/length(peaksgr_cecum)
-  #length(unique(ov$peak));length(peaksgr_cecum)
-  #length(unique(ov_liverref$peak))/length(peaksgr_liver)
-  #length(unique(ov_cecum_liver$peakCecum))/length(peaksgr_cecum)
-  #length(unique(ov_cecum_liver$peakLiver))/length(peaksgr_liver)
-  #summary(as.numeric(table(ov_liverref$ref)))
 
   dfint1 <- data.frame(Cecum = c(Ref_only = length(setdiff(unique(ov$peak), unique(ov_cecum_liver$peakCecum))),
-                       Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
-                       Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
-                       None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))),
-                       Total = length(peaksgr_cecum)),
-             Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))),
-                       Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
-                       Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
-                       None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))),
-                       Total = length(peaksgr_liver)))
+                                 Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
+                                 Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
+                                 None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))),
+                                 Total = length(peaksgr_cecum)),
+                       Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))),
+                                 Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
+                                 Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
+                                 None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))),
+                                 Total = length(peaksgr_liver)))
 
   dfint2 <- data.frame(Ref = c(Cecum_only =length(setdiff(unique(ov$ref), unique(ov_liverref$ref))),
-                     Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))),
-                     Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))),
-                     None = length(setdiff(unique(names(m6arefgr)), c(unique(ov$ref), unique(ov_liverref$ref)))),
-                     Total = length(m6arefgr)))
-
-  dfint1
-  dfint2
-  #as.matrix(dfint1)/c(length(peaksgr_cecum), length(peaksgr_liver))
+                               Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))),
+                               Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))),
+                               None = length(setdiff(unique(names(m6arefgrsel)), c(unique(ov$ref), unique(ov_liverref$ref)))),
+                               Total = length(m6arefgrsel)))
+
   dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE)
-  #as.matrix(dfint2)/length(m6arefgr)
-  dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgr)), center = FALSE)
-
-
-  # percentage in ref
-  percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2])
-  # percentage in other tissue
-  percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2])
-  #length(subsetByOverlaps(peaksgr_liver, m6arefgr))/length(peaksgr_liver)
-  return(data.frame(Cecum = c(perc_inothertissue = percinothertissue[1], perc_inref = percinref[1]),
-                    Liver = c(perc_inothertissue = percinothertissue[2], perc_inref = percinref[2])))
-  #return(list(dfint1, dfint2, dfint1_perc, dfint2_perc ))
+  dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgrsel)), center = FALSE)
+
+
+  return(list(dfint1, dfint2, length(m6arefgrsel)))
 }
diff --git a/src/m6aAnalysis/R/venns.R b/src/m6aAnalysis/R/venns.R
index 59772c931d93fbf407a4926fffaf3b510b6c4a33..92395d711a28f12e0d397a29311b397323538c9d 100644
--- a/src/m6aAnalysis/R/venns.R
+++ b/src/m6aAnalysis/R/venns.R
@@ -1,116 +1,288 @@
-library(GenomicFeatures)
 library(data.table)
+library(GenomicRanges)
+library(foreach)
+library(gplots)
 
 
-plotVenns <- function(exp1='PeakDiffExpression/Cecum_all_MaxMaxValues_3/Cecum_all_MaxMaxValues_3.RData',
-                      exp2='PeakDiffExpression/Liver_all_MaxMaxValues_3/Liver_all_MaxMaxValues_3.RData',
-                      bed_ref="Genome/pc_ep_mouse_mm10.bed",
-                      filename="venn_LiverAll_CecumAll_ref.pdf",
-                      select_counts=FALSE) {
 
 
-  load(exp1)
+
+#' Compute overlaps between peaks and MetDB sites
+#' @param exp1 RData file output of m6amaker pipeline for the first MeRIPSeq experiment
+#' @param exp2 RData file output of m6amaker pipeline for the second MeRIPSeq experiment
+#' @param bed_ref BED file containing list of regions contained in the MetDB database for mm10
+#' @param bed_ref_lifted BED file containing list of regions contained in the MetDB database for hg38 lifted over mm10
+#' @param dir directory where to save files
+#' @param filename suffix of pdf file where to save plots
+#' @param refexp MetDB reference file containing description of each dataset stored in the database
+#' @param draw_venn_allMetDb TRUE if the venn diagramm summarizing the overlaps of the two experiments with MetDB should be drawn
+#' @param draw_int_perCond TRUE  if the barplot showing overlaps between each dataset type/condition available in MetDB and each MeRIPSeq experiment should be drawn
+#'
+plotIntersectionMetDB <- function(exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData',
+                                  exp2='PeakDiffExpression/Liver_allWithoutEc_T13_MaxMaxValues_3/Liver_allWithoutEc_T13_MaxMaxValues_3.RData',
+                                  bed_ref="Genome/pc_ep_mouse_mm10.bed",
+                                  bed_ref_lifted="Genome/pc_ep_human_hg38_liftOver2mm10_conversions.bed",
+                                  dir='.',
+                                  filename="LiverAll_CecumAll_ref.pdf",
+                                  refexp="Genome/metdb_v2_exp.csv",
+                                  select_counts=FALSE,
+                                  draw_venn_allMetDb=TRUE,
+                                  draw_int_perCond=FALSE
+) {
+
+  # load metdb_v2 experiment description
+  # reformat conditions such that identical conditions get identical names
+  refexp <- fread("Genome/metdb_v2_exp.csv")
+  setnames(refexp, c('id', 'id2', 'species', 'cellline', 'tissue', 'note'))
+  refexp[, condition := paste0(tissue, ': ', cellline)]
+  refexp[, condition := gsub('^: |: $', '', condition)]
+  #table(refexp$condition)
+  refexp[condition == 'A549', condition := 'Lung cancer: A549']
+  refexp[, condition := tolower(condition)]
+
+  # select human data
+  # reannotate conditions such that we can merge samples more homogeneously
+  refexphuman <- refexp[species %in% c('Homo sapiens'),]
+  refexphuman[grep('hepg2', condition, ignore.case = TRUE), condition := 'liver: HepG2']
+  refexphuman[grep('h1a|h1b', condition, ignore.case = TRUE), condition := 'h1a/h1b endoderm or embryonic stem']
+  refexphuman[grep('hek293t', condition, ignore.case = TRUE), condition := 'embryonic kidney: hek293t']
+
+  # select mouse data
+  refexp <- refexp[species %in% c('Mus musculus'),]
+  # reannotate conditions such that we can merge samples more homogeneously
+  refexp[condition %in% c("adult brain", "embyronic brain") , condition := 'brain']
+  refexp[grep('hek293', condition), condition := 'HEK293 derivative, HEK293T embryonic kidney']
+  refexp[grep('bvsc', condition), condition := "bvsc embryonic stem cells, bvsc embryonic bodies"]
+  refexp[grep('mef', condition), condition := "MEF cells"]
+  refexp[grep('hepg2', condition), condition := 'liver: HepG2']
+
+
+  # load peak annotations of exp2
+  load(exp2)
+  peaksgr <- copy(peaks)
+  setnames(peaksgr, c("chromo_peak",   "begin_peak",  "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
+  peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
+  if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)]
+  names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver))
+  peaksgr_liveror <- peaksgr_liver
+  peaksgr_liverde <- peaksgr_liver[paste0('Liver_', diff_peaks)]
+  peaksgr_liverde_unique <- reduce(peaksgr_liverde)
+
 
   # load peak annotations of exp1
+  exp1='PeakDiffExpression/Cecum_allWithoutEc_MaxMaxValues_3/Cecum_allWithoutEc_MaxMaxValues_3.RData'
+  load(exp1)
   peaksgr <- copy(peaks)
   setnames(peaksgr, c("chromo_peak",   "begin_peak",    "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
   peaksgr_cecum <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
   if (select_counts) peaksgr_cecum <- peaksgr_cecum[rownames(counts_epi)]
   names(peaksgr_cecum) <- paste0('Cecum_',names(peaksgr_cecum))
+  peaksgr_cecumor <- peaksgr_cecum
+  peaksgr_cecumde <- peaksgr_cecum[paste0('Cecum_', diff_peaks)]
+  peaksgr_cecumde_unique <- reduce(peaksgr_cecumde)
 
-  # load MERIP reference sites
+  # load MERIP reference sites / MOUSE
   m6aref <- fread(bed_ref)#sb_m6a_mouse_mm10.bed")
   setnames(m6aref, c('chr', 'start', 'end', 'type', 'id', 'strand'))
   m6arefgr <- makeGRangesFromDataFrame(as.data.frame(m6aref), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr")
-  m6arefgr <- reduce(m6arefgr)
+  m6arefgr$dataset <- sub('_[0-9]+$','', as.character(m6arefgr$id))
   names(m6arefgr) <- paste0('ref_',1:length(m6arefgr))
+  m6arefgror <- m6arefgr
+  # length selection
+  #m6arefgror<- m6arefgr <-  m6arefgr[width(m6arefgr) < 5000, ]
+
+  # load MERIP reference sites / HUMAN LIFTED OVER TO MOUSE
+  m6arefhuman <- fread(bed_ref_lifted)#sb_m6a_mouse_mm10.bed")
+  setnames(m6arefhuman, c('chr', 'start', 'end', 'id', 'chr_mm10_jesaispas', 'strand', 'start_mm10', 'end_mm10', 'supp'))
+  m6arefhumangr <- makeGRangesFromDataFrame(as.data.frame(m6arefhuman), keep.extra.columns = TRUE, start.field = "start", end.field = 'end', seqnames.field="chr")
+  m6arefhumangr$dataset <- sub('_[0-9]+$','', as.character(m6arefhumangr$id))
+  names(m6arefhumangr) <- paste0('ref_',1:length(m6arefhumangr))
+  m6arefhumangror <- m6arefhumangr
+  # length selection
+  #m6arefhumangror<- m6arefhumangr <-  m6arefhumangr[width(m6arefhumangr) < 5000, ]
+
+
+  #### compute venn for all peaks present in MetDB: MM10 ###
+  cond <- unique(refexp$condition)
+  dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr,
+                       datasetids = refexp[condition %in% cond, ]$id)
+  dfint1 <- dfs[[1]]
+  dfint2 <- dfs[[2]]
+  # there might be a few peaks that overlap two or more peaks from the other tissue or in the reference,
+  # when this happens, the overlap numbers may differ by a few peaks between Cecum and Liver
+  # in that case, we keep the number of minimum overlap and we move the additional peak(s) to the down intersection of the corresponding tissue
+  dfint1["None",] <-   dfint1["None",] + (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",]))
+  dfint1["Other_tissue_only",] <-   dfint1["Other_tissue_only",] - (dfint1["Other_tissue_only",]-min(dfint1["Other_tissue_only",]))
+  dfint1["Ref_only",] <-   dfint1["Ref_only",] + (dfint1["Both",]-min(dfint1["Both",]))
+  dfint1["Both",] <-   dfint1["Both",] - (dfint1["Both",]-min(dfint1["Both",]))
+
+
+  #number of MetDB regions that have no overlap with our peaks
+  #dfint2['None',]
+  #one of our peak can overlap several Med
+
+  if (draw_venn_allMetDb) {
+    #dev.off()
+    if (!missing(filename) | !is.null(filename)) pdf(paste0(dir,'/venn_allCond_',filename), 7,7)
+    venn.plot <- VennDiagram::draw.triple.venn(
+      area1 = dfint1["Total", "Cecum"], area2 = dfint1["Total", "Liver"],
+      area3 = dfint2["None", "Ref"] + dfint1["Ref_only", "Cecum"]+ dfint1["Ref_only", "Liver"]+ dfint1["Both", "Cecum"],
+      n12 =  dfint1["Other_tissue_only", "Cecum"] + dfint1["Both", "Cecum"],
+      n13 =  dfint1["Ref_only", "Cecum"] + dfint1["Both", "Cecum"],
+      n23 = dfint1["Ref_only", "Liver"] + dfint1["Both", "Liver"],
+      n123 = dfint1["Both", "Cecum"],
+      category = c("Cecum", "Liver", "Ref"),
+      #fill = c("dodgerblue", "goldenrod1", "darkorange1"),
+      cat.col = c("dodgerblue", "goldenrod1", "darkorange1"),
+      cat.cex = 2,
+      margin = 0.05,
+      cex = 1.5,
+      ind = TRUE);
+    if (!missing(filename) | !is.null(filename)) dev.off()
+  }
+
+
+
+  if (draw_int_perCond) {
+    #### count overlap for each dataset available in MetDB : MM10 ####
+    conditions <- unique(refexp$condition)
+
+    expsel <- refexp[condition %in% cond, ]
+
+    intbycond <- foreach (cond = conditions, .combine = rbind) %do% {
+
+      expsel <- refexp[condition %in% cond, ]
+
+      dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefgr, datasetids = expsel$id)
+      dfint1 <- dfs[[1]]
+      dfint2 <- dfs[[2]]
+      nbref <- dfs[[3]]
+
+      # nb/percentage in ref
+      nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2])
+      #percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2])
+      # nb/percentage in other tissue
+      nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2])
+      #percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2])
+      #peaksgr_cecum <- peaksgr_cecumor
+      #peaksgr_liver <- peaksgr_liveror
+
+      return(c(nbinref, nbref, paste(expsel$id, collapse = ',')))
+    }
+    colnames(intbycond) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition')
+    intbycond <- cbind(metdb_condition = conditions, intbycond, species = 'Mus musculus')
+
+
+    #### count overlap for each dataset available in MetDB : HG38 lifted over MM10 ####
+    conditions <- unique(refexphuman$condition)
+
+    intbycond_human <- foreach (cond = conditions, .combine = rbind) %do% {
+
+      expsel <- refexphuman[condition %in% cond, ]
+
+      dfs <- countOverlaps(peaksgr_cecum = peaksgr_cecum, peaksgr_liver = peaksgr_liver, m6arefgr = m6arefhumangr, datasetids = expsel$id)
+      dfint1 <- dfs[[1]]
+      dfint2 <- dfs[[2]]
+      nbref <- dfs[[3]]
+
+      # percentage in ref
+      nbinref <- c(dfint1[1,1]+dfint1[3,1], dfint1[1,2]+dfint1[3,2])
+      # percentage in other tissue
+      nbinothertissue <- c(dfint1[2,1]+dfint1[3,1], dfint1[2,2]+dfint1[3,2])
+
+      return(c(nbinref, nbref, paste(expsel$id, collapse = ',')))
+    }
+    colnames(intbycond_human) <- c('Cecum', 'Liver', 'metdb_nbpeaks', 'metdb_dataset')#, 'metdb_condition')
+    intbycond_human <- cbind(metdb_condition = conditions, intbycond_human, species = 'Homo sapiens (hg38 lifted over mm10)')
+
+
+    intbycondall1 <- data.frame(rbind(intbycond, intbycond_human), stringsAsFactors = FALSE)
+    intbycondall1$Cecum_perc <- as.numeric(as.character(intbycondall1$Cecum))/length(peaksgr_cecum)
+    intbycondall1$Liver_perc <- as.numeric(as.character(intbycondall1$Liver))/length(peaksgr_liver)
+    #write.table(intbycondall1, file = 'doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.xls', sep = '\t', quote = FALSE, row.names = FALSE)
+
+    intbycondall <- as.data.frame(rbind(intbycond, intbycond_human))
+    intbycondall <- rbind(cbind(  intbycondall[,-3] %>% dplyr::rename(.,nbOverlap = Cecum), Tissue = 'Cecum'),
+                          cbind(  intbycondall[,-2]  %>% dplyr::rename(.,nbOverlap = Liver), Tissue = 'Liver'))
+
+    intbycondall$percOverlap[intbycondall$Tissue == 'Cecum'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Cecum']))/length(peaksgr_cecum)
+    intbycondall$percOverlap[intbycondall$Tissue == 'Liver'] <- as.numeric(as.character(intbycondall$nbOverlap[intbycondall$Tissue == 'Liver']))/length(peaksgr_liver)
+
+    intbycondall$species <- factor(intbycondall$species, levels = c("Mus musculus", "Homo sapiens (hg38 lifted over mm10)"))
+
+
+    ggnb <- ggplot(intbycondall, aes(x = metdb_condition, y = as.numeric(as.character(nbOverlap)), fill = Tissue)) +
+      geom_bar(stat = 'identity', position = 'dodge') + facet_wrap(~species, scales = 'free_x') +
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('nb of peaks overlapping a reference') + xlab('')
+
+
+    ggperc <- ggplot(intbycondall, aes(x = metdb_condition, y = percOverlap, fill = Tissue)) +
+      geom_bar(stat = 'identity', position = 'dodge', width = 0.8) + facet_wrap(~species, scales = 'free_x') +
+      theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab('% of peaks overlapping a reference') +
+      xlab('') + scale_y_continuous(labels=scales::percent)
+
+    #pdf('doc/Point by point reply_NatureComms/compare_metdb/all_peaks/overlapWithMetDB_perCondition.pdf', width = 8, height = 12)
+    pdf(paste0(dir,'/barplot_byCond_',filename), width = 8, height = 12)
+    print(gridExtra::marrangeGrob(list(ggnb, ggperc),# + ggtitle('tSNE')), ,d12tsne1,d12tsne
+                                  ncol = 1, nrow = 2, top = paste("Overlap between the detected peaks \n and Met-db v2 peaks")))
+
+    dev.off()
+
+  }
+}
+
+
+
+
+countOverlaps <- function(peaksgr_cecum, peaksgr_liver, m6arefgr, datasetids) {
+
+  m6arefgrsel <- reduce(m6arefgr[m6arefgr$dataset %in% datasetids,])
+  names(m6arefgrsel) <- paste0('ref_',1:length(m6arefgrsel))
 
   # compute overlap between exp1 peaks and ref
-  ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgr, minoverlap = 1)
+  ov <- findOverlaps(query = peaksgr_cecum, subject = m6arefgrsel, minoverlap = 1)
   ov <- as.data.table(ov)
-  ov[, peak := names(peaksgr_cecum)[queryHits]]
-  ov[, ref := names(m6arefgr)[subjectHits]]
-  ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgr[ref])), by = .I]
-  # assign name of the ref peak that has the biggest overlap to the exp1 peak
-  peaks_to_ref_cecum <- ov[, ref[which.max(ovWidth)], by = peak]
-  names(peaksgr_cecum)[match(peaks_to_ref_cecum$peak,names(peaksgr_cecum))] <- paste0(peaks_to_ref_cecum$peak, '_', peaks_to_ref_cecum$V1)
-  #ov[, refm6a := names(m6arefgr)[subjectHits]]
-
-  # load peak annotations of exp2
-  load(exp2)
-  peaksgr <- copy(peaks)
-  setnames(peaksgr, c("chromo_peak",   "begin_peak",  "end_peak", 'chr'), c('chr', 'start', 'end','chr_gene'))
-  peaksgr_liver <- makeGRangesFromDataFrame(peaksgr, keep.extra.columns = TRUE, seqnames.field="chr")
-  if (select_counts) peaksgr_liver <- peaksgr_liver[rownames(counts_epi)]
-  names(peaksgr_liver) <- paste0('Liver_',names(peaksgr_liver))
+  if (nrow(ov) > 0 ) {
+    ov[, peak := names(peaksgr_cecum)[queryHits]]
+    ov[, ref := names(m6arefgrsel)[subjectHits]]
+    ov[, ovWidth := width(pintersect(peaksgr_cecum[peak],m6arefgrsel[ref])), by = .I]
+  } else {
+    ov <- data.table(peaks = NA, ref = NA, ovWidth = 0)
+  }
 
   # compute overlap between exp2 peaks and ref
-  ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgr, minoverlap = 1)
+  ov_liverref <- findOverlaps(query = peaksgr_liver, subject = m6arefgrsel, minoverlap = 1)
   ov_liverref <- as.data.table(ov_liverref)
   ov_liverref[, peak :=names(peaksgr_liver)[queryHits]]
-  ov_liverref[, ref := names(m6arefgr)[subjectHits]]
-  ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgr[ref])), by = .I]
-  # assign name of the ref peak that has the biggest overlap to the exp2 peak
-  peaks_to_ref_liver <- ov_liverref[, ref[which.max(ovWidth)], by = peak]
-  names(peaksgr_liver)[match(peaks_to_ref_liver$peak,names(peaksgr_liver))] <- peaks_to_ref_liver$V1
-
+  ov_liverref[, ref := names(m6arefgrsel)[subjectHits]]
+  ov_liverref[, ovWidth := width(pintersect(peaksgr_liver[peak],m6arefgrsel[ref])), by = .I]
 
   # compute overlap between exp1 and exp2
   ov_cecum_liver <- as.data.table(findOverlaps(query = peaksgr_liver, subject = peaksgr_cecum, minoverlap = 1))
   ov_cecum_liver[, peakCecum := names(peaksgr_cecum)[subjectHits]]
   ov_cecum_liver[, peakLiver := names(peaksgr_liver)[queryHits]]
   ov_cecum_liver[, ovWidth := width(pintersect(peaksgr_liver[peakLiver],peaksgr_cecum[peakCecum])), by = .I]
-  # assign same name to the peak overlapping between exp1 and exp2, using the max overlap only
-  peaks_to_cecum_liver <- ov_cecum_liver[, peakCecum[which.max(ovWidth)], by = peakLiver]
-  names(peaksgr_liver)[match(peaks_to_cecum_liver$peakLiver,names(peaksgr_liver))] <- peaks_to_cecum_liver$V1
-  # peaks_to_cecum_liver <- ov_cecum_liver[, peakLiver[which.max(ovWidth)], by = peakCecum]
-  # names(peaksgr_cecum)[match(peaks_to_cecum_liver$peakCecum,names(peaksgr_cecum))] <- peaks_to_cecum_liver$V1
-
-  if (!missing(filename) | !is.null(filename)) pdf(filename, 7,7)
-  venn(data = list(Liver=names(peaksgr_liver), Cecum=names(peaksgr_cecum), "reference peaks"=names(m6arefgr)))
-  if (!missing(filename) | !is.null(filename)) dev.off()
-
-  #length(unique(ov$peak))/length(peaksgr_cecum)
-  #length(unique(ov$peak));length(peaksgr_cecum)
-  #length(unique(ov_liverref$peak))/length(peaksgr_liver)
-  #length(unique(ov_cecum_liver$peakCecum))/length(peaksgr_cecum)
-  #length(unique(ov_cecum_liver$peakLiver))/length(peaksgr_liver)
-  #summary(as.numeric(table(ov_liverref$ref)))
 
   dfint1 <- data.frame(Cecum = c(Ref_only = length(setdiff(unique(ov$peak), unique(ov_cecum_liver$peakCecum))),
-                       Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
-                       Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
-                       None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))),
-                       Total = length(peaksgr_cecum)),
-             Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))),
-                       Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
-                       Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
-                       None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))),
-                       Total = length(peaksgr_liver)))
+                                 Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
+                                 Both = length(intersect(unique(ov_cecum_liver$peakCecum), unique(ov$peak))),
+                                 None = length(setdiff(unique(names(peaksgr_cecum)), c(unique(ov_cecum_liver$peakCecum), unique(ov$peak)))),
+                                 Total = length(peaksgr_cecum)),
+                       Liver = c(Ref_only = length(setdiff(unique(ov_liverref$peak), unique(ov_cecum_liver$peakLiver))),
+                                 Other_tissue_only = length(setdiff(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
+                                 Both = length(intersect(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak))),
+                                 None = length(setdiff(unique(names(peaksgr_liver)), c(unique(ov_cecum_liver$peakLiver), unique(ov_liverref$peak)))),
+                                 Total = length(peaksgr_liver)))
 
   dfint2 <- data.frame(Ref = c(Cecum_only =length(setdiff(unique(ov$ref), unique(ov_liverref$ref))),
-                     Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))),
-                     Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))),
-                     None = length(setdiff(unique(names(m6arefgr)), c(unique(ov$ref), unique(ov_liverref$ref)))),
-                     Total = length(m6arefgr)))
-
-  dfint1
-  dfint2
-  #as.matrix(dfint1)/c(length(peaksgr_cecum), length(peaksgr_liver))
-  dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE)
-  #as.matrix(dfint2)/length(m6arefgr)
-  dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgr)), center = FALSE)
-
-
-  # percentage in ref
-  percinref <- c(dfint1_perc[1,1]+dfint1_perc[3,1], dfint1_perc[1,2]+dfint1_perc[3,2])
-  # percentage in other tissue
-  percinothertissue <- c(dfint1_perc[2,1]+dfint1_perc[3,1], dfint1_perc[2,2]+dfint1_perc[3,2])
-  #length(subsetByOverlaps(peaksgr_liver, m6arefgr))/length(peaksgr_liver)
-  return(data.frame(Cecum = c(perc_inothertissue = percinothertissue[1], perc_inref = percinref[1]),
-                    Liver = c(perc_inothertissue = percinothertissue[2], perc_inref = percinref[2])))
-  #return(list(dfint1, dfint2, dfint1_perc, dfint2_perc ))
-}
+                               Liver_only =length(setdiff(unique(ov_liverref$ref), unique(ov$ref))),
+                               Both = length(intersect(unique(ov$ref), unique(ov_liverref$ref))),
+                               None = length(setdiff(unique(names(m6arefgrsel)), c(unique(ov$ref), unique(ov_liverref$ref)))),
+                               Total = length(m6arefgrsel)))
 
+  dfint1_perc <- scale(as.matrix(dfint1), scale = c(length(peaksgr_cecum), length(peaksgr_liver)), center = FALSE)
+  dfint2_perc <- scale(as.matrix(dfint2), scale = c(length(m6arefgrsel)), center = FALSE)
 
 
+  return(list(dfint1, dfint2, length(m6arefgrsel)))
+}