diff --git a/Wednesday/MAF/EPITOME_CS_integration.Rmd b/Wednesday/MAF/EPITOME_CS_integration.Rmd
deleted file mode 100644
index 09d0d1090dc9e3457916a7cfe16a7d3f6848295b..0000000000000000000000000000000000000000
--- a/Wednesday/MAF/EPITOME_CS_integration.Rmd
+++ /dev/null
@@ -1,1107 +0,0 @@
----
-title: "Epigenomic memory of infection: Data integration"
-subtitle : "H3K4me2, transcriptome time courses and chromatin state"
-author: "Claudia Chica"
-date: "02.2021"
-output: 
-  html_document: 
-    keep_md: yes
-    number_sections: yes
-    smart: no
-    toc: yes
-    toc_float: yes
-editor_options: 
-  chunk_output_type: console
----
-
-```{r setup, message = FALSE, warning = FALSE, echo = FALSE}
-knitr::opts_chunk$set(message=FALSE,warning=FALSE,echo=FALSE)#, cache=TRUE, cache.lazy = FALSE)
-knitr::opts_knit$set(progress = FALSE, verbose = FALSE)
-#knitr::opts_chunk$set(dev = "svg")
-
-WDIR="/Volumes/bioit/12891_Chevalier_EpiMemStrep"
-
-library(ggplot2)
-library(plyranges)
-library(RColorBrewer)
-library(knitr)
-library(pheatmap)
-library(patchwork)
-library(FactoMineR)
-library(org.Hs.eg.db)
-library(msigdbr)
-
-
-```
-
-# Methylation profile
-
-```{r loadEPIGresults}
-# Load epigenomic counts
-mark="H3K4me2"
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/countNormLength_",mark,".RData")
-load(fileOut)
-
-# Discard ONE peak with negative normalised values in a SINGLE replicate (artefact from cycliclowess normalisation)
-countNormLength_H3K4me2=countNormLength_H3K4me2[countNormLength_H3K4me2$norm.H3K4me2_UI_REP1>0,]
-
-# Get list of DM peaks
-files=list.files(path = paste0(WDIR,"/results_project12891/tables/tables_v3_UCSC/",mark,"/"),pattern = "resAnDif_with_annotation_sorted.final.xls", full.names = T)
-DM_results=list() ; i=1
-for (fileDM in files) {
-  no_col <- max(count.fields(fileDM, sep = "\t"))
-  tmp=read.table(fileDM,header = F,sep="\t", fill = TRUE,skip = 1,
-                 col.names=1:no_col,colClasses = c("character",rep("numeric",16),rep("character",no_col-17)))
-  row.names(tmp) <- tmp[,1]
-  tmp=tmp[,14:17]
-  colnames(tmp) <- c("baseMean", "log2FoldChange", "pvalue", "padj")
-  tmp=tmp[row.names(countNormLength_H3K4me2),]
-  DM_results[[i]]=tmp ; i=i+1
-}
-rm(tmp)
-names(DM_results) <- c("D7vsH3","D7vsUI","H3vsUI")
-
-# DM profiles
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/DMpeaks_",mark,".RData")
-load(fileOut)
-
-# Add not DEGs
-DMpeaks_H3K4me2=intersect(DMpeaks_H3K4me2,row.names(DM_results$D7vsUI))
-tmp=setdiff(row.names(countNormLength_H3K4me2),DMpeaks_H3K4me2)
-DMs=data.frame(peakID=DMpeaks_H3K4me2,
-               Kind="DM",
-Sign=ifelse(DM_results$D7vsUI[DMpeaks_H3K4me2,]$log2FoldChange>=0,"Gain","Loss"))
-notDMs=data.frame(peakID=tmp,
-                  Kind="notDM",
-Sign=ifelse(DM_results$D7vsUI[tmp,]$log2FoldChange>=0,"Gain","Loss"))
-DMs=rbind(DMs,notDMs)
-DMs$peakID=as.character(DMs$peakID)
-rm(tmp,notDMs)
-
-kable(table(DMs$Kind,DMs$Sign))
-```
-
-# Transcriptional profile
-```{r loadTOMEresults}
-# Expression data normalised and batch corrected 
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/Data_RMAandNoBatch_Annot.RData")
-load(fileOut)
-
-# DA results, same object used for the functional analysis
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/DE_results_Annot.RData")
-load(fileOut)
-
-SYMBOL2ENTREZ=unique(data.frame(SYMBOL=as.character(DE_results$H3vsUI$SYMBOL),
-                         ENTREZID=as.character(DE_results$H3vsUI$ENTREZID),stringsAsFactors = FALSE))
-
-# DEGs padjTh=.05 ; lfcTh=0
-fileOut=paste0(WDIR,"/results_project12891/tables/transcriptome/DEGs_H3K4me2profiles_CompleteAnnotationPeaks_nearestGene.txt")
-DEGs=read.table(fileOut,header = T)[,7:10]
-DEGs=unique(na.omit(DEGs))
-DEGs$SYMBOL=as.character(DEGs$SYMBOL)
-DEGs$ENTREZID=as.character(DEGs$ENTREZID)
-# Add not DEGs
-tmp=setdiff(SYMBOL2ENTREZ$ENTREZID,DEGs$ENTREZID)
-H3vsUI=DE_results$H3vsUI[!duplicated(DE_results$H3vsUI$ENTREZID),]
-DIvsD7=DE_results$DIvsD7[!duplicated(DE_results$DIvsD7$ENTREZID),]
-notDEGs=data.frame(ENTREZID=SYMBOL2ENTREZ[SYMBOL2ENTREZ$ENTREZID %in% tmp,]$ENTREZID,
-                   SYMBOL=SYMBOL2ENTREZ[SYMBOL2ENTREZ$ENTREZID %in% tmp,]$SYMBOL,
-                   Kind="notDE",
-# Sign=ifelse(H3vsUI[H3vsUI$ENTREZID %in% tmp,]$logFC>=0,
-#      ifelse(DIvsD7[DIvsD7$ENTREZID %in% tmp,]$logFC>=0,"Up","UpDown"),
-#      ifelse(DIvsD7[DIvsD7$ENTREZID %in% tmp,]$logFC<0,"Down","DownUp")))
-Sign=ifelse(H3vsUI[H3vsUI$ENTREZID %in% tmp,]$logFC>=0,"Up","Down"))
-DEGs=rbind(DEGs,notDEGs)
-DEGs$SYMBOL=as.character(DEGs$SYMBOL)
-DEGs$ENTREZID=as.character(DEGs$ENTREZID)
-rm(tmp,H3vsUI,DIvsD7,notDEGs)
-
-kable(table(DEGs$Kind,DEGs$Sign))
-
-```
-
-```{r load_peaks2Gene}
-mark="H3K4me2"
-### Nearest gene
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mergedPeaks_annotNearest_",mark,".RData")
-load(fileOut)
-peaks2GeneNearest_H3K4me2=data.frame(
-  peakId=mergedPeaks_annotNearest_H3K4me2@ranges@NAMES,
-  SYMBOL=as.character(mergedPeaks_annotNearest_H3K4me2@elementMetadata@listData[["mcols.GENESYMBOL"]]),
-  distance=mergedPeaks_annotNearest_H3K4me2@elementMetadata@listData[["distance"]],
-  stringsAsFactors=FALSE)
-peaks2GeneNearest_H3K4me2=merge(peaks2GeneNearest_H3K4me2,SYMBOL2ENTREZ,by.x=2,by.y=1,all.x=TRUE)[,c(2,1,3,4)]
-
-### Target gene from MEME
-# fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mergedPeaks_annotTgene_",mark,".RData")
-# load(fileOut)
-# peaks2GeneTgene_H3K4me2=data.frame(peakId=mergedPeaks_annotTgene_H3K4me2$peakId,
-#                   SYMBOL=as.character(mergedPeaks_annotTgene_H3K4me2$Gene_Name),
-#                   ENTREZID=mergedPeaks_annotTgene_H3K4me2$ENTREZID,
-#                   distance=0,stringsAsFactors=FALSE)
-
-# Load ENTREZ ENSEMBL ids mapping to cross Tgene and EPI-TOME information
-ENSEMBLEids <- org.Hs.egENSEMBL 
-mapped_genes <- mappedkeys(ENSEMBLEids)
-ENTREZtoENSEMBL <- as.data.frame(ENSEMBLEids[mapped_genes])
-colnames(ENTREZtoENSEMBL)=c("ENTREZID","ENSEMBLEID")
-
-# Load Tgene MEME results
-fileIn=paste0(WDIR,"/results_project12891/MEME_Tgene/H2K4me2/links.tsv")
-mergedPeaks_Tgene=read.table(fileIn,header = T,sep = "\t")
-## Get only significant associations peak-gene
-pvalTh=.05 #; FDRth=.5
-mergedPeaks_TgeneSig=subset(mergedPeaks_Tgene,mergedPeaks_Tgene$CnD_P_Value < pvalTh)
-mergedPeaks_TgeneSig$chr=gsub("chr","",gsub(":[0-9-]*","",mergedPeaks_TgeneSig$RE_Locus))
-mergedPeaks_TgeneSig$start=as.character(as.integer(gsub("chr[0-9XY]*:","",gsub("-[0-9]*","",mergedPeaks_TgeneSig$RE_Locus)))-1)
-mergedPeaks_TgeneSig$end=gsub(".*-","",mergedPeaks_TgeneSig$RE_Locus)
-mergedPeaks_TgeneSig$peakID=apply(mergedPeaks_TgeneSig[,18:20],1,function(x){paste0(x[1],"_",x[2],"_",x[3])})
-mergedPeaks_TgeneSig$ENSEMBL=gsub("\\.[0-9-]*","",mergedPeaks_TgeneSig$Gene_ID)
-peaks2GeneTgene_H3K4me2=na.omit(merge(mergedPeaks_TgeneSig,
-                           ENTREZtoENSEMBL,by.x=22,by.y=2,all.x=TRUE)[,c(22,23,10,14,17)])
-
-# Select unique gene-region associations 
-# select association minimun Tgene pvalue 
-peaks2GeneTgene_H3K4me2$peak2gene=paste0(peaks2GeneTgene_H3K4me2$peakID,"_",
-  peaks2GeneTgene_H3K4me2$ENTREZID)
-peaks2GeneTgene_H3K4me2$tmp=paste0(peaks2GeneTgene_H3K4me2$peakID,"_",
-  peaks2GeneTgene_H3K4me2$ENTREZID,"_",as.character(peaks2GeneTgene_H3K4me2$CnD_P_Value))
-peaks2GeneTgene_H3K4me2=peaks2GeneTgene_H3K4me2[!duplicated(peaks2GeneTgene_H3K4me2$tmp),]
-
-tmp=aggregate(peaks2GeneTgene_H3K4me2$CnD_P_Value,list(peaks2GeneTgene_H3K4me2$peak2gene),min)
-tmp$tmp=paste0(tmp$Group.1,"_",as.character(tmp$x))
-peaks2GeneTgene_H3K4me2=peaks2GeneTgene_H3K4me2[peaks2GeneTgene_H3K4me2$tmp %in% tmp$tmp,-7]
-
-rm(mergedPeaks_TgeneSig,ENSEMBLEids,mapped_genes,ENTREZtoENSEMBL)
-```
-
-# Chromatin states
-
-```{r selectPeaks, eval=FALSE, include=FALSE}
-# Select peaks using counts
-logFCthUp_H3K4me2=.005 #quantile .75
-logFCthDown_H3K4me2=-.005 #quantile .25
-tmp=countNormLength_H3K4me2
-selPeaksAllRep=row.names(
-  subset(tmp,(
-  (log(tmp$norm.H3K4me2_D7_REP1/tmp$norm.H3K4me2_UI_REP1,2) > logFCthUp_H3K4me2 & 
-   log(tmp$norm.H3K4me2_D7_REP2/tmp$norm.H3K4me2_UI_REP2,2) > logFCthUp_H3K4me2)  |
-  (log(tmp$norm.H3K4me2_D7_REP1/tmp$norm.H3K4me2_UI_REP1,2) < logFCthDown_H3K4me2 &  
-   log(tmp$norm.H3K4me2_D7_REP2/tmp$norm.H3K4me2_UI_REP2,2) < logFCthDown_H3K4me2))))
-
-selPeaksAll=row.names(tmp)
-
-# Select peaks related to DEGs 
-selPeaks_DEGs_nearest=unique(merge(DEGs[DEGs$Kind!="notDE",],peaks2GeneNearest_H3K4me2[,c(4,1)],by=1)[,5])
-selPeaks_DEGs_target=unique(merge(DEGs[DEGs$Kind!="notDE",],peaks2GeneTgene_H3K4me2[,c(3,1)],by=1)[,5])
-
-print(c("Number of:"))
-print(c("Total peaks",dim(tmp)[1]))
-print(c("Selected peaks - methylation logFold",length(selPeaksAll)))
-print(c("Selected peaks - reproducible methylation logFold",length(selPeaksAllRep)))
-print(c("Selected peaks - nearest to DEG",length(selPeaks_DEGs_nearest)))
-print(c("Selected peaks - with DEG target",length(selPeaks_DEGs_target)))
-
-print(c("Methylation logFold nearest to DEG",length(intersect(selPeaksAll,selPeaks_DEGs_nearest))))
-print(c("Methylation logFold with DEG target",length(intersect(selPeaksAll,selPeaks_DEGs_target))))
-print(c("Reproducible methylation logFold nearest to DEG",length(intersect(selPeaksAllRep,selPeaks_DEGs_nearest))))
-print(c("Reproducible methylation logFold with DEG target",length(intersect(selPeaksAllRep,selPeaks_DEGs_target))))
-
-ggplotdf=data.frame(log2FC=c(
-  DM_results$D7vsUI[selPeaksAll,]$log2FoldChange,
-  DM_results$D7vsUI[selPeaksAllRep,]$log2FoldChange,
-  DM_results$D7vsUI[selPeaks_DEGs_nearest,]$log2FoldChange,
-  DM_results$D7vsUI[selPeaks_DEGs_target,]$log2FoldChange),
-  peakPopulation=factor(c(rep("All peaks",length(selPeaksAll)),
-                   rep("Reproducible Log2FC",length(selPeaksAllRep)),
-                   rep("DEGs nearest",length(selPeaks_DEGs_nearest)),
-                   rep("DEGs target",length(selPeaks_DEGs_target))),
-                   levels = c("All peaks","Reproducible Log2FC","DEGs nearest","DEGs target")))
-
-ggplot(ggplotdf, aes(x=log2FC,fill=peakPopulation)) +
-    geom_density(alpha=0.5) + facet_grid(~ peakPopulation)
-```
-
-```{r get_mergedPeaks_annotTXS}
-# Load all merged peaks coords
-fileIn=paste0(WDIR,"/results_project12891/tables/tables_v3_UCSC/",mark,"/",mark,"_mergedPeaks.bed")
-mergedPeaks=read_bed(fileIn)
-
-# Annotate merged peaks
-fileAnnot=paste0(WDIR,"/results_project12891/igv/igv_session_v3/annotation/hg19_transcripts.geneSymbol.reformat.bed")
-txs_gr <- read_bed(fileAnnot)
-tss_gr <- GRanges(seqnames = txs_gr@seqnames,
-                   ranges = IRanges(start = txs_gr@ranges@start-1e3, end = txs_gr@ranges@start+1e3),
-                   strand = txs_gr@strand,
-                   mcols = data.frame(GENESYMBOL=mcols(txs_gr)$name))
-txs_gr <- GRanges(seqnames = txs_gr@seqnames,
-                   ranges = txs_gr@ranges,
-                   strand = txs_gr@strand,
-                   mcols = data.frame(GENESYMBOL=mcols(txs_gr)$name))
-
-mergedPeaks_annotTXS=array(NA,dim=length(mergedPeaks)) ; names(mergedPeaks_annotTXS)=mergedPeaks$name
-ovtss=unique(findOverlaps(mergedPeaks,tss_gr)@from)
-mergedPeaks_annotTXS[ovtss]="TSS"
-ovintraG=setdiff(unique(findOverlaps(mergedPeaks,txs_gr)@from),ovtss)
-mergedPeaks_annotTXS[ovintraG]="intraG"
-mergedPeaks_annotTXS[is.na(mergedPeaks_annotTXS)]="interG"
-```
-
-```{r annotationQuantification}
-DIR=paste0(WDIR,"/results_project12891/Encode_data/bigWig/")
-fileMetadata=paste0(DIR,"ENCODE_metadata_bigWig_foldChangeOverControl.released.txt")
-Encode_metadata <- read.table(file=fileMetadata,header=TRUE,sep="\t")
-
-marks=c("H3K4me3", "H3K4me2", "H3K27ac", "H3K4me1", "H3K79me2", "H3K27me3")
-AnnotationCols <- data.frame(
-  Mark=factor(sub("-human","",Encode_metadata$Experiment.target),levels = marks),
-  row.names = Encode_metadata$File.accession)
-
-colsG=brewer.pal(n = 3, name = "Greys")
-colsM=brewer.pal(n = 4, name = "Greens")
-colsS=brewer.pal(n = 3, name = "Blues")[2:3]
-nClust=8 ; colsC=brewer.pal(n = nClust, name = "Set2")
-colsAnnot <- list(Mark=c("H3K4me3"=colsS[1], "H3K4me2"=colsS[2], "H3K27ac"=colsM[1], "H3K4me1"=colsM[2], "H3K79me2"=colsM[3], "H3K27me3"=colsM[4]),
-  DM_sign=c("Gain"="#FF0000", "Loss"="#FF000030"),
-  DM_kind=c("DM"="#000000", "notDM"="transparent"),
-  Localization=c("TSS"=colsG[1], "intraG"=colsG[2], "interG"=colsG[3]),
-  CS=c("1"=colsC[1],"2"=colsC[2],"3"=colsC[3],"4"=colsC[4],"5"=colsC[5],"6"=colsC[6],"7"=colsC[7],"8"=colsC[8]))
-```
-
-```{bash getMtx_ENCODElogchange, eval=FALSE, include=FALSE}
-WDIR=/pasteur/projets/policy01/BioIT/12891_Chevalier_EpiMemStrep/results_project12891/
-module load UCSC-tools/v373
-module load bedtools/2.25.0
-
-# 1. Download bigWig files
-# 2. Get bedGraph files
-cd Encode_data/bigWig/
-for file in *.bigWig ; do 
-if [ ! -f ${file/bigWig/bedGraph} ] ; then echo $file ; 
-srun bigWigToBedGraph $file ${file/bigWig/bedGraph} &  fi ; done
-
-# 3. Overlap with mergedPeaks
-mark=H3K4me2
-filePeaks=$WDIR/tables/tables_v3_UCSC/*/${mark}_mergedPeaks.bed
-cd $WDIR/Encode_data/bigWig
-
-# 3.1. Small files
-emacs overlapBedGraph_mergedPeaks.sh
-#!/bin/sh
-i=$(head -n ${SLURM_ARRAY_TASK_ID} $1 | tail -n 1) 
-WDIR=$2
-filePeaks=$3
-mark=$4
-
-if [ ! -f $WDIR/${i/.bedGraph/_mergedPeaks_${mark}.ov} ] ; then 
-srun -o $i.ovb -e $i.e intersectBed -a $filePeaks -b $i -wb
-awk '{s[$4]=s[$4]+$NF ; n[$4]=n[$4]+1} END {for (p in n) {print p"\t"s[p]/n[p]}}' $i.ovb | 
-sort -k1,1 > $WDIR/${i/.bedGraph/_mergedPeaks_${mark}.ov} ; fi
-
-ls *bedGraph > fileList.txt
-n=`wc -l fileList.txt | awk '{print $1}'`
-sbatch -p hubbioit --qos hubbioit -o ov.o -e ov.e --mem=200G --array=1-$n%$n overlapBedGraph_mergedPeaks.sh fileList.txt $WDIR/Encode_data/bigWig/ $filePeaks $mark
-
-# 4 Assemble matrix
-echo "Peak_name" > header0 ; i=0
-for file in *$mark*ov ; do let j=$i+1
-echo ${file/_mergedPeaks_${mark}.ov} | paste header$i - > header$j ; ((i++)) ; done
-paste *$mark*ov | awk '{line=$1 ; for (i=2;i<=NF;i=i+2) {line=line"\t"$i} ; print line}' |
-cat header$j - > ENCODE_mergedPeaks_${mark}.ov
-
-# Clean
-rm $WDIR/Encode_data/bigWig/*.ovb $WDIR/Encode_data/bigWig/*.bedGraph
-rm $WDIR/Encode_data/bigWig/*.e $WDIR/Encode_data/bigWig/ov.o
-rm header*
-```
-
-```{r ENCODE_bigWig}
-DIR=paste0(WDIR,"/results_project12891/Encode_data/bigWig/")
-fileSignal=paste0(DIR,"ENCODE_mergedPeaks_",mark,".ov")
-matSignal <- read.table(file=fileSignal,header=TRUE,row.names = 1,sep="\t")
-
-markOrder=c(#H3K4me3
-  "ENCFF628ANV", "ENCFF510LTC", "ENCFF556OVF",
-  #"ENCFF313GRK", "ENCFF709KWY", "ENCFF635VEW",
-  "ENCFF250OYQ", "ENCFF189JCB", "ENCFF421ZNH",
-  #H3K4me2
-  "ENCFF876DTJ", "ENCFF975YWP", "ENCFF419LFZ",
-  #H3K4me1
-  "ENCFF165ZPD", "ENCFF569ZJQ", "ENCFF613NHX",
-  #H3K27ac
-  #"ENCFF216GUZ", "ENCFF597SRK", "ENCFF562LEK",
-  "ENCFF137KNW", "ENCFF103BLQ", "ENCFF663ILW",
-  #H3K79me2
-  "ENCFF892LYD", "ENCFF154HXA", "ENCFF931LYX",
-  #H3K27me3
-  "ENCFF819QSK", "ENCFF134YLO", "ENCFF336AWS")
-```
-
-```{r ENCODE_stats}
-ENCODE_stats <- function(matSignalSel,selPeaks,mergedPeaks_annotTXS,DMs) {
-allZero=which(apply(matSignalSel,1,sum)==0)
-if (length(allZero)>0) matSignalSel=matSignalSel[-allZero,]
-
-mtx=t(matSignalSel)
-d=dist(t(scale(mtx)))
-hc=hclust(d,method = "ward.D2")
-nClust=8
-clust=cutree(hc,k=nClust)
-
-selPeaks=intersect(selPeaks,row.names(matSignalSel))
-
-# Complete AnnotationRows
-AnnotationRows <- data.frame(
-  Localization=factor(mergedPeaks_annotTXS[selPeaks],levels = c("TSS","intraG","interG")),
-  CS=factor(clust[selPeaks]),
-  row.names = selPeaks
-)
-AnnotationRows=merge(AnnotationRows,DMs,by.x=0,by.y=1)
-row.names(AnnotationRows)=AnnotationRows$Row.names ; AnnotationRows=AnnotationRows[,-1]
-colnames(AnnotationRows)[3:4]=c("DM_kind","DM_sign")
-
-return(AnnotationRows)
-} 
-```
-
-```{r ENCODE_heatmap, fig.height=10, fig.width=8}
-ENCODE_heatmap <- function(matSignalSel,AnnotationRows,colsAnnot) {
-breaks=seq(-4,4,.1) ; cols=colorRampPalette(c("purple","black","yellow"))(length(breaks)+1)
-
-allZero=which(apply(matSignalSel,1,sum)==0)
-if (length(allZero)>0) matSignalSel=matSignalSel[-allZero,]
-
-pheatmap(matSignalSel, 
-         scale = "row", cluster_rows = T, cluster_cols = F,
-         clustering_method = "ward.D2",
-         show_rownames = F, show_colnames = F,
-         annotation_col = subset(AnnotationCols,row.names(AnnotationCols) %in% markOrder), 
-         annotation_row = AnnotationRows, 
-         annotation_colors = colsAnnot,
-         color = cols, border = NA,
-         #main = paste(mark,"DM peaks",length(DMpeaks)),
-         fontsize = 12)
-}
-```
-
-## All H3K4me2 peaks
-
-```{r ENCODE_counts, fig.height=10, fig.width=8, cache=TRUE}
-selPeaksAll=row.names(countNormLength_H3K4me2)
-selPeaks=selPeaksAll
-matSignalSel=na.omit(matSignal[selPeaks,markOrder])
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/AnnotationRows_allPeaks.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-AnnotationRows=ENCODE_stats(matSignalSel,selPeaks,mergedPeaks_annotTXS,DMs)
-AnnotationRows_allPeaks=AnnotationRows
-save(AnnotationRows_allPeaks,file=fileOut)}
-
-# Tables
-AnnotationRows=AnnotationRows_allPeaks
-t=table(AnnotationRows$CS)
-kable(ceiling(t/sum(t)*100),col.names = c("CS","Freq"))
-
-t1=table(AnnotationRows$Localization)
-tt=table(AnnotationRows$CS,AnnotationRows$Localization)
-kable(ceiling(t1/sum(t1)*100),col.names = c("Localization","Freq"))
-kable(ceiling(tt/as.numeric(t)*100),row.names = T)
-# 
-# t1=table(AnnotationRows$DM_sign)
-# kable(ceiling(t1/sum(t1)*100),col.names = c("H3K4me2 Profile","Freq"))
-# tt=table(AnnotationRows$CS,AnnotationRows$DM_sign)
-# kable(ceiling(tt/as.numeric(t)*100),row.names = T)
-# 
-# t1=table(AnnotationRows$DM_kind)
-# kable(ceiling(t1/sum(t1)*100),col.names = c("H3K4me2 Dynamic","Freq"))
-# tt=table(AnnotationRows$CS,AnnotationRows$DM_kind)
-# kable(ceiling(tt/as.numeric(t)*100),row.names = T)
-
-ENCODE_heatmap(matSignalSel,AnnotationRows,colsAnnot)
-```
-
-# Multi factorial analysis
-
-Multi factorial analysis (MFA) (Escofier & Pages, Computational Statistics & Data Analysis, 1994) studies several groups of variables (numerical and/or categorical) defined on the same set of individuals. MFA is a factor analysis applied to the array including all groups of variables. Roughly, the behaviour of the method is equivalent to PCA (concerning quantitative variables) or to MCA (concerning qualitative variables).
-
-
-```{r MFA_functions}
-#http://juliejosse.com/wp-content/uploads/2019/03/MFA_staf2015.pdf
-
-individualContributionPlot <- function(mfa.res,nPeaks) {
-contribs=data.frame()
-for (i in 1:5) {
-rs=mfa.res[["ind"]][["contrib"]][order(mfa.res[["ind"]][["contrib"]][,i],decreasing = TRUE),i] %>% cumsum() 
-peakId=names(rs)
-contribs=rbind(contribs,data.frame(peakId,seq(1,length(rs),1),as.vector(rs),rep(i,length(rs))))}
-colnames(contribs)=c("peakId","Rank","RankedSum","Dimension")
-contribs$Dimension=factor(contribs$Dimension)
-
-p <- ggplot(contribs,aes(x=Rank,y=RankedSum)) + geom_point(aes(color=Dimension))
-
-print(p)
-
-return(contribs)
-}
-
-MFAstats <- function(mfa.res,xlim,ylim) {
-# Eigen values of separate analysis
-# Describe dimensionality of each variable group
-#print(kable(sapply(mfa.res[["separate.analyses"]],"[[", 1),
-#            caption = "Eigen values of separate analysis"))
-# Eigen values of global analysis
-# How many components are necessary to describe the total inertia of the dataset
-print(kable(mfa.res[["eig"]][1:10,],
-            caption = "Eigen values of global analysis"))
-# Correlation coeffs
-# How much of the stt defined by component C depends on groups(s) G : G -> C
-# Fundamental to see if there are structures COMMON to groups -> useful to integrate 
-print(kable(mfa.res[["group"]][["correlation"]],caption = "Group correlation with factor"))
-# Inertia or group contribution 
-# How much of the inertia of group G is explained by component C : C -> G
-print(kable(mfa.res[["group"]][["cos2"]],caption = "Group inertia per factor"))
-# Group contribution 
-plotGrouprepresentation(mfa.res)
-# Invidual contribution to each component
-# Determine wheter an axis is due to only some individuals 
-#mfa.res[["ind"]][["contrib"]]
-#individualContributions=individualContributionPlot(mfa.res,nPeaks)
-# Categorical variable representation (centroid of inds )
-# Determine how are 
-#dimdesc(mfa.res)$Dim.2
-#splotIndrepresentation(mfa.res,xlim,ylim)
-# Variable contribution 
-plotVarrepresentation(mfa.res)
-}
-
-plotGrouprepresentation <- function(mfa.res) {
-p1 <- plot.MFA(mfa.res,choix = "group",axes = c(1,2), title = "")
-p2 <- plot.MFA(mfa.res,choix = "group",axes = c(1,3), title = "")
-p3 <- plot.MFA(mfa.res,choix = "group",axes = c(1,4), title = "")
-p4 <- plot.MFA(mfa.res,choix = "group",axes = c(1,5), title = "")
-print((p1 | p2) / (p3 | p4))
-}
-
-plotIndrepresentation <- function(mfa.res,xlim,ylim) {
-  if (ylim!="" & xlim!="") {
-p1 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,2), 
-               title = "",ylim=ylim,xlim=xlim)
-p2 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,3),
-               title = "",ylim=ylim,xlim=xlim)
-p3 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,4),
-               title = "",ylim=ylim,xlim=xlim)
-p4 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,5),           
-               title = "",ylim=ylim,xlim=xlim) } else  
-{
-p1 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,2), title = "")
-p2 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,3), title = "")
-p3 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,4), title = "")
-p4 <- plot.MFA(mfa.res,choix = "ind", invisible = "ind", axes = c(1,5), title = "")
-  
-}
-print((p1 | p2) / (p3 | p4))
-}
-
-plotVarrepresentation <- function(mfa.res) {
-  title=""
-  p1 <- plot.MFA(mfa.res,choix="var", axes=c(1,2),title=title,cex=.7)
-  p2 <- plot.MFA(mfa.res,choix="var", axes=c(1,3),title=title,cex=.7)
-  p3 <- plot.MFA(mfa.res,choix="var", axes=c(1,4),title=title,cex=.7)
-  p4 <- plot.MFA(mfa.res,choix="var", axes=c(1,5),title=title,cex=.7)
-  
-print((p1 | p2) / (p3 | p4) + plot_layout(guides = "collect"))
-
-}
-
-clusterStats <- function(hcpc.res) {
-### Cluster description
-## By variables: 
-# Test if variable (OMIC samples) values distribution per cluster is random or not 
-# For quantitative variables
-print(kable(hcpc.res[["desc.var"]]$quanti,caption = "Description by quantitative variables"))
-# For qualitative variables
-print(kable(hcpc.res[["desc.var"]][["category"]],caption = "Description by qualitative variables"))
-## By components 
-# Test if individuals coordinates distribution per cluster is random or not 
-# For quantitative variables
-print(kable(hcpc.res[["desc.axes"]][["quanti"]],
-            caption = "Description by factors (quantitative variables)"))
-# For qualitative variables
-print(kable(hcpc.res[["desc.var"]][["test.chi2"]],
-            caption = "Description by factors (qualitative variables)"))
-## By individuals (genomic regions)
-# Nearest individuals to the cluster's centroid
-print("Description by individuals (nearest):")
-print(hcpc.res[["desc.ind"]]$para)
-# Furthest individuals to the cluster's centroid
-print("Description by individuals (furthest):")
-print(hcpc.res[["desc.ind"]]$dist)
-}
-
-```
-
-```{r MFA_prepareMtx}
-# Differential analysis info
-AnnotationRows=AnnotationRows_allPeaks[,2:4]
-#DEGs2peaks=merge(DEGs,peaks2GeneTgene_H3K4me2[,c(3,1)],by=1)
-#AnnotationRows=merge(AnnotationRows,DEGs2peaks[,c(5,4,3,1)],by.x=0,by.y=1)
-DEGs2peaks=merge(DEGs,peaks2GeneTgene_H3K4me2,by.x=1,by.y=2)
-AnnotationRows=merge(AnnotationRows,DEGs2peaks[,c(5,3,4,1,2,6:8)],by.x=0,by.y=1)
-AnnotationRows$Distance=log(abs(AnnotationRows$Distance)+1,10)
-#AnnotationRows=AnnotationRows[!duplicated(AnnotationRows[,1]),]
-#AnnotationRows <- AnnotationRows[,-c(1,8)] ; 
-colnames(AnnotationRows)[5:6] <- c("DEG_kind","DEG_sign")
-AnnotationRows$DEG_kind=factor(AnnotationRows$DEG_kind,levels = c("common","only_H3vsUI","only_DIvsD7","notDE"))
-#AnnotationRows$DEG_sign=factor(AnnotationRows$DEG_sign,levels = c("Up","Down","UpDown","DownUp"))
-AnnotationRows$DEG_sign=factor(AnnotationRows$DEG_sign,levels = c("Up","Down"))
-#Re-order columns
-AnnotationRows=AnnotationRows[,c(1,7:11,2:6)]
-
-# Merge methylation, transcription and epigenome signal
-METHOME=merge(t(scale(t(countNormLength_H3K4me2))),
-              DM_results$D7vsUI[,1:2],by=0)
-colnames(METHOME)[8:9]=c("H3K4me2_log2baseMean","H3K4me2_D7vsUI_log2FC")
-METHOME$H3K4me2_log2baseMean=log(METHOME$H3K4me2_log2baseMean,2)
-
-# Merge TOME data
-Data=Data_RMA_NoBatchComBat_Annot
-t=na.omit(Data[!duplicated(Data$ENTREZID),])[,c(1,5,9,2,6,10,3,7,11,4,8,12)]
-TOME=log(t,2)
-row.names(TOME)=na.omit(Data[!duplicated(Data$ENTREZID),14])
-
-# Merge ENCODE data
-samples=AnnotationCols[markOrder,]
-names(samples)=markOrder
-ENCODEmean=t(scale(t(
-  data.frame(
-  H3K4me3=apply(matSignal[,names(samples[samples=="H3K4me3"])],1,function(x) mean((x))),
-  H3K4me2=apply(matSignal[,names(samples[samples=="H3K4me2"])],1,function(x) mean((x))),
-  H3K4me1=apply(matSignal[,names(samples[samples=="H3K4me1"])],1,function(x) mean((x))),
-  H3K27ac=apply(matSignal[,names(samples[samples=="H3K27ac"])],1,function(x) mean((x))),
-  H3K79me2=apply(matSignal[,names(samples[samples=="H3K79me2"])],1,function(x) mean((x))),
-  H3K27me3=apply(matSignal[,names(samples[samples=="H3K27me3"])],1,function(x) mean((x))),
-  row.names = row.names(matSignal))
-)))
-
-# EPITOME all 
-EPITOME=merge(AnnotationRows,METHOME,by=1)
-EPITOME=merge(EPITOME,ENCODEmean,by.x=1,by.y=0)
-EPITOME=merge(EPITOME,TOME,by.x=2,by.y=0)
-row.names(EPITOME)=paste0(EPITOME$Row.names,"_",EPITOME$ENTREZID)
-colnames(EPITOME)[2]="peakID"
-```
-
-```{r MFA_getMtx1}
-
-#### Get INPUT matrix
-
-# Select all peak2gene associations where there is a DMR or a DEG
-mfa.EPITOME=subset(EPITOME,EPITOME$DM_kind=="DM" | EPITOME$DEG_kind!="notDE")
-corrTh=.4
-mfa.EPITOME_corrTh=subset(EPITOME,(EPITOME$DM_kind=="DM" | EPITOME$DEG_kind!="notDE") &
-                    abs(EPITOME$Correlation)>corrTh)
-distTh=20e3
-mfa.EPITOME_distTh=subset(EPITOME,(EPITOME$DM_kind=="DM" | EPITOME$DEG_kind!="notDE") &
-                   EPITOME$Distance<log(distTh+1,10))
-# Save INPUT matix objects
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.EPITOME.RData")
-if (!file.exists(fileOut)) {save(mfa.EPITOME,file = fileOut)}
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.EPITOME_distTh.RData")
-if (!file.exists(fileOut)) {save(mfa.EPITOME_distTh,file = fileOut)}
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.EPITOME_corrTh.RData")
-if (!file.exists(fileOut)) {save(mfa.EPITOME_corrTh,file = fileOut)}
-
-```
-
-## Input matrix
-
-MFA is performed on the `r dim(mfa.EPITOME)[1]` peak-gene associations that include a DMR or a DEG. This is the distribution of quantitative and qualitative group variables.
-
-```{r MFA_getMtx2}
-# Check matrix behaviour
-boxplot(mfa.EPITOME[,c(26:37,18:19,4,5,20:25)],las=2,pch=19,cex=.5,
-        ylab="Scaled values by group",cex.axis=.8,
-        col=c(rep(palette()[8],12),rep(palette()[6],2),
-              rep(palette()[3],1),rep(palette()[4],1),rep(palette()[5],6)),
-        main="Distribution of quantitative variables for MFA")
-
-kable(table(mfa.EPITOME$DEG_kind,mfa.EPITOME$DEG_sign),caption = "DEGs for MFA")
-kable(table(mfa.EPITOME$DM_kind,mfa.EPITOME$DM_sign),caption = "DMRs for MFA")
-```
-
-## MFA components description
-
-```{r MFA, fig.height=10, fig.width=10, cache=TRUE}
-
-#### MFA
-colInds=c(26:37,18:19,4,5,20:25,7,10:11,8:9)
-
-# fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_distTh.RData")
-# if (file.exists(fileOut)) {load(fileOut)} else {
-#   mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_distTh  <- MFA(mfa.EPITOME_distTh[,colInds], ncp=5, 
-#     group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-#     name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(6))
-# save(mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_distTh,file=fileOut)}
-# 
-# fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_corrTh.RData")
-# if (file.exists(fileOut)) {load(fileOut)} else {
-#   mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_corrTh  <- MFA(mfa.EPITOME_corrTh[,colInds], ncp=5, 
-#     group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-#     name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(6))
-# save(mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_corrTh,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-  mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS  <- MFA(mfa.EPITOME[,colInds], ncp=5, 
-    group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-    name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(6))
-save(mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS,file=fileOut)}
-
-#### MFA stats
-mfa.res=mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS
-title="" ; xlim=c(-1,1) ; ylim=c(-1,1)
-MFAstats(mfa.res,"","")
-#print(plot.MFA(mfa.res,choix="axes", axes=c(1,2),title=title))
-plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(1,2), ylim=c(-1,2))
-p1 <- plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(1,2), xlim=xlim, ylim=ylim)
-p2 <- plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(3,2), xlim=xlim, ylim=ylim)
-p3 <- plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(3,4), xlim=xlim, ylim=ylim)
-p4 <- plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(5,4), xlim=xlim, ylim=ylim)
-print((p1 | p2) / (p3 | p4))
-```
-
-# Hierarchical clustering on MFA components
-
-Clustering is performed on the first 5 components obtained with the MFA.
-
-```{r HCPCclustering, fig.height=10, fig.width=10, cache=TRUE}
-
-#### Clustering
-nClust=12 ; maxClustSize=1400
-hcpc.res <- HCPC(mfa.res, nb.clust=nClust, conso=0, min=3, max=10,
-                 metric = "euclidean", method="ward",graph = FALSE,nb.par = maxClustSize)
-
-#clusterStats(hcpc.res)
-
-# Cluster description in term of qualitative variables
-lapply(hcpc.res[["desc.var"]][["category"]],function(x) {kable(subset(x,x[,"v.test"]>5.3))})
-
-# Cluster description
-tmp=hcpc.res[["data.clust"]][,23:28]
-table=merge(mfa.EPITOME[,1:6],tmp,by=0)
-colnames(table)[1]="peakIDgene_association"
-colnames(table)[13]="MFAclust"
-
-layout(matrix(1:2,nrow = 2))
-boxplot(table$Distance ~ table$MFAclust,pch=19,cex=.5,
-        xlab="",ylab="Distance peak - gene")
-boxplot(table$Correlation ~ table$MFAclust,pch=19,cex=.5,
-        xlab="MFA cluster",ylab="Correlation peak_hist-gene_exp")
-
-# Table of associations with clusters
-fileOut=paste0(WDIR,"/results_project12891/tables/integration/mfa_clust_TOMEMETHDISTCORREPIGDEGDMR_CS.txt")
-if (!file.exists(fileOut)) {
-# Add individual data
-for (i in 1:nClust) {
-  table=merge(table,as.data.frame(hcpc.res[["desc.ind"]][["para"]][[i]]),by.x=1,by.y=0,all.x=TRUE)
-  colnames(table)[dim(table)[2]]=paste0("distFromCentroid_MFAclust",i)
-}
-
-write.table(table,file = fileOut,sep="\t",quote=FALSE,row.names = FALSE, col.names = TRUE)
-}
-
-```
-
-## Statistics on clusters
-
-Number of total genes/peaks per cluster (Ngenes,Npeaks). Number of genes/peaks belonging only to a given cluster (NgeneOnlyClust, NpeakOnlyClust). Nearest cluster in terms of shared genes/peaks (nearestClustGene,nearestClustPeak) and corresponding number of shared genes/peaks (nearestClustNgene,nearestClustNpeak).
-
-```{r}
-geneFreq_allAss=table(table$ENTREZID)
-singleClustGenes=names(subset(geneFreq_allAss,geneFreq_allAss==1))
-peakFreq_allAss=table(table$peakID)
-singleClustPeaks=names(subset(peakFreq_allAss,peakFreq_allAss==1))
-clusts=split(table,table$MFAclust)
-geneClust=unique(table[,c(2,13)])
-peakClust=unique(table[,c(3,13)])
-
-clustStats=data.frame(Counts=c("Ngenes","NgeneOnlyClust","nearestClustGene","nearestClustNgene",
-                               "Npeaks","NpeaksOnlyClust","nearestClustPeak","nearestClustNpeak")) ; i=1
-for (clust in clusts) {
-  Ngenes=length(unique(clust$ENTREZID))
-  NgeneOnlyClust=length(setdiff(unique(clust$ENTREZID),
-                        subset(geneClust,geneClust$MFAclust!=i)$ENTREZID))
-  nearestClustGene=names(sort(table(geneClust[geneClust$ENTREZID %in% unique(clust$ENTREZID),]$MFAclust),decreasing = T)[2])
-  nearestClustNgene=as.numeric(sort(table(geneClust[geneClust$ENTREZID %in% unique(clust$ENTREZID),]$MFAclust),decreasing = T)[2])
-  Npeaks=length(unique(clust$peakID))
-  NpeaksOnlyClust=length(setdiff(unique(clust$peakID),
-                         subset(peakClust,peakClust$MFAclust!=i)$peakID))
-  nearestClustPeak=names(sort(table(peakClust[peakClust$peakID %in% unique(clust$peakID),]$MFAclust),decreasing = T)[2])
-  nearestClustNpeak=as.numeric(sort(table(peakClust[peakClust$peakID %in% unique(clust$peakID),]$MFAclust),decreasing = T)[2])
-  clustStats=cbind(clustStats,c(Ngenes,NgeneOnlyClust,nearestClustGene,nearestClustNgene,Npeaks,NpeaksOnlyClust,nearestClustPeak,nearestClustNpeak)) ; i=i+1
-}
-colnames(clustStats)=c("Counts",seq(1,nClust,1))
-kable(clustStats)
-```
-
-
-# Explore gene sets from functional analysis 
-
-Check representation of selected gene sets (GS) among genes of every MFA cluster. Values correspond to the ratio between the observed number of genes belonging to a given GS and the expected number per cluster. 
-
-```{r functions_genesetClusterRepresentation, cache=TRUE}
-getClustGSenrich <- function(table,selectedGSs,m_t2g_lists) {
-  tmp=unique(table[,c(2,13)])
-  t=table(tmp$MFAclust)
-  expected=t/sum(t)
-  overRep=data.frame(matrix(nrow = length(selectedGSs),ncol = nClust))
-  colnames(overRep)=seq(1,nClust,1) ; row.names(overRep)=selectedGSs
-  for (selectedGS in selectedGSs) {
-    ENTREZ_IDs=unique(c(m_t2g_lists[[selectedGS]]))
-    subTable=tmp[tmp$ENTREZID %in% ENTREZ_IDs,]
-  
-    # Clustering distribution
-    obs=table(subTable$MFAclust)/sum(table(subTable$MFAclust))
-  
-    overRep[selectedGS,] <- obs/expected
-  }
-    return(overRep)
-
-}
-
-m_t2g_df <- msigdbr(species = "Homo sapiens", category = "C2") %>% 
-  dplyr::select(gs_subcat, gs_name, entrez_gene)
-tmp=m_t2g_df[m_t2g_df$gs_subcat=="CP:REACTOME",]
-m_t2g_list_reactome=split(as.character(tmp$entrez_gene),gsub("REACTOME_","",tmp$gs_name))
-# tmp=m_t2g_df[m_t2g_df$gs_subcat=="CP:KEGG",]
-# m_t2g_list_kegg=split(as.character(tmp$entrez_gene),gsub("KEGG_","",tmp$gs_name))
-
-# m_t2g_lists=c(m_t2g_list_reactome,m_t2g_list_kegg)
-m_t2g_lists=m_t2g_list_reactome
-```
-
-## GS specific to 1st infection
-```{r genesetClusterRepresentation_only1st}
-  
-selectedGSs_only1st=c("NUCLEAR_RECEPTOR_TRANSCRIPTION_PATHWAY",
-              "CHEMOKINE_RECEPTORS_BIND_CHEMOKINES",
-              "CHROMATIN_MODIFYING_ENZYMES",
-              "PKMTS_METHYLATE_HISTONE_LYSINES",
-              "HATS_ACETYLATE_HISTONES",
-              "VESICLE_MEDIATED_TRANSPORT",
-              "MEMBRANE_TRAFFICKING",
-              "DNA_DOUBLE_STRAND_BREAK_RESPONSE",
-              "ANTIGEN_PROCESSING_UBIQUITINATION_PROTEASOME_DEGRADATION",
-              "PROCESSING_OF_DNA_DOUBLE_STRAND_BREAK_ENDS",
-              "DNA_DAMAGE_TELOMERE_STRESS_INDUCED_SENESCENCE")
-tab=getClustGSenrich(table,selectedGSs_only1st,m_t2g_lists)
-kable(tab,digits = rep(1,nClust))
-```              
-
-## GS specific to 2nd infection
-```{r genesetClusterRepresentation_only2nd}
-selectedGSs_only2nd=c("INFLUENZA_INFECTION",
-              "CELLULAR_RESPONSES_TO_EXTERNAL_STIMULI",
-              "INFECTIOUS_DISEASE","DISEASE",
-              "SIGNALING_BY_ROBO_RECEPTORS","SIGNALING_BY_NUCLEAR_RECEPTORS")
- tab=getClustGSenrich(table,selectedGSs_only2nd,m_t2g_lists)
-kable(tab,digits = rep(1,nClust))
-```
-
-## GS common to both infections
-```{r genesetClusterRepresentation_common}
-selectedGSs_common=c("INTERLEUKIN_4_AND_INTERLEUKIN_13_SIGNALING",
-              "SIGNALING_BY_INTERLEUKINS","DNA_DOUBLE_STRAND_BREAK_REPAIR","DNA_REPAIR",
-              "FOXO_MEDIATED_TRANSCRIPTION",
-              "NEGATIVE_REGULATION_OF_MAPK_PATHWAY",
-              "CYTOKINE_SIGNALING_IN_IMMUNE_SYSTEM",
-              "INTERLEUKIN_10_SIGNALING",
-              "HDR_THROUGH_SINGLE_STRAND_ANNEALING_SSA_",
-              "FORMATION_OF_SENESCENCE_ASSOCIATED_HETEROCHROMATIN_FOCI_SAHF_")
-tab=getClustGSenrich(table,selectedGSs_common,m_t2g_lists)
-kable(tab,digits = rep(1,nClust))
-
-```
-
-```{bash searchSpecificGenes, include=FALSE, eval=FALSE}
-cd /Volumes/bioit/12891_Chevalier_EpiMemStrep/results_project12891/tables/
-# Count genes only 2nd in all clusters
-awk '$(NF-2)>.4 {print $2}' transcriptome/Sig_H3vsUI_D7vsUI_DIvsD7_DIvsH3_Annot.txt> id ; 
-grep -f id integration/mfa_clust_TOMEMETHDISTCORREPIGDEGDMR_CS.txt|awk '{print $4,$(NF-12)}' | 
-sort -u | awk '{print $2}'|sort |uniq -c  
-
-# Find genes only 2nd in cluster 7
-awk '$(NF-2)>.4 {print $2}' transcriptome/Sig_H3vsUI_D7vsUI_DIvsD7_DIvsH3_Annot.txt> id
-grep -f id integration/mfa_clust_TOMEMETHDISTCORREPIGDEGDMR_CS.txt|
-awk '$(NF-12)==7 {print $4}' | sort -u 
-
-rm id
-
-```
-
-
-# References
-
-```{r}
-sessionInfo()
-```
-
-
-```{r MFA_inputMatrixBenchmark, include=FALSE, eval=FALSE}
-ass=sample(row.names(mfa.EPITOME_distTh), 500, replace = FALSE, prob = NULL)
-xlim=c(-.6,.6) ; ylim=c(-.6,.6)
-
-title="TOME-METH_DIST-CORR-EPIG-CS-DEG-DMR"
-colInds=c(26:37,18:19,4,5,20:25,7,10:11,8:9)
-mfa.res_TOMEMETH_DISTCORREPIGCSDEGDMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(3,4,5,6,7,8))
-
-title="TOME-METH-DIST-CORR_EPIG-CS-DEG-DMR"
-mfa.res_TOMEMETHDISTCORR_EPIGCSDEGDMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),
-  num.group.sup=c(5,6,7,8))
-
-title="TOME-METH-DIST-CORR-EPIG_CS-DEG-DMR"
-mfa.res_TOMEMETHDISTCORREPIG_CSDEGDMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(6,7,8))
-
-title="TOME-METH-DIST-CORR-EPIG-CS_DEG-DMR"
-mfa.res_TOMEMETHDISTCORREPIGCS_DEGDMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(7,8))
-
-title="TOME-METH-DIST-CORR-EPIG-CS-DEG_DMR"
-mfa.res_TOMEMETHDISTCORREPIGCSDEG_DMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(8))
-
-title="TOME-METH-DIST-CORR-EPIG-CS-DEG-DMR"
-mfa.res_TOMEMETHDISTCORREPIGCSDEGDMR_distTh <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"))
-
-title="TOME-METH-DMR-DEG-DIST-CORR-EPIG_CS"
-mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_distTh  <- MFA(mfa.EPITOME_distTh[ass,colInds], ncp=5, 
-  group=c(12,2,1,1,6,1,2,2), type=c(rep("c",5),rep("n",3)),
-  name.group=c("TOME","METH","DIST","CORR","EPIG","CS","DEG","DMR"),num.group.sup=c(6))
-
-results=list(TOMEMETH=mfa.res_TOMEMETH_DISTCORREPIGCSDEGDMR_distTh,
-     TOMEMETHDISTCORR=mfa.res_TOMEMETHDISTCORR_EPIGCSDEGDMR_distTh,
-     TOMEMETHDISTCORREPIG=mfa.res_TOMEMETHDISTCORREPIG_CSDEGDMR_distTh,
-     TOMEMETHDISTCORREPIGCS=mfa.res_TOMEMETHDISTCORREPIGCS_DEGDMR_distTh,
-     TOMEMETHDISTCORREPIGCSDEG=mfa.res_TOMEMETHDISTCORREPIGCSDEG_DMR_distTh,
-     TOMEMETHDISTCORREPIGCSDEGDMR=mfa.res_TOMEMETHDISTCORREPIGCSDEGDMR_distTh,
-     TOMEMETHDISTCORREPIGDEGDMR=mfa.res_TOMEMETHDISTCORREPIGDEGDMR_CS_distTh
-     )
-
-xlim=c(-1,1) ; ylim=c(-1,1)
-i=1
-for (mfa.res in results) {
-  title=names(results)[i] ; print(title)
-  print(kable(mfa.res[["eig"]][1:10,],caption = "Eigen values of global analysis"))
-  print(plot.MFA(mfa.res,choix="axes", axes=c(1,2),title=title))
-  print(plot.MFA(mfa.res,choix="var", axes=c(1,2),title=title))
-  print(plot.MFA(mfa.res,choix="var", axes=c(3,2),title=title))
-  print(plot.MFA(mfa.res,choix="var", axes=c(3,4),title=title))
-  plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(1,2))
-  plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(1,2), xlim=xlim, ylim=ylim)
-  plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(3,2), xlim=xlim, ylim=ylim)
-  plot.MFA(mfa.res,choix="ind", invisible="ind", axes=c(3,4), xlim=xlim, ylim=ylim)
-  i=i+1
-}
-
-MFAstats(mfa.res,xlim,ylim)
-
-###################################
-###################################
-# OTHER INPUT matrices
-
-mfa.EPITOME_notDM_distTh=subset(EPITOME,(EPITOME$DM_kind!="DM" & EPITOME$DEG_kind!="notDE") &
-                   EPITOME$Distance<log(distTh+1,10))
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_distTh.RData")
-colInds=c(26:37,18:19,10:11,8:9)
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_distTh2 <- MFA(mfa.EPITOME_distTh[,colInds], ncp=5, 
-    group=c(12,2,2,2), type=c(rep("c",2),rep("n",2)),
-    name.group=c("TOME","METH","DEG","DMR"),
-    num.group.sup=c(3,4))
-save(mfa.res_TOMEMETH_distTh2,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_EPIG.RData")
-colInds=c(26:37,18:19,7,10:11,8:9)
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_EPIG <- MFA(mfa.EPITOME[,colInds], ncp=5, 
-    group=c(12,2,1,2,2), type=c(rep("c",2),rep("n",3)),
-    name.group=c("TOME","METH","EPIG","DEG","DMR"),
-    num.group.sup=c(3,4,5))
-save(mfa.res_TOMEMETH_EPIG,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_EPIG_distTh.RData")
-colInds=c(26:37,18:19,7,10:11,8:9)
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_EPIG_distTh <- MFA(mfa.EPITOME_distTh[,colInds], ncp=5, 
-    group=c(12,2,1,2,2), type=c(rep("c",2),rep("n",3)),
-    name.group=c("TOME","METH","EPIG","DEG","DMR"),
-    num.group.sup=c(3,4,5))
-save(mfa.res_TOMEMETH_EPIG_distTh,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_EPIG_notDM_distTh.RData")
-colInds=c(26:37,18:19,7,10:11,8:9)
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_EPIG_notDM_distTh <- MFA(mfa.EPITOME_notDM_distTh[,colInds], ncp=5, 
-    group=c(12,2,1,2,2), type=c(rep("c",2),rep("n",3)),
-    name.group=c("TOME","METH","EPIG","DEG","DMR"),
-    num.group.sup=c(3,4,5))
-save(mfa.res_TOMEMETH_EPIG_notDM_distTh,file=fileOut)}
-
-
-```
-
-```{r MFAold, eval=FALSE, include=FALSE}
-
-selPeaks=unique(c(as.character(DMs[DMs$Kind=="DM",]$peakID),selPeaks_DEGs_nearest))
-selPeaks_nearest=unique(c(as.character(DMs[DMs$Kind=="DM",]$peakID),
-                  intersect(selPeaks_DEGs_nearest,selPeaksAllRep)))
-selPeaks_target=unique(c(as.character(DMs[DMs$Kind=="DM",]$peakID),
-                  intersect(selPeaks_DEGs_target,selPeaksAllRep)))
-nPeaks=length(selPeaks)
-
-selPeaks_nearest=unique(c(as.character(DMs[DMs$Kind=="DM",]$peakID),
-                  intersect(selPeaks_DEGs_nearest,selPeaksAllRep)))
-
-#### MFA 
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_EPIG_nearest.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_EPIG_nearest <- MFA(na.omit(EPITOME[selPeaks_nearest,c(7:18,5,6,25:30)]), ncp=5, 
-    group=c(12,2,2,2,2), type=c(rep("c",2),rep("n",3)),
-    name.group=c("TOME","METH","EPIG","DM","DEG"),
-    num.group.sup=c(4,5))
-save(mfa.res_TOMEMETH_EPIG_nearest,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEMETH_EPIG_target.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOMEMETH_EPIG_target <- MFA(na.omit(EPITOME[selPeaks_target,c(7:18,5,6,25:30)]), ncp=5, 
-    group=c(12,2,2,2,2), type=c(rep("c",2),rep("n",3)),
-    name.group=c("TOME","METH","EPIG","DM","DEG"),
-    num.group.sup=c(4,5))
-save(mfa.res_TOMEMETH_EPIG_target,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_EPIGMETH_nearest.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_EPIGMETH_nearest <- MFA(na.omit(EPITOME[selPeaks_nearest,c(5,6,19:30)]), ncp=5, 
-    group=c(2,6,1,1,2,2), type=c(rep("c",2),rep("n",4)),
-    name.group=c("METH","EPIGENOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(4))
-save(mfa.res_EPIGMETH_nearest,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_EPIGMETH_target.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_EPIGMETH_target <- MFA(na.omit(EPITOME[selPeaks_target,c(5,6,19:30)]), ncp=5, 
-    group=c(2,6,1,1,2,2), type=c(rep("c",2),rep("n",4)),
-    name.group=c("METH","EPIGENOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(4))
-save(mfa.res_EPIGMETH_target,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_EPIG_nearest.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_EPIG_nearest <- MFA(na.omit(EPITOME[selPeaks_nearest,c(19:30)]), ncp=5, 
-    group=c(6,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("EPIGENOME","LOC","CSkind","METH","DEG"),
-    num.group.sup=c(3))
-save(mfa.res_EPIG_nearest,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_EPIG_target.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_EPIG_target <- MFA(na.omit(EPITOME[selPeaks_target,c(19:30)]), ncp=5, 
-    group=c(6,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("EPIGENOME","LOC","CSkind","METH","DEG"),
-    num.group.sup=c(3))
-save(mfa.res_EPIG_target,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOME_nearest.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOME_nearest <- MFA(na.omit(EPITOME[selPeaks_nearest,c(7:18,25:30)]), ncp=5, 
-    group=c(12,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("TOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(5))
-save(mfa.res_TOME_nearest,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOME_target.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_TOME_target <- MFA(na.omit(EPITOME[selPeaks_target,c(7:18,25:30)]), ncp=5, 
-    group=c(12,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("TOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(5))
-save(mfa.res_TOME_target,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_TOMEonly.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-tmp=na.omit(EPITOME[selPeaks_nearest,c(7:18,29:30)])
-tmp=subset(tmp,tmp$DEG_sign!="DownUp" & tmp$DEG_sign!="UpDown")
-mfa.res_TOMEonly <- MFA(tmp, ncp=5, 
-    group=c(12,2), type=c(rep("c",1),rep("n",1)),
-    name.group=c("TOME","DEG"))
-save(mfa.res_TOMEonly,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_METHOME_nearest.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_METHOME_nearest <- MFA(na.omit(EPITOME[selPeaks_nearest,c(1:4,25:30)]), ncp=5, 
-    group=c(4,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("METHOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(4))
-save(mfa.res_METHOME_nearest,file=fileOut)}
-
-fileOut=paste0(WDIR,"/results_project12891/scripts/Robjects/mfa.res_METHOME_target.RData")
-if (file.exists(fileOut)) {load(fileOut)} else {
-mfa.res_METHOME_target <- MFA(na.omit(EPITOME[selPeaks_target,c(1:4,25:30)]), ncp=5, 
-    group=c(4,1,1,2,2), type=c(rep("c",1),rep("n",4)),
-    name.group=c("METHOME","LOC","CSkind","DM","DEG"),
-    num.group.sup=c(4))
-save(mfa.res_METHOME_target,file=fileOut)}
-
-
-mfa.res=mfa.res_TOMEMETH_EPIG_nearest
-mfa.res=mfa.res_EPIG_nearest
-mfa.res=mfa.res_EPIGMETH_nearest
-mfa.res=mfa.res_TOME_nearest
-mfa.res=mfa.res_TOMEonly ; nClust=8
-mfa.res=mfa.res_METHOME_nearest
-
-MFAstats(mfa.res)
-  
-
-#### Clustering
-
-hcpc.res <- HCPC(mfa.res, nb.clust=nClust, conso=0, min=3, max=10,
-                 metric = "euclidean")
-
-clusterStats(hcpc.res)
-
-# d=dimdesc(mfa.res)
-# 
-# #### Confidence ellipses around categories per variable
-# plotellipses(mfa.res,axes = c(2,3), means=FALSE)
-# plotellipses(mfa.res,axes = c(2,3),
-#              keepvar="DEG_kind",label = "none") ## for 1 variable
-
-```
-
-```{r}
-plot <- function(expected,obs,nClust) {
-  ggplot_df=rbind(as.data.frame(expected),as.data.frame(obs))
-  ggplot_df$Set=c(rep("All",nClust),rep("Subset",nClust))
-  colnames(ggplot_df)[1:2]=c("MFAcluster","Freq")
-  
-  p <- ggplot(data=ggplot_df, aes(x=Set, y=Freq, fill=MFAcluster)) +
-  geom_bar(stat="identity")+
-  scale_fill_brewer(palette="Paired")+
-  theme_minimal() + ggtitle(selectedGS)
-  print(p)
-
-}
-
-```
-
diff --git a/Wednesday/MFA/RData/AnnotationRows.RData b/Wednesday/MFA/RData/AnnotationRows.RData
new file mode 100644
index 0000000000000000000000000000000000000000..cb60233e617b42b24bbd0546cb70ae9528ed5245
Binary files /dev/null and b/Wednesday/MFA/RData/AnnotationRows.RData differ
diff --git a/Wednesday/MFA/RData/DEGs.RData b/Wednesday/MFA/RData/DEGs.RData
new file mode 100644
index 0000000000000000000000000000000000000000..c7a361070a07cf871b67e4039b7acbe08309e5e6
Binary files /dev/null and b/Wednesday/MFA/RData/DEGs.RData differ
diff --git a/Wednesday/MFA/RData/DE_results_Annot.RData b/Wednesday/MFA/RData/DE_results_Annot.RData
new file mode 100644
index 0000000000000000000000000000000000000000..f5e986e96e3b8164310994061a25f2d0bd64cf70
Binary files /dev/null and b/Wednesday/MFA/RData/DE_results_Annot.RData differ
diff --git a/Wednesday/MFA/RData/DM_results_H3K4me2.RData b/Wednesday/MFA/RData/DM_results_H3K4me2.RData
new file mode 100644
index 0000000000000000000000000000000000000000..151bb398221a7e5f40a079075185432fef1f4924
Binary files /dev/null and b/Wednesday/MFA/RData/DM_results_H3K4me2.RData differ
diff --git a/Wednesday/MFA/RData/DMpeaks_H3K4me2.RData b/Wednesday/MFA/RData/DMpeaks_H3K4me2.RData
new file mode 100644
index 0000000000000000000000000000000000000000..ff7c6adf0ac33fd6f20bf70c306bb4b22465eee4
Binary files /dev/null and b/Wednesday/MFA/RData/DMpeaks_H3K4me2.RData differ
diff --git a/Wednesday/MFA/RData/H3K4me2_allPeaks_CS.RData b/Wednesday/MFA/RData/H3K4me2_allPeaks_CS.RData
new file mode 100644
index 0000000000000000000000000000000000000000..e09d7df23eca1cab0c9540566c7cdc06a290d518
Binary files /dev/null and b/Wednesday/MFA/RData/H3K4me2_allPeaks_CS.RData differ
diff --git a/Wednesday/MFA/RData/TOME_RMAandNoBatch.RData b/Wednesday/MFA/RData/TOME_RMAandNoBatch.RData
new file mode 100644
index 0000000000000000000000000000000000000000..861f3d7d959f0faff01818727cebaeb7fd89b2da
Binary files /dev/null and b/Wednesday/MFA/RData/TOME_RMAandNoBatch.RData differ
diff --git a/Wednesday/MFA/RData/countNormLength_H3K4me2.RData b/Wednesday/MFA/RData/countNormLength_H3K4me2.RData
new file mode 100644
index 0000000000000000000000000000000000000000..b7befa2d82a48aaa80dc851703e96a581d0019e6
Binary files /dev/null and b/Wednesday/MFA/RData/countNormLength_H3K4me2.RData differ
diff --git a/Wednesday/MFA/RData/mfa.EPITOME.RData b/Wednesday/MFA/RData/mfa.EPITOME.RData
new file mode 100644
index 0000000000000000000000000000000000000000..f2c22b1fdd16878897898f124f22a37503771c29
Binary files /dev/null and b/Wednesday/MFA/RData/mfa.EPITOME.RData differ