From c5073d72a7ecf8c83eaea2a438057212ba0df9bc Mon Sep 17 00:00:00 2001
From: Cosmin  SAVEANU <cosmin.saveanu@pasteur.fr>
Date: Mon, 3 Jun 2019 14:09:27 +0200
Subject: [PATCH] Added the R script

---
 Categorical heatmaps/generate_heatmap.R | 65 +++++++++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 Categorical heatmaps/generate_heatmap.R

diff --git a/Categorical heatmaps/generate_heatmap.R b/Categorical heatmaps/generate_heatmap.R
new file mode 100644
index 0000000..114dbf8
--- /dev/null
+++ b/Categorical heatmaps/generate_heatmap.R	
@@ -0,0 +1,65 @@
+# Read the tab-separated table: column 1 ("Gene") holds identifiers, the rest values.
+df <- read.table(file = "data.txt", sep = "\t", stringsAsFactors = FALSE, header = TRUE)
+# Per-row maxima of the value columns (negative index drops column 1; drop = FALSE keeps a data frame).
+maxes <- apply(df[, -1, drop = FALSE], MARGIN = 1, FUN = max)
+# Scale every row by its own maximum; a row whose maximum is 0 becomes NaN.
+normdf <- sweep(df[, -1, drop = FALSE], MARGIN = 1, STATS = maxes, FUN = "/")
+dfn <- data.frame(Gene = df$Gene, normdf)
+
+library(reshape2)  # melt()
+library(ggplot2)   # plotting functions used below: ggplot(), geom_rect(), scale_*(), theme_*(), unit()
+
+mybreaks <- c(-0.1, 0.25, 0.5, 0.75, 1.1)  # bin edges passed to cut(); outer edges lie beyond 0 and 1
+mylabels <- c("0-25%", "25-50%", "50-75%", "75-100%")  # one label per interval between consecutive breaks
+numbertocategory <- function(vctr, brks, lbls) {  # bin numeric values into labelled intervals
+  cut(as.numeric(vctr), breaks = brks, labels = lbls)  # returns a factor whose levels are lbls
+}
+numbertocategory(dfn[1, -1], mybreaks, mylabels)  # sanity check on row 1 of dfn; example output:
+# [1] 50-75%  50-75%  50-75%  75-100% 75-100% 75-100%
+#   Levels: 0-25% 25-50% 50-75% 75-100%
+
+# lapply (not apply) keeps each column a factor with all levels, and also works for a one-row table.
+categ <- lapply(dfn[, -1, drop = FALSE], numbertocategory, mybreaks, mylabels)
+mydf <- data.frame(Gene = dfn$Gene, categ)
+#       Gene   Cell1   Cell2   Cell3   Cell4   Cell5   Cell6
+#   1   Gene1  50-75%  50-75%  50-75% 75-100% 75-100% 75-100%
+#   2   Gene2  25-50%  25-50%  25-50% 75-100% 75-100%  50-75%
+#   3   Gene3  25-50%  25-50%  50-75%  50-75%  25-50% 75-100%
+#   4   Gene4   0-25%   0-25%  25-50%  50-75% 75-100% 75-100% etc...
+
+nms <- names(mydf)  # build the long-format table plus 1-based grid coordinates for every cell
+lnms <- length(nms)
+rowsall <- nrow(mydf)   # number of rows (genes)
+colsall <- lnms - 1L    # number of value columns; counted directly so a single column still gives 1
+rowIndex <- rep(seq_len(rowsall), times = colsall)
+#1  2  3  4  5  6  7  8  9 10 11 12
+#1  2  3  4  5  6  7  8  9 10 11 12 etc...
+colIndex <- rep(seq_len(colsall), each = rowsall)
+#1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 3
+#3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 5 5...etc
+mydf.m <- cbind(rowIndex, colIndex, melt(mydf, id.vars = "Gene"))
+#     rowIndex colIndex Gene   variable value
+# 1         1        1  Gene1    Cell1  50-75%
+# 2         2        1  Gene2    Cell1  25-50%
+# 3         3        1  Gene3    Cell1  25-50%
+# 4         4        1  Gene4    Cell1   0-25%
+# 5         5        1  Gene5    Cell1 75-100%
+# 6         6        1  Gene6    Cell1 75-100%
+
+# NOW, the plotting: one unit rectangle per (row, column) cell of mydf.m, filled by its category.
+p <- ggplot(mydf.m, aes(variable, Gene)) +
+  geom_rect(aes(x = NULL, y = NULL, xmin = colIndex - 1,
+                xmax = colIndex, ymin = rowIndex - 1,
+                ymax = rowIndex, fill = value), colour = "grey")
+p <- p + scale_x_continuous(breaks = seq_len(colsall) - 0.5, labels = colnames(mydf)[-1])
+p <- p + scale_y_continuous(breaks = seq_len(rowsall) - 0.5, labels = mydf[, 1])
+p <- p + theme_bw(base_size = 8, base_family = "Helvetica") +
+  theme(
+    panel.grid.major = element_blank(),
+    panel.grid.minor = element_blank(),
+    panel.border = element_blank(),
+    axis.ticks.length = unit(0, "cm"),
+    axis.text.x = element_text(vjust = 0, angle = 90, size = 8)
+  ) +  # colours are named by category so a category absent from the data cannot shift the mapping
+  scale_fill_manual(values = setNames(c("white", "yellow", "orange", "red"), mylabels))
+p
\ No newline at end of file
-- 
GitLab