# Categorical heatmaps/generate_heatmap.R
#
# Draws a categorical heatmap of per-gene relative expression.
#
# Input: "data.txt" in the working directory, tab-separated with a header
# row. Column 1 ("Gene") holds the gene identifiers; every other column is
# read as numeric values for one cell/sample.
#
# Each row is scaled by its own maximum, the scaled values are binned into
# four categories (0-25%, 25-50%, 50-75%, 75-100%), and every gene/cell pair
# is drawn as one unit rectangle filled according to its category.

library(reshape2)
library(ggplot2) # ggplot(), aes(), theme_bw(), unit() all come from here

# Load and normalise ----

df <- read.table(
  file = "data.txt", sep = "\t", stringsAsFactors = FALSE, header = TRUE
)

# Maximum value per row. The negative index removes column 1 (the gene
# identifiers); drop = FALSE keeps a data frame even with one data column.
maxes <- apply(df[, -1, drop = FALSE], MARGIN = 1, FUN = max)

# Divide every value by the maximum of its row.
# NOTE: a row whose maximum is 0 yields NaN here and is later binned as NA.
normdf <- sweep(df[, -1, drop = FALSE], MARGIN = 1, STATS = maxes, FUN = "/")
dfn <- data.frame(Gene = df$Gene, normdf, stringsAsFactors = FALSE)

# Long form of the normalised values. Not used further down in this script;
# kept so the object is still available after sourcing.
dfn.m <- melt(dfn, id.vars = "Gene")

# Bin into categories ----

# The outer breaks sit slightly outside [0, 1] so that exactly 0 and exactly 1
# fall inside the first and last (right-closed) interval.
mybreaks <- c(-0.1, 0.25, 0.5, 0.75, 1.1)
mylabels <- c("0-25%", "25-50%", "50-75%", "75-100%")

# Convert numbers to labelled categories.
#   vctr  values to bin (coerced with as.numeric())
#   brks  break points handed to cut()
#   lbls  one label per interval between consecutive breaks
# Returns a factor whose levels are `lbls`.
numbertocategory <- function(vctr, brks, lbls) {
  cut(as.numeric(vctr), breaks = brks, labels = lbls)
}

# Example on the first gene:
numbertocategory(dfn[1, -1], mybreaks, mylabels)
# [1] 50-75%  50-75%  50-75%  75-100% 75-100% 75-100%
# Levels: 0-25% 25-50% 50-75% 75-100%

# apply() coerces the factors returned by cut() to character, so `categ` is a
# character matrix; the level order is restored after melting (see below).
categ <- apply(
  dfn[, -1, drop = FALSE], MARGIN = 2, numbertocategory, mybreaks, mylabels
)

mydf <- data.frame(Gene = dfn$Gene, categ, stringsAsFactors = FALSE)
#    Gene  Cell1  Cell2  Cell3   Cell4   Cell5   Cell6
# 1 Gene1 50-75% 50-75% 50-75% 75-100% 75-100% 75-100%
# 2 Gene2 25-50% 25-50% 25-50% 75-100% 75-100%  50-75%
# 3 Gene3 25-50% 25-50% 50-75%  50-75%  25-50% 75-100%
# 4 Gene4  0-25%  0-25% 25-50%  50-75% 75-100% 75-100%   etc...

# Grid coordinates ----

nms <- names(mydf)
lnms <- length(nms)
rowsall <- nrow(mydf)
colsall <- lnms - 1L # every column except Gene

# melt() stacks the measure columns one after another, so the row index
# cycles 1..rowsall once per column while the column index is held constant
# for rowsall consecutive entries.
rowIndex <- rep(seq_len(rowsall), times = colsall)
# 1 2 3 ... rowsall 1 2 3 ... rowsall   etc...
colIndex <- rep(seq_len(colsall), each = rowsall)
# 1 1 1 ... 1 2 2 2 ... 2 3 3 3 ...     etc...

mydf.m <- cbind(rowIndex, colIndex, melt(mydf, id.vars = "Gene"))
#   rowIndex colIndex  Gene variable   value
# 1        1        1 Gene1    Cell1  50-75%
# 2        2        1 Gene2    Cell1  25-50%
# 3        3        1 Gene3    Cell1  25-50%
# 4        4        1 Gene4    Cell1   0-25%

# Restore the intended category order so that the legend and the colour
# assignment do not depend on which categories happen to occur in the data.
mydf.m$value <- factor(mydf.m$value, levels = mylabels)

# Plot ----

# Colours are named by category: a category absent from the data can then
# never shift the remaining colours onto the wrong bins.
fill_colours <- setNames(c("white", "yellow", "orange", "red"), mylabels)

# Each gene/cell pair is a unit square; x and y are reset to NULL in the
# layer so that only the rectangle corners are mapped.
p <- ggplot(mydf.m, aes(variable, Gene)) +
  geom_rect(
    aes(
      x = NULL, y = NULL,
      xmin = colIndex - 1, xmax = colIndex,
      ymin = rowIndex - 1, ymax = rowIndex,
      fill = value
    ),
    colour = "grey"
  )

# Tick labels are placed at the centre of each square.
p <- p + scale_x_continuous(
  breaks = seq_len(colsall) - 0.5, labels = names(mydf)[-1]
)
p <- p + scale_y_continuous(
  breaks = seq_len(rowsall) - 0.5, labels = as.character(mydf$Gene)
)

p <- p + theme_bw(base_size = 8, base_family = "Helvetica") +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    axis.ticks.length = unit(0, "cm"),
    axis.text.x = element_text(vjust = 0, angle = 90, size = 8)
  ) +
  scale_fill_manual(values = fill_colours, drop = FALSE)

# Explicit print so the figure is also rendered when the file is source()d.
print(p)