Skip to content
Snippets Groups Projects
Select Git revision
  • c5073d72a7ecf8c83eaea2a438057212ba0df9bc
  • master default protected
2 results

generate_heatmap.R

Blame
  • generate_heatmap.R 2.63 KiB
    # Load the expression table: tab-separated text with a header row.
    # Column 1 (`Gene`) holds the gene identifiers; the remaining columns
    # hold the values that get normalised below.
    df <- read.table(file = "data.txt", sep = "\t", stringsAsFactors = FALSE,
                     header = TRUE)
    # Find the maximum value per row.
    # The negative index removes column 1 (the gene identifiers);
    # `drop = FALSE` keeps a data frame even when only one value column is
    # left, so apply() and sweep() still receive a two-dimensional object.
    maxes <- apply(df[, -1, drop = FALSE], MARGIN = 1, FUN = max)
    # Divide all the values by the corresponding row maximum, so the largest
    # value of every row becomes 1.
    # NOTE: a row whose maximum is 0 produces NaN here (0 / 0).
    normdf <- sweep(df[, -1, drop = FALSE], MARGIN = 1, STATS = maxes,
                    FUN = "/")
    # Re-attach the identifiers; keep them as character on every R version.
    dfn <- data.frame(Gene = df$Gene, normdf, stringsAsFactors = FALSE)
    
    library(reshape2)
    # Long-format copy of the normalised table: one row per gene/column pair.
    # The id column is named explicitly so melt() does not have to guess it
    # (guessing also prints a "Using Gene as id variables" message).
    # NOTE(review): dfn.m is not referenced again in the visible script.
    dfn.m <- melt(dfn, id.vars = "Gene")
    
    # Category boundaries and their labels. cut() builds right-closed
    # intervals (a, b], so the outermost breaks sit just outside [0, 1] to
    # make sure that both 0 and 1 fall into a category.
    mybreaks <- c(-0.1, 0.25, 0.5, 0.75, 1.1)
    mylabels <- c("0-25%", "25-50%", "50-75%", "75-100%")

    # Bin numbers into labelled categories.
    #   vctr: values to classify; coerced with as.numeric()
    #   brks: break points handed to cut()
    #   lbls: labels handed to cut(), one per interval between breaks
    # Returns the factor produced by cut(); values outside the breaks are NA.
    numbertocategory <- function(vctr, brks, lbls) {
      numbers <- as.numeric(vctr)
      cut(numbers, breaks = brks, labels = lbls)
    }
    # Spot check on the first gene; the expected output is shown below.
    numbertocategory(dfn[1, -1], mybreaks, mylabels)
    # [1] 50-75%  50-75%  50-75%  75-100% 75-100% 75-100%
    #   Levels: 0-25% 25-50% 50-75% 75-100%
    
    # Categorise every normalised value in a single vectorised call and
    # rebuild a character matrix with one column per value column of dfn.
    # Unlike apply() over the data frame, this keeps its shape when dfn has
    # only one value column or only one row.
    categ <- local({
      vals <- as.matrix(dfn[, -1, drop = FALSE])
      matrix(as.character(numbertocategory(vals, mybreaks, mylabels)),
             nrow = nrow(vals), ncol = ncol(vals),
             dimnames = list(NULL, colnames(vals)))
    })
    
    # Gene identifiers next to their category labels.
    mydf <- data.frame(Gene = dfn$Gene, categ, stringsAsFactors = FALSE)
    #       Gene   Cell1   Cell2   Cell3   Cell4   Cell5   Cell6
    #   1   Gene1  50-75%  50-75%  50-75% 75-100% 75-100% 75-100%
    #   2   Gene2  25-50%  25-50%  25-50% 75-100% 75-100%  50-75%
    #   3   Gene3  25-50%  25-50%  50-75%  50-75%  25-50% 75-100%
    #   4   Gene4   0-25%   0-25%  25-50%  50-75% 75-100% 75-100% etc...
    
    # Grid dimensions: one row per gene, one column per category column
    # (every column of mydf except the leading Gene column).
    nms <- names(mydf)
    lnms <- length(nms)
    rowsall <- nrow(mydf)
    # Counting from the names avoids subsetting mydf, which would collapse to
    # an unnamed vector (and report 0 columns) when only one column is left.
    colsall <- lnms - 1L
    # Row position of every cell; seq_len() stays empty for an empty table.
    rowIndex <- rep(seq_len(rowsall), times = colsall)
    #1  2  3  4  5  6  7  8  9 10 11 12
    #1  2  3  4  5  6  7  8  9 10 11 12 etc...
    # Column position of every cell.
    colIndex <- rep(seq_len(colsall), each = rowsall)
    #1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 3
    #3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 4 4 5 5...etc
    # melt() stacks the category columns one after another, which is the same
    # column-by-column order used to build rowIndex and colIndex above.
    mydf.m <- cbind(rowIndex, colIndex, melt(mydf, id.vars = "Gene"))
    #     rowIndex colIndex Gene   variable value
    # 1         1        1  Gene1    Cell1  50-75%
    # 2         2        1  Gene2    Cell1  25-50%
    # 3         3        1  Gene3    Cell1  25-50%
    # 4         4        1  Gene4    Cell1   0-25%
    # 5         5        1  Gene5    Cell1 75-100%
    # 6         6        1  Gene6    Cell1 75-100%
    
    #NOW, the plotting:
    # ggplot2 provides ggplot(), geom_rect(), the scales, the themes and
    # unit(); it is attached here because only reshape2 is loaded above.
    library(ggplot2)
    # Draw one unit square per cell. The x/y aesthetics inherited from
    # ggplot() are dropped for the rectangles, which are placed through
    # their corner coordinates instead.
    p <- ggplot(mydf.m, aes(variable, Gene)) +
      geom_rect(aes(x = NULL, y = NULL,
                    xmin = colIndex - 1, xmax = colIndex,
                    ymin = rowIndex - 1, ymax = rowIndex,
                    fill = value), colour = "grey")
    # Put the axis labels at the centre of each square (hence the -0.5).
    # colnames(mydf)[-1] keeps working when mydf has a single data column.
    p <- p + scale_x_continuous(breaks = seq_len(colsall) - 0.5,
                                labels = colnames(mydf)[-1])
    p <- p + scale_y_continuous(breaks = seq_len(rowsall) - 0.5,
                                labels = as.character(mydf[, 1]))
    p <- p + theme_bw(base_size = 8, base_family = "Helvetica") +
      theme(
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.border = element_blank(),
        axis.ticks.length = unit(0, "cm"),
        axis.text.x = element_text(vjust = 0, angle = 90, size = 8)
      ) +
      # Named values tie each colour to its category label, so the colours
      # do not shift when a category is absent from the data.
      scale_fill_manual(values = c("0-25%" = "white", "25-50%" = "yellow",
                                   "50-75%" = "orange", "75-100%" = "red"))
    p