diff --git a/Density coloured scatterplots with ggplot2/density_coloured_scatterplots20210913.R b/Density coloured scatterplots with ggplot2/density_coloured_scatterplots20210913.R new file mode 100644 index 0000000000000000000000000000000000000000..0049176e9f1bebbf01050deff77ab359977a19a1 --- /dev/null +++ b/Density coloured scatterplots with ggplot2/density_coloured_scatterplots20210913.R @@ -0,0 +1,114 @@ +#File: density_coloured_scatterplots20210913.R +#Created September 13, 2021, by Cosmin SAVEANU +#Putting together a customized density coloured scatterplot +#version 1.0 + +library(ggplot2) +library(dplyr) +library(cowplot) +library(viridis) + + +##### Functions that can be used together for the scatter plot ###### + +# from https://stackoverflow.com/questions/13094827/how-to-reproduce-smoothscatters-outlier-plotting-in-ggplot +# thanks to the stackoverflow user "ninjaminb" https://stackoverflow.com/users/8933815/ninjaminb +densVals <- function(x, y = NULL, nbin = 128, bandwidth, range.x) { + dat <- cbind(x, y) + # limit dat to strictly finite values + sel <- is.finite(x) & is.finite(y) + dat.sel <- dat[sel, ] + # density map with arbitrary graining along x and y + map <- grDevices:::.smoothScatterCalcDensity(dat.sel, nbin, bandwidth) + map.x <- findInterval(dat.sel[, 1], map$x1) + map.y <- findInterval(dat.sel[, 2], map$x2) + # weighted mean of the fitted density map according to how close x and y are + # to the arbitrary grain of the map + den <- mapply(function(x, y) weighted.mean(x = c( + map$fhat[x, y], map$fhat[x + 1, y + 1], + map$fhat[x + 1, y], map$fhat[x, y + 1]), w = 1 / c( + map$x1[x] + map$x2[y], map$x1[x + 1] + map$x2[y + 1], + map$x1[x + 1] + map$x2[y], map$x1[x] + map$x2[y + 1])), + map.x, map.y) + # replace missing density estimates with NaN + res <- rep(NaN, length(sel)) + res[sel] <- den + res +} + + +corstring <- function(datax, datay){ + cor_x_vs_y <- cor.test(datax, datay) + pearson_r <- cor_x_vs_y$estimate + pearson_int <- cor_x_vs_y$estimate - cor_x_vs_y$conf.int[1] + pearson_df <- cor_x_vs_y$parameter + pearson_text <- sprintf("%s%.2f %s %.3f %s", + "r = ", + pearson_r, + "±", + pearson_int, + '(95% CI)') + pearson_txt2 <- sprintf("%s %d", + "N = ", + pearson_df+2) + return(paste(pearson_text, pearson_txt2, sep="\n")) +} + + + +correl_plot_log <- function(df, colx, coly, breaksx, labelsx, breaksy, labelsy, + xmin, xmax, ymin, ymax, cortext, xlabel, ylabel) +{ # uses densVals to select a subset of points for the plot to avoid overlplotting + # the x and y values are considered to be log2 transformed + dfd <- data.frame(x=df[, colx], y=df[, coly]) + dfd$point_density <- densVals(dfd$x, dfd$y) + corannotdf <- data.frame(x=log2(xmax/2), y=log2(ymax)) + ggplot(data=dfd, aes(x=x, y=y))+ + stat_density2d(geom = "raster", aes(fill = ..density..^0.8), contour = FALSE, n=200)+ + # the following line can be ommited if contour lines are not required + stat_density2d(aes(color=..level..), contour=TRUE, geom="contour")+ + # instead of viridis, other color pallettes can be used + # white is the first color to avoid having color in low density regions of the plot + scale_fill_gradientn(colours=c("white", "#9D80A4", viridis(n=10)))+ + # select only the 500 points from lower density region + geom_point(data = dplyr::top_n(dfd, 500, -point_density), col="#440154FF", size=1, pch=21, fill="#440154FF")+ + scale_x_continuous(breaks=breaksx, labels=labelsx, limits = c(log2(xmin), log2(xmax)))+ + scale_y_continuous(breaks=breaksy, labels=labelsy, limits = c(log2(ymin), log2(ymax)))+ + xlab(xlabel)+ + ylab(ylabel)+ + # the positioning of the labeling can be altered, or removed by commenting the following line + geom_text(data = corannotdf, x=corannotdf$x, y=corannotdf$y, + label=cortext, size=2)+ + # other themes can be indicated here + theme_cowplot()+ + # tweaks to the theme by hiding the legend and changing the size of the axis labels + theme(axis.title=element_text(size=8), + axis.text=element_text(size=8), + axis.text.x=element_text(angle=90, vjust=0.5, hjust=1), + legend.position = "none") +} + + +######## EXAMPLE ######## + +testdf <- data.frame(xbase = exp(rnorm(10000)), ybase = exp(rnorm(10000))) +# the transformation applied will affect the local density estimate +testdf$log2x <- log2(testdf$xbase) +testdf$log2y <- log2(testdf$ybase) + +# establish a custom scale, using log2 of values +xyvals=c(0.01, 0.1, 1, 10, 100, 1000) +breaksxy = log2(xyvals) +labelsxy=as.character(xyvals) + +# compute a correlation value that will be displayed on the plot +testdfcor <- corstring(testdf$log2x, testdf$log2y) + +# compute the density of points around each x,y pair +testdf$point_density <- densVals(testdf$log2x, testdf$log2y) + +correl_plot_log(testdf, "log2x", "log2y", + breaksxy, labelsxy, + breaksxy, labelsxy, + 0.01, 100, 0.01, 100, + testdfcor, "x (log2 scale)", "y (log2scale)")