varExp.R

#' Make sure SNPs in the input file and SNPs included 
#' in the correlation matrix are the same.
#'
#'This step is necessary because some SNPs in the input may be not 
#'found in the reference panel or excluded from the matrix after the pruning step.
#'
#' @param df is the meta-analysis results data frame
#' @param v is a vector of SNP identifiers
#' 
#' @return A data frame only with rows corresponding to SNPs in the vector \code{v}
#' 
#' @examples 
#' df <- data.frame(c("rs1", "rs2", "rs3"), rnorm(3))
#' v <- c("rs1", "rs3")
#' df2 <- checkInput(df, v)
#' 
#' @export
#'  
checkInput <- function(df, v) {
    if (!is.null(v)) {
        df[df$RSID %in% v, ]
    }
    else {
        df[1, ]
    }
}

################################################################################

#' Calculate the mean and variance of a quantitative variable in a pooled sample 
#' of several cohorts
#'
#' @param N is a vector of sample size in each cohorts
#' @param m is a vector of the mean of the variable in each individual cohort
#' @param v is a vector of the standard deviation of the variable in each 
#'   individual cohorts
#'
#' @return A vector of length 2 which first element is the mean and 
#'   second element is the variance
#'
#' @examples
#' sample_sizes <- c(250, 1000, 10000, 7500)
#' means <- c(2.5, 2.28, 2.32, 2.42)
#' sds <- c(1.05, 1.1, 0.98, 0.94)
#' parameters <- calculateParamsFromIndParams(sample_sizes, means, sds)
#'
#' @export
#'
calculateParamsFromIndParams <- function(N, m, v) {
    gmean <- sum(N * m) / sum(N)
    aa <- (N - 1) * v^2
    bb <- N * (m - gmean)^2
    c(gmean, sum(aa + bb) / (sum(N) - 1))
}

################################################################################

#' Calculate the mean and variance of a binary variable in a pooled sample 
#' of several cohorts
#'
#' @param N is a vector of sample size in each cohorts
#' @param n_expo is the vector of exposed individuals counts in each individual cohort
#'
#' @return A vector of length 2 which first element is the mean and 
#'   second element is the variance
#'
#' @examples
#' sample_sizes <- c(250, 1000, 10000, 7500)
#' exposed <- c(120, 412, 3211, 4300)
#' parameters <- calculateParamsFromCounts(sample_sizes, exposed)
#'
#' @export
#'
calculateParamsFromCounts <- function(N, n_expo) {
    meanval <- sum(n_expo) / sum(N)
    c(meanval, sum(N) / (sum(N) - 1) * meanval * (1 - meanval))
}

################################################################################

#' Derive betas in the standardized model from betas in the general model
#'
#' @param betaG is the vector of main genetic effects in the general model
#' @param betaINT is the vector of interaction effects in the general model
#' @param maf is the vector of the variants' frequency
#' @param meanE is the mean of the exposure
#' @param varE is the variance of the exposure
#' @param type designates the coefficients to standardize:
#'  "G" for the main genetic effect and "I" for the interaction effects
#'
#' @return The vector of standardized effects
#'
#' @examples
#' betaGs <- rnorm(10, 0, 0.1)
#' betaIs <- rnorm(10, 0, 0.05)
#' mafs <- runif(10, 0.05, 0.95)
#' meanE <- runif(1, -2, 2)
#' varE <- runif(1, 0.5, 1.5)
#' std_betaG <- standardizeBeta(betaGs, betaIs, mafs, meanE, varE, "G")
#' std_betaI <- standardizeBeta(betaGs, betaIs, mafs, meanE, varE, "I")
#'
#' tryCatch(
#'   std_betaI <- standardizeBeta(betaGs, betaIs, mafs, meanE, varE, 
#'                                "anything different from G or I"),
#'   error = function(e) print(e))
#'
#' @export
#' 
standardizeBeta <- function(betaG,betaINT,maf,meanE,varE,type) {
    if (type == "I") {
        return(betaINT * sqrt(2 * maf * (1 - maf)) * sqrt(varE))
    }
    else if (type == "G") {
        return((betaG + betaINT * meanE) * sqrt(2 * maf * (1 - maf)))
    }
    else {
        stop("type must be either \"I\" or \"G\"", call. = FALSE)
    }
}

################################################################################

#' Calculate the fraction of phenotypic variance explained by as set of 
#' significant genetic effect and/or interaction effects.
#'
#' This version takes into account potential rank deficiencies
#' in the correlation matrix and noise in the  effect size estimation.
#'
#' For more details, see Shi et al., Am. J. Hum. Genet., 2016
#'
#' @param std_bG is the vector of standardized genetic effect sizes
#' @param std_bI is the vector of standardized interaction effect sizes
#' @param matcor is the genotype correlation matrix
#' @param varY is the phenotypic variance in the pooled sample
#' @param N is the total sample size
#' @param type indicates wether only genetic or interactions effects should be 
#'   considered or if both should be considered jointly.
#'
#' @return The fraction of phenotypic variance explained by user-specified effects.
#'
#' @examples
#' std_bG <- rnorm(5, 0, 0.01)
#' std_bI <- rnorm(5, 0, 0.001)
#' matcor <- cor(matrix(runif(5*5, 1, 5), nrow = 5))
#' varY <- 2.25
#' N <- 100000
#' calculateVarExp(std_bG, std_bI, matcor, varY, N, "G")
#' calculateVarExp(std_bG, std_bI, matcor, varY, N, "I")
#' calculateVarExp(std_bG, std_bI, matcor, varY, N, "J")
#'
#' @importFrom MASS ginv
#' 
#' @export
#' 
calculateVarExp <- function(std_bG, std_bI, matcor, varY, N, type) {
    if (type == "G") {
        std_bI <- rep(0, length(std_bG))
    }
    else if (type == "I") {
        std_bG <- rep(0, length(std_bI))
    }
    else if (type != "J") {
        stop("type must be in c(\"G\", \"I\", \"J\")", call. = FALSE)
    }
    q <- qr(matcor)$rank
    inv <- ginv(matcor, tol = 0.0001)
    return((N * (crossprod(t(crossprod(std_bG, inv)), std_bG) + 
                     crossprod(t(crossprod(std_bI, inv)), std_bI)) -
                q) / ((N - q) * varY))
}

################################################################################