Cute Little R Functions contains 40 functions for R/RStudio that facilitate basic procedures in 1) object analysis, 2) object modification, 3) graphic handling and 4) log file management.
Cute Little R Functions contains 41 functions for R/RStudio that facilitate basic procedures in 1) object analysis, 2) object modification, 3) graphic handling and 4) log file management.
The function names are:
...
...
@@ -26,8 +26,8 @@ fun_num2color_mat() #### convert a numeric matrix into hexadecimal color matrix
fun_by_case_matrix_op() #### assemble several matrices with operation
fun_mat_inv() #### return the inverse of a square matrix
fun_mat_fill() #### fill the empty half part of a symmetric square matrix
fun_consec_pos_perm() #### progressively breaks a vector order
fun_perm() #### progressively breaks a vector order
fun_perm_consec() #### progressively breaks a vector order
# reorder the elements of the data1 vector by flipping 2 randomly selected consecutive positions either:
# reorder the elements of the data1 vector by flipping 2 randomly selected positions either:
# 1) n times (when n is precised) or
# 2) until the correlation between data1 and data2 decreases down to the cor.limit (0.2 by default). See cor.limit below to deal with negative correlations
# Example of consecutive position flipping: ABCD -> BACD -> BADC, etc.
# Example of position flipping: ABCD -> DBCA -> DACB, etc.
# ARGUMENTS
# data1: a vector of at least 2 elements. Must be numeric if data2 is specified
# data2: a numeric vector of same length as data1
# n: number of times "flipping 2 randomly selected consecutive positions". Ignored if data2 is specified
# n: number of times "flipping 2 randomly selected positions". Ignored if data2 is specified
# seed: integer number used by set.seed(). Write NULL if random result is required, an integer otherwise. BEWARE: if not NULL, fun_permut() will systematically return the same result when the other parameters keep the same settings
# count.print: interger value. Print a working progress message every count.print during loops. BEWARE: can increase substentially the time to complete the process using a small value, like 10 for instance. Use Inf is no loop message desired
# text.print: optional message to add to the working progress message every count.print loop
# code that protects set.seed() in the global environment
# see also Protocol 100-rev0 Parallelization in R.docx
if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment
pos <- ini.pos[1:2] # 2 position in initial position, without the last because always up permutation (pos -> pos+1 & pos+1 -> pos)
tempo.count.print <- count.print # for the printing message
count <- 0
tempo.cor <- 0
# end variable allocation before the loops to save time
permut.done <- TRUE
if(is.null(data2)){
if(length(table(data1)) == 1){
tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data1 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data1)))
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
permut.done <- FALSE
}else{
if(tempo.count.print > n){
tempo.count.print <- n
}
# pos.check <- NULL
for(i2 in 1:n){
count[1] <- count + 1
pos[] <- sample.int(n = pos.seq.max, size = 2, replace = FALSE) # random sample of a position to permute, sample.int samples in 1:pos.seq.max. Or sample(x = pos.seq, size = 1, replace = FALSE) but slower
tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data1 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data1)))
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
tempo.cor[1] <- 1
}else if(length(table(data2)) == 1){
tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data2 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data2)))
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
tempo.cor[1] <- 1
}else{
cor.ini <- cor(x = data1, y = data2, use = "pairwise.complete.obs", method = cor.method)
tempo.cor[1] <- cor.ini # correlation that will be modified during loops
neg.cor <- FALSE
if(tempo.cor < 0){
tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE cor.limit PARAMETER WILL BE SWITCHED TO THE NEGATIVE EQUIVALENT: ", -cor.limit)
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
neg.cor[1] <- TRUE
tempo.cor[1] <- abs(tempo.cor)
}
if(tempo.cor < cor.limit){ # randomize directly all the position to be close to correlation zero
tempo.warnings <- paste0("INITIAL ABSOLUTE VALUE OF THE ", toupper(cor.method), " CORRELATION ", fun_round(tempo.cor), " BETWEEN data1 AND data2 HAS BEEN DETECTED AS BELOW THE CORRELATION LIMIT PARAMETER ", cor.limit, "\nTHE data1 SEQUENCE HAS BEEN COMPLETELY RANDOMIZED TO CORRESPOND TO CORRELATION ZERO")
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
for(i1 in 1:5){ # done 5 times to be sure of the complete randomness
# as fun_permut() except that reorder the elements of the data1 vector by flipping 2 randomly selected consecutive positions either:
# 1) n times (when n is precised) or
# 2) until the correlation between data1 and data2 decreases down to the cor.limit (0.2 by default). See cor.limit below to deal with negative correlations
# Example of consecutive position flipping: ABCD -> BACD -> BADC, etc.
# ARGUMENTS
# data1: a vector of at least 2 elements. Must be numeric if data2 is specified
# data2: a numeric vector of same length as data1
# n: number of times "flipping 2 randomly selected consecutive positions". Ignored if data2 is specified
# seed: integer number used by set.seed(). Write NULL if random result is required, an integer otherwise. BEWARE: if not NULL, fun_permut() will systematically return the same result when the other parameters keep the same settings
# count.print: interger value. Print a working progress message every count.print during loops. BEWARE: can increase substentially the time to complete the process using a small value, like 10 for instance. Use Inf is no loop message desired
# text.print: optional message to add to the working progress message every count.print loop
# cor.method: correlation method. Either "pearson", "kendall" or "spearman". Ignored if data2 is not specified
# cor.limit: a correlation limit (between 0 and 1). Ignored if data2 is not specified. Compute the correlation between data1 and data2, permute the data1 values, and stop the permutation process when the correlation between data1 and data2 decreases down below the cor limit value (0.2 by default). If cor(data1, data2) is negative, then -cor.limit is used and the process stops until the correlation between data1 and data2 increases up over cor.limit (-0.2 by default). BEWARE: write a positive cor.limit even if cor(data1, data2) is known to be negative. The function will automatically uses -cor.limit. If the initial correlation is already below cor.limit (positive correlation) or over -cor.limit (negative correlation), then the data1 value positions are completely randomized (correlation between data1 and data2 is expected to be 0)
# warn.print: logical. Print warnings at the end of the execution? No print if no warning messages
# path.lib: absolute path of the required packages, if not in the default folders
# REQUIRED PACKAGES
# lubridate
# REQUIRED FUNCTIONS FROM CUTE_LITTLE_R_FUNCTION
# fun_check()
# fun_pack()
# fun_round()
# RETURN
# a list containing:
# $data: the modified vector
# $warnings: potential warning messages (in case of negative correlation when data2 is specified). NULL if non warning message
# $cor: a spearman correlation between the initial positions (1:length(data1) and the final positions if data2 is not specified and the final correlation between data1 and data2 otherwise, according to cor.method
# $count: the number of loops used
# EXAMPLES
# example (1) showing that for loop, used in fun_permut_consec(), is faster than while loop
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_check() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n")
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_pack() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n")
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_pack() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n")
stop(tempo.cat)
}
# end required function checking
# argument checking
arg.check <- NULL # for function debbuging
checked.arg.names <- NULL # for function debbuging
tempo <- fun_check(data = data1, class = "vector", fun.name = function.name) ; eval(ee)
if(tempo$problem == FALSE & length(data1) < 2){
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 ARGUMENT MUST BE A VECTOR OF MINIMUM LENGTH 2. HERE IT IS: ", length(data1),"\n\n================\n\n")
cat(tempo.cat)
arg.check <- c(arg.check, TRUE)
}
if( ! is.null(data2)){
tempo <- fun_check(data = data1, class = "vector", mode = "numeric", fun.name = function.name) ; eval(ee)
if(tempo$problem == TRUE){
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 MUST BE A NUMERIC VECTOR IF data2 ARGUMENT IS SPECIFIED\n\n================\n\n")
cat(tempo.cat)
arg.check <- c(arg.check, TRUE)
}
tempo <- fun_check(data = data2, class = "vector", mode = "numeric", fun.name = function.name) ; eval(ee)
if(length(data1) != length(data2)){
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 AND data2 MUST BE VECTOR OF SAME LENGTH. HERE IT IS ", length(data1)," AND ", length(data2), "\n\n================\n\n")
cat(tempo.cat)
arg.check <- c(arg.check, TRUE)
}
}else if(is.null(n)){
tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": n ARGUMENT CANNOT BE NULL IF data2 ARGUMENT IS NULL\n\n================\n\n")
cat(tempo.cat)
arg.check <- c(arg.check, TRUE)
}
if( ! is.null(n)){
tempo <- fun_check(data = n, class = "vector", typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = function.name) ; eval(ee)
tempo.cat <- paste0("\n\n============\n\nERROR IN ", function.name, ": \nDIRECTORY PATH INDICATED IN THE path.lib PARAMETER DOES NOT EXISTS: ", path.lib, "\n\n============\n\n")
cat(tempo.cat)
arg.check <- c(arg.check, TRUE)
}
}
if(any(arg.check) == TRUE){
stop() # nothing else because print = TRUE by default in fun_check()
}
# source("C:/Users/Gael/Documents/Git_versions_to_use/debugging_tools_for_r_dev-v1.2/r_debugging_tools-v1.2.R") ; eval(parse(text = str_basic_arg_check_dev)) ; eval(parse(text = str_arg_check_with_fun_check_dev)) # activate this line and use the function (with no arguments left as NULL) to check arguments status and if they have been checked using fun_check()
# no need to set seed because already done ine the main function
loop1 <- trunc(count/(abs(cor.ini) - abs(tempo.cor)) * (abs(tempo.cor) - cor.limit)) # count/(abs(cor.ini) - abs(tempo.cor)) is the number of count per unit of corr. Tis is multiplied by the remaining distance to run ceiling to be over the number of approximate loops in order to reach the cor.limit value
if(is.na(loop1) | ! is.finite(loop1)){
tempo.cat <- (paste0("\n\n============\n\nERROR IN ", function.name, ": CODE INCONSISTENCY 1\n\n============\n\n"))
# code that protects set.seed() in the global environment
# see also Protocol 100-rev0 Parallelization in R.docx
if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment
...
...
@@ -2394,7 +2625,7 @@ cor.ini <- cor(x = data1, y = data2, use = "pairwise.complete.obs", method = cor
tempo.cor <- cor.ini # correlation that will be modified during loops
neg.cor <- FALSE
if(tempo.cor < 0){
tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE cor.limit PARAMETER WILL BE SWITCHED TO THE NEGATIVE EQUIVALENT: ", -cor.limit)
tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE LOOP STEPS WILL BE PERFORMED USING POSITIVE CORRELATIONS BUT THE FINAL CORRELATION WILL BE NEGATIVE")
warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used
neg.cor <- TRUE
tempo.cor <- abs(tempo.cor)
...
...
@@ -2412,8 +2643,8 @@ count2[1] <- 1 # 1 and not 0 because already 1 performed just below
pos[1] <- sample.int(n = pos.seq.max, size = 1, replace = FALSE) # selection of 1 position
if(count < 100){ # a small loop to increase the number of count because count is used to estimate the number of loops by the fun_loop function. Thus count must be large enough to be relatively accurate
loop1 <- trunc(count/(abs(cor.ini) - abs(tempo.cor)) * (abs(tempo.cor) - cor.limit)) # count/(abs(cor.ini) - abs(tempo.cor)) is the number of count per unit of corr. Tis is multiplied by the remaining distance to run ceiling to be over the number of approximate loops in order to reach the cor.limit value
if(is.na(loop1) | ! is.finite(loop1)){
tempo.cat <- (paste0("\n\n============\n\nERROR IN ", function.name, ": CODE INCONSISTENCY 1\n\n============\n\n"))
stop(tempo.cat)
}else if(loop1 > 100000){ # to be sure that 100 more loops will not push tempo.cor below cor.limit
for(i4 in 1:100){
count[1] <- count + 1
pos[1] <- sample.int(n = pos.seq.max, size = 1, replace = FALSE) # selection of 1 position
if(all(sapply(tempo.try, FUN = grepl, pattern = "[Ee]rror"))){
stop(paste0("\n\n================\n\nERROR IN ", function.name, ": PACKAGE ", req.package[i0], " MUST BE INSTALLED IN THE MENTIONNED DIRECTORY:\n", paste(path.lib, collapse = "\n"), "\n\n================\n\n"))
# suppressWarnings(suppressPackageStartupMessages(assign(req.package[i0], reticulate::import(req.package[i0])))) # not required because try() already evaluates