diff --git a/README.md b/README.md index 3e39ca159dcf6b30532bba236a69ef71367c957a..facbccf3c8d05dc632028f3d1f23129d8525c3e0 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ #### DESCRIPTION -Cute Little R Functions contains 40 functions for R/RStudio that facilitate basic procedures in 1) object analysis, 2) object modification, 3) graphic handling and 4) log file management. +Cute Little R Functions contains 41 functions for R/RStudio that facilitate basic procedures in 1) object analysis, 2) object modification, 3) graphic handling and 4) log file management. The function names are: @@ -26,8 +26,8 @@ fun_num2color_mat() #### convert a numeric matrix into hexadecimal color matrix fun_by_case_matrix_op() #### assemble several matrices with operation fun_mat_inv() #### return the inverse of a square matrix fun_mat_fill() #### fill the empty half part of a symmetric square matrix -fun_consec_pos_perm() #### progressively breaks a vector order - +fun_perm() #### progressively breaks a vector order +fun_perm_consec() #### progressively breaks a vector order ## Graphics management @@ -155,6 +155,7 @@ fun_export_data() fun_report() fun_name_change() fun_mat_fill() fun_permut() +fun_permut_consec() fun_empty_graph() fun_gg_palette() fun_gg_just() diff --git a/cute_little_R_functions.R b/cute_little_R_functions.R index 3d4ea19d1458c25fc2c10442013b694676ba13b2..69fa951f3abc40472d8f645eeef50e62a098c13d 100644 --- a/cute_little_R_functions.R +++ b/cute_little_R_functions.R @@ -22,53 +22,54 @@ ################ Object analysis 2 ######## fun_check() #### check class, type, length, etc., of objects 2 -######## fun_info() #### recover object information 2 -######## fun_1d_comp() #### comparison of two 1D datasets (vectors, factors, 1D tables) 2 -######## fun_2d_comp() #### comparison of two 2D datasets (row & col names, dimensions, etc.) 2 -######## fun_2d_head() #### head of the left or right of big 2D objects 2 -######## fun_2d_tail() #### tail of the left or right of big 2D objects 2 -######## fun_list_comp() #### comparison of two lists 2 -################ Object modification 2 -######## fun_name_change() #### check a vector of character strings and modify any string if present in another vector 2 -######## fun_df_remod() #### remodeling a data frame to have column name as a qualitative values and vice-versa 2 -######## fun_merge() #### merge the columns of 2 data frames or 2 matrices 2 -######## fun_round() #### rounding number if decimal present 2 -######## fun_mat_rotate() #### 90° clockwise matrix rotation 2 -######## fun_mat_num2color() #### convert a numeric matrix into hexadecimal color matrix 2 -######## fun_mat_op() #### assemble several matrices with operation 2 -######## fun_mat_inv() #### return the inverse of a square matrix 2 -######## fun_mat_fill() #### fill the empty half part of a symmetric square matrix 2 -######## fun_permut() #### progressively breaks a vector order 2 -################ Graphics management 2 -######## fun_width() #### window width depending on classes to plot 2 -######## fun_open() #### open a GUI or pdf graphic window 2 -######## fun_prior_plot() #### set graph param before plotting (erase axes for instance) 2 -######## fun_scale() #### select nice label numbers when setting number of ticks on an axis 2 -######## fun_post_plot() #### set graph param after plotting (axes redesign for instance) 2 -######## fun_close() #### close specific graphic windows 2 -################ Standard graphics 2 -######## fun_empty_graph() #### text to display for empty graphs 2 -################ gg graphics 2 -######## fun_gg_palette() #### ggplot2 default color palette 2 -######## fun_gg_just() #### ggplot2 justification of the axis labeling, depending on angle 2 -######## fun_gg_point_rast() #### ggplot2 raster scatterplot layer 2 -######## fun_gg_scatter() #### ggplot2 scatterplot + lines (up to 6 overlays totally) 2 -######## fun_gg_bar_mean() #### ggplot2 mean barplot + overlaid dots if required 2 -######## fun_gg_boxplot() #### ggplot2 boxplot + background dots if required 2 -######## fun_gg_bar_prop() #### ggplot2 proportion barplot 2 -######## fun_gg_strip() #### ggplot2 stripchart + mean/median 2 -######## fun_gg_violin() #### ggplot2 violins 2 -######## fun_gg_line() #### ggplot2 lines + background dots and error bars 2 -######## fun_gg_heatmap() #### ggplot2 heatmap + overlaid mask if required 2 -######## fun_gg_empty_graph() #### text to display for empty graphs 2 -################ Graphic extraction 2 -######## fun_trim() #### display values from a quantitative variable and trim according to defined cut-offs 2 -######## fun_segmentation() #### segment a dot cloud on a scatterplot and define the dots from another cloud outside the segmentation 2 -################ Import 2 -######## fun_pack() #### check if R packages are present and import into the working environment 2 -######## fun_python_pack() #### check if python packages are present 2 -################ Exporting results (text & tables) 2 -######## fun_report() #### print string or data object into output file 2 +######## fun_info() #### recover object information 8 +######## fun_1d_comp() #### comparison of two 1D datasets (vectors, factors, 1D tables) 9 +######## fun_2d_comp() #### comparison of two 2D datasets (row & col names, dimensions, etc.) 13 +######## fun_2d_head() #### head of the left or right of big 2D objects 20 +######## fun_2d_tail() #### tail of the left or right of big 2D objects 21 +######## fun_list_comp() #### comparison of two lists 22 +################ Object modification 24 +######## fun_name_change() #### check a vector of character strings and modify any string if present in another vector 24 +######## fun_df_remod() #### remodeling a data frame to have column name as a qualitative values and vice-versa 26 +######## fun_merge() #### merge the columns of two 2D objects, by common rows 29 +######## fun_round() #### rounding number if decimal present 33 +######## fun_mat_rotate() #### 90° clockwise matrix rotation 35 +######## fun_mat_num2color() #### convert a numeric matrix into hexadecimal color matrix 35 +######## fun_mat_op() #### assemble several matrices with operation 38 +######## fun_mat_inv() #### return the inverse of a square matrix 41 +######## fun_mat_fill() #### fill the empty half part of a symmetric square matrix 42 +######## fun_permut() #### progressively breaks a vector order 45 +######## fun_permut2() #### progressively breaks a vector order 52 +################ Graphics management 60 +######## fun_width() #### window width depending on classes to plot 60 +######## fun_open() #### open a GUI or pdf graphic window 62 +######## fun_prior_plot() #### set graph param before plotting (erase axes for instance) 65 +######## fun_scale() #### select nice label numbers when setting number of ticks on an axis 69 +######## fun_post_plot() #### set graph param after plotting (axes redesign for instance) 74 +######## fun_close() #### close specific graphic windows 85 +################ Standard graphics 86 +######## fun_empty_graph() #### text to display for empty graphs 86 +################ gg graphics 88 +######## fun_gg_palette() #### ggplot2 default color palette 88 +######## fun_gg_just() #### ggplot2 justification of the axis labeling, depending on angle 89 +######## fun_gg_point_rast() #### ggplot2 raster scatterplot layer 92 +######## fun_gg_scatter() #### ggplot2 scatterplot + lines (up to 6 overlays totally) 95 +######## fun_gg_bar_mean() #### ggplot2 mean barplot + overlaid dots if required 131 +######## fun_gg_boxplot() #### ggplot2 boxplot + background dots if required 166 +######## fun_gg_bar_prop() #### ggplot2 proportion barplot 171 +######## fun_gg_strip() #### ggplot2 stripchart + mean/median 171 +######## fun_gg_violin() #### ggplot2 violins 171 +######## fun_gg_line() #### ggplot2 lines + background dots and error bars 171 +######## fun_gg_heatmap() #### ggplot2 heatmap + overlaid mask if required 173 +######## fun_gg_empty_graph() #### text to display for empty graphs 187 +################ Graphic extraction 189 +######## fun_trim() #### display values from a quantitative variable and trim according to defined cut-offs 189 +######## fun_segmentation() #### segment a dot cloud on a scatterplot and define the dots from another cloud outside the segmentation 197 +################ Import 229 +######## fun_pack() #### check if R packages are present and import into the working environment 229 +######## fun_python_pack() #### check if python packages are present 231 +################ Exporting results (text & tables) 232 +######## fun_report() #### print string or data object into output file 232 ################################ FUNCTIONS ################################ @@ -2162,14 +2163,14 @@ return(list(mat = mat, warnings = warning)) fun_permut <- function(data1, data2 = NULL, n = NULL, seed = NULL, count.print = 10, text.print = "", cor.method = "spearman", cor.limit = 0.2, warn.print = FALSE, path.lib = NULL){ # AIM -# reorder the elements of the data1 vector by flipping 2 randomly selected consecutive positions either: +# reorder the elements of the data1 vector by flipping 2 randomly selected positions either: # 1) n times (when n is precised) or # 2) until the correlation between data1 and data2 decreases down to the cor.limit (0.2 by default). See cor.limit below to deal with negative correlations -# Example of consecutive position flipping: ABCD -> BACD -> BADC, etc. +# Example of position flipping: ABCD -> DBCA -> DACB, etc. # ARGUMENTS # data1: a vector of at least 2 elements. Must be numeric if data2 is specified # data2: a numeric vector of same length as data1 -# n: number of times "flipping 2 randomly selected consecutive positions". Ignored if data2 is specified +# n: number of times "flipping 2 randomly selected positions". Ignored if data2 is specified # seed: integer number used by set.seed(). Write NULL if random result is required, an integer otherwise. BEWARE: if not NULL, fun_permut() will systematically return the same result when the other parameters keep the same settings # count.print: interger value. Print a working progress message every count.print during loops. BEWARE: can increase substentially the time to complete the process using a small value, like 10 for instance. Use Inf is no loop message desired # text.print: optional message to add to the working progress message every count.print loop @@ -2194,8 +2195,8 @@ fun_permut <- function(data1, data2 = NULL, n = NULL, seed = NULL, count.print = # ini.time <- as.numeric(Sys.time()) ; count <- 0 ; for(i0 in 1:1e9){count <- count + 1} ; tempo.time <- as.numeric(Sys.time()) ; tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - ini.time)) ; tempo.lapse # example (2) showing that for loop is faster than while loop # ini.time <- as.numeric(Sys.time()) ; count <- 0 ; while(count < 1e9){count <- count + 1} ; tempo.time <- as.numeric(Sys.time()) ; tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - ini.time)) ; tempo.lapse -# fun_permut(data1 = LETTERS[1:5], data2 = NULL, n = 20, seed = 1, count.print = 10, text.print = "CPU NB 4", cor.method = "spearman", cor.limit = 0.2) -# fun_permut(data1 = 101:110, data2 = 21:30, n = 20, seed = 1, count.print = 10, text.print = "", cor.method = "spearman", cor.limit = 0.2) +# fun_permut(data1 = LETTERS[1:5], data2 = NULL, n = 100, seed = 1, count.print = 10, text.print = "CPU NB 4") +# fun_permut(data1 = 101:110, data2 = 21:30, seed = 1, count.print = 10, text.print = "", cor.method = "spearman", cor.limit = 0.2) # a way to use the cor.limit argument just considering data1 # obs1 <- 101:110 ; fun_permut(data1 = obs1, data2 = obs1, seed = 1, count.print = 10, cor.method = "spearman", cor.limit = 0.2) # fun_permut(data1 = 1:1e3, data2 = 1e3:1, n = 20, seed = 1, count.print = 1e6, text.print = "", cor.method = "spearman", cor.limit = 0.7) @@ -2278,8 +2279,238 @@ stop() # nothing else because print = TRUE by default in fun_check() # package checking fun_pack(req.package = "lubridate", path.lib = path.lib) # end package checking +# main code +warning <- NULL +# code that protects set.seed() in the global environment +# see also Protocol 100-rev0 Parallelization in R.docx +if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment +tempo.random.seed <- .Random.seed +on.exit(assign(".Random.seed", tempo.random.seed, env = .GlobalEnv)) +}else{ +on.exit(set.seed(NULL)) # inactivate seeding -> return to complete randomness +} +# end code that protects set.seed() in the global environment +if( ! is.null(seed)){ +set.seed(seed) +} +ini.date <- Sys.time() # time of process begin, converted into seconds +ini.time <- as.numeric(ini.date) # time of process begin, converted into seconds +ini.pos <- 1:length(data1) # positions of data1 before permutation loops +tempo.pos <- ini.pos # positions of data1 that will be modified during loops +pos.seq.max <- max(ini.pos, na.rm = TRUE) # max position (used by sample.int() function) +warnings <- NULL +# variable allocation before the loops to save time +tempo.time <- as.numeric(Sys.time()) +tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - ini.time)) +final.loop <- tempo.time +final.exp <- Sys.time() +pos <- ini.pos[1:2] # 2 position in initial position, without the last because always up permutation (pos -> pos+1 & pos+1 -> pos) +tempo.count.print <- count.print # for the printing message +count <- 0 +tempo.cor <- 0 +# end variable allocation before the loops to save time +permut.done <- TRUE +if(is.null(data2)){ +if(length(table(data1)) == 1){ +tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data1 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data1))) +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +permut.done <- FALSE +}else{ +if(tempo.count.print > n){ +tempo.count.print <- n +} +# pos.check <- NULL +for(i2 in 1:n){ +count[1] <- count + 1 +pos[] <- sample.int(n = pos.seq.max, size = 2, replace = FALSE) # random sample of a position to permute, sample.int samples in 1:pos.seq.max. Or sample(x = pos.seq, size = 1, replace = FALSE) but slower +tempo.pos[pos[1:2]] <- tempo.pos[pos[2:1]] +if(count == tempo.count.print){ +tempo.count.print[1] <- tempo.count.print + count.print +tempo.time[1] <- as.numeric(Sys.time()) +tempo.lapse[1] <- round(lubridate::seconds_to_period(tempo.time - ini.time)) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FOR LOOP ", i2, " / ", n, " | TIME SPENT: ", tempo.lapse)) +} +# pos.check <- c(pos.check, pos) +} +cat("\n\n") +} +}else{ +if(length(table(data1)) == 1){ +tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data1 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data1))) +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +tempo.cor[1] <- 1 +}else if(length(table(data2)) == 1){ +tempo.warnings <- paste0("NO PERMUTATION PERFORMED BECAUSE data2 ARGUMENT SEEMS TO BE MADE OF IDENTICAL ELEMENTS: ", names(table(data2))) +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +tempo.cor[1] <- 1 +}else{ +cor.ini <- cor(x = data1, y = data2, use = "pairwise.complete.obs", method = cor.method) +tempo.cor[1] <- cor.ini # correlation that will be modified during loops +neg.cor <- FALSE +if(tempo.cor < 0){ +tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE cor.limit PARAMETER WILL BE SWITCHED TO THE NEGATIVE EQUIVALENT: ", -cor.limit) +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +neg.cor[1] <- TRUE +tempo.cor[1] <- abs(tempo.cor) +} +if(tempo.cor < cor.limit){ # randomize directly all the position to be close to correlation zero +tempo.warnings <- paste0("INITIAL ABSOLUTE VALUE OF THE ", toupper(cor.method), " CORRELATION ", fun_round(tempo.cor), " BETWEEN data1 AND data2 HAS BEEN DETECTED AS BELOW THE CORRELATION LIMIT PARAMETER ", cor.limit, "\nTHE data1 SEQUENCE HAS BEEN COMPLETELY RANDOMIZED TO CORRESPOND TO CORRELATION ZERO") +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +for(i1 in 1:5){ # done 5 times to be sure of the complete randomness +count[1] <- count + 1 +tempo.pos <- sample(x = tempo.pos, size = length(tempo.pos), replace = FALSE) +} +}else{ +while(tempo.cor >= abs(cor.limit)){ +count[1] <- count + 1 +pos[] <- sample.int(n = pos.seq.max, size = 2, replace = FALSE) # selection of 1 position +tempo.pos[pos[1:2]] <- tempo.pos[pos[2:1]] +tempo.cor[1] <- abs(cor(x = data1[tempo.pos], y = data2, use = "pairwise.complete.obs", method = cor.method)) +if(count == tempo.count.print){ +tempo.count.print[1] <- tempo.count.print + count.print +tempo.time[1] <- as.numeric(Sys.time()) +tempo.lapse[1] <- round(lubridate::seconds_to_period(tempo.time - ini.time)) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "WHILE LOOP ", format(count, big.mark=","), " / ? | ", format(count, big.mark=","), " PERMUTATION IN data1 | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4), " | TIME SPENT: ", tempo.lapse)) +} +} +} +tempo.cor <- ifelse(neg.cor == TRUE, -tempo.cor, tempo.cor) +} +} +cat("\n\n") +if(warn.print == TRUE & ! is.null(warning)){ +warning(warning) +cat("\n\n") +} +output <- list(data = data1[tempo.pos], warnings = warnings, cor = if(is.null(data2)){cor(ini.pos, tempo.pos, method = "spearman")}else{tempo.cor}, count = count) +return(output) +} + + +######## fun_permut_consec() #### as fun permut() but permuting consecutive positions + + +fun_permut_consec <- function(data1, data2 = NULL, n = NULL, seed = NULL, count.print = 10, text.print = "", cor.method = "spearman", cor.limit = 0.2, warn.print = FALSE, path.lib = NULL){ +# AIM +# as fun_permut() except that reorder the elements of the data1 vector by flipping 2 randomly selected consecutive positions either: +# 1) n times (when n is precised) or +# 2) until the correlation between data1 and data2 decreases down to the cor.limit (0.2 by default). See cor.limit below to deal with negative correlations +# Example of consecutive position flipping: ABCD -> BACD -> BADC, etc. +# ARGUMENTS +# data1: a vector of at least 2 elements. Must be numeric if data2 is specified +# data2: a numeric vector of same length as data1 +# n: number of times "flipping 2 randomly selected consecutive positions". Ignored if data2 is specified +# seed: integer number used by set.seed(). Write NULL if random result is required, an integer otherwise. BEWARE: if not NULL, fun_permut() will systematically return the same result when the other parameters keep the same settings +# count.print: interger value. Print a working progress message every count.print during loops. BEWARE: can increase substentially the time to complete the process using a small value, like 10 for instance. Use Inf is no loop message desired +# text.print: optional message to add to the working progress message every count.print loop +# cor.method: correlation method. Either "pearson", "kendall" or "spearman". Ignored if data2 is not specified +# cor.limit: a correlation limit (between 0 and 1). Ignored if data2 is not specified. Compute the correlation between data1 and data2, permute the data1 values, and stop the permutation process when the correlation between data1 and data2 decreases down below the cor limit value (0.2 by default). If cor(data1, data2) is negative, then -cor.limit is used and the process stops until the correlation between data1 and data2 increases up over cor.limit (-0.2 by default). BEWARE: write a positive cor.limit even if cor(data1, data2) is known to be negative. The function will automatically uses -cor.limit. If the initial correlation is already below cor.limit (positive correlation) or over -cor.limit (negative correlation), then the data1 value positions are completely randomized (correlation between data1 and data2 is expected to be 0) +# warn.print: logical. Print warnings at the end of the execution? No print if no warning messages +# path.lib: absolute path of the required packages, if not in the default folders +# REQUIRED PACKAGES +# lubridate +# REQUIRED FUNCTIONS FROM CUTE_LITTLE_R_FUNCTION +# fun_check() +# fun_pack() +# fun_round() +# RETURN +# a list containing: +# $data: the modified vector +# $warnings: potential warning messages (in case of negative correlation when data2 is specified). NULL if non warning message +# $cor: a spearman correlation between the initial positions (1:length(data1) and the final positions if data2 is not specified and the final correlation between data1 and data2 otherwise, according to cor.method +# $count: the number of loops used +# EXAMPLES +# example (1) showing that for loop, used in fun_permut_consec(), is faster than while loop +# ini.time <- as.numeric(Sys.time()) ; count <- 0 ; for(i0 in 1:1e9){count <- count + 1} ; tempo.time <- as.numeric(Sys.time()) ; tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - ini.time)) ; tempo.lapse +# example (2) showing that for loop, used in fun_permut_consec(), is faster than while loop +# ini.time <- as.numeric(Sys.time()) ; count <- 0 ; while(count < 1e9){count <- count + 1} ; tempo.time <- as.numeric(Sys.time()) ; tempo.lapse <- round(lubridate::seconds_to_period(tempo.time - ini.time)) ; tempo.lapse +# fun_permut_consec(data1 = LETTERS[1:5], data2 = NULL, n = 100, seed = 1, count.print = 10, text.print = "CPU NB 4") +# fun_permut_consec(data1 = 101:110, data2 = 21:30, seed = 1, count.print = 10, text.print = "", cor.method = "spearman", cor.limit = 0.2) +# a way to use the cor.limit argument just considering data1 +# obs1 <- 101:110 ; fun_permut_consec(data1 = obs1, data2 = obs1, seed = 1, count.print = 10, cor.method = "spearman", cor.limit = 0.2) +# fun_permut_consec(data1 = 1:1e3, data2 = 1e3:1, n = 20, seed = 1, count.print = 1e6, text.print = "", cor.method = "spearman", cor.limit = 0.7) +# fun_permut_consec(data1 = 1:1e2, data2 = 1e2:1, seed = 1, count.print = 1e3, cor.limit = 0.5) +# fun_permut_consec(data1 = c(0,0,0,0,0), n = 5, data2 = NULL, seed = 1, count.print = 1e3, cor.limit = 0.5) +# DEBUGGING +# data1 = LETTERS[1:5] ; data2 = NULL ; n = 10 ; seed = NULL ; count.print = 10 ; text.print = "" ; cor.method = "spearman" ; cor.limit = 0.2 ; warn.print = TRUE ; path.lib = NULL +# data1 = LETTERS[1:5] ; data2 = NULL ; n = 10 ; seed = 22 ; count.print = 10 ; text.print = "" ; cor.method = "spearman" ; cor.limit = 0.2 ; warn.print = TRUE ; path.lib = NULL +# data1 = 101:110 ; data2 = 21:30 ; n = 10 ; seed = 22 ; count.print = 10 ; text.print = "" ; cor.method = "spearman" ; cor.limit = 0.2 ; warn.print = TRUE ; path.lib = NULL +# data1 = 1:1e3 ; data2 = 1e3:1 ; n = 20 ; seed = 22 ; count.print = 1e6 ; text.print = "" ; cor.method = "spearman" ; cor.limit = 0.7 ; warn.print = TRUE ; path.lib = NULL +# function name +function.name <- paste0(as.list(match.call(expand.dots=FALSE))[[1]], "()") +# end function name +# required function checking +if(length(find("fun_check", mode = "function")) == 0){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_check() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n") +stop(tempo.cat) +} +if(length(find("fun_pack", mode = "function")) == 0){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_pack() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n") +stop(tempo.cat) +} +if(length(find("fun_round", mode = "function")) == 0){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": REQUIRED fun_pack() FUNCTION IS MISSING IN THE R ENVIRONMENT\n\n================\n\n") +stop(tempo.cat) +} +# end required function checking +# argument checking +arg.check <- NULL # for function debbuging +checked.arg.names <- NULL # for function debbuging +ee <- expression(arg.check <- c(arg.check, tempo$problem) , checked.arg.names <- c(checked.arg.names, tempo$param.name)) +tempo <- fun_check(data = data1, class = "vector", fun.name = function.name) ; eval(ee) +if(tempo$problem == FALSE & length(data1) < 2){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 ARGUMENT MUST BE A VECTOR OF MINIMUM LENGTH 2. HERE IT IS: ", length(data1),"\n\n================\n\n") +cat(tempo.cat) +arg.check <- c(arg.check, TRUE) +} +if( ! is.null(data2)){ +tempo <- fun_check(data = data1, class = "vector", mode = "numeric", fun.name = function.name) ; eval(ee) +if(tempo$problem == TRUE){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 MUST BE A NUMERIC VECTOR IF data2 ARGUMENT IS SPECIFIED\n\n================\n\n") +cat(tempo.cat) +arg.check <- c(arg.check, TRUE) +} +tempo <- fun_check(data = data2, class = "vector", mode = "numeric", fun.name = function.name) ; eval(ee) +if(length(data1) != length(data2)){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": data1 AND data2 MUST BE VECTOR OF SAME LENGTH. HERE IT IS ", length(data1)," AND ", length(data2), "\n\n================\n\n") +cat(tempo.cat) +arg.check <- c(arg.check, TRUE) +} +}else if(is.null(n)){ +tempo.cat <- paste0("\n\n================\n\nERROR IN ", function.name, ": n ARGUMENT CANNOT BE NULL IF data2 ARGUMENT IS NULL\n\n================\n\n") +cat(tempo.cat) +arg.check <- c(arg.check, TRUE) +} +if( ! is.null(n)){ +tempo <- fun_check(data = n, class = "vector", typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = function.name) ; eval(ee) +} +if( ! is.null(seed)){ +tempo <- fun_check(data = seed, class = "vector", typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = function.name) ; eval(ee) +} +tempo <- fun_check(data = count.print, class = "vector", typeof = "integer", length = 1, double.as.integer.allowed = TRUE, neg.values = FALSE, fun.name = function.name) ; eval(ee) +tempo <- fun_check(data = text.print, class = "character", length = 1, fun.name = function.name) ; eval(ee) +tempo <- fun_check(data = cor.method, options = c("pearson", "kendall", "spearman"), length =1, fun.name = function.name) ; eval(ee) +tempo <- fun_check(data = cor.limit, class = "vector", mode = "numeric", prop = TRUE, length = 1, fun.name = function.name) ; eval(ee) +tempo <- fun_check(data = warn.print, class = "logical", length = 1, fun.name = function.name) ; eval(ee) +if( ! is.null(path.lib)){ +tempo <- fun_check(data = path.lib, class = "character", fun.name = function.name) ; eval(ee) +if(tempo$problem == FALSE & ! all(dir.exists(path.lib))){ +tempo.cat <- paste0("\n\n============\n\nERROR IN ", function.name, ": \nDIRECTORY PATH INDICATED IN THE path.lib PARAMETER DOES NOT EXISTS: ", path.lib, "\n\n============\n\n") +cat(tempo.cat) +arg.check <- c(arg.check, TRUE) +} +} +if(any(arg.check) == TRUE){ +stop() # nothing else because print = TRUE by default in fun_check() +} +# source("C:/Users/Gael/Documents/Git_versions_to_use/debugging_tools_for_r_dev-v1.2/r_debugging_tools-v1.2.R") ; eval(parse(text = str_basic_arg_check_dev)) ; eval(parse(text = str_arg_check_with_fun_check_dev)) # activate this line and use the function (with no arguments left as NULL) to check arguments status and if they have been checked using fun_check() +# end argument checking +# package checking +fun_pack(req.package = "lubridate", path.lib = path.lib) +# end package checking # local function -fun_loop <- function(round, count, tempo.pos, pos.seq.max, pos, data1, data2, cor.method, cor.ini, cor.limit, tempo.cor, tempo.time, tempo.lapse, final.loop, final.exp, BREAK, seed){ +fun_loop <- function(round, count, tempo.pos, pos.seq.max, pos, data1, data2, cor.method, cor.ini, cor.limit, tempo.cor, tempo.time, tempo.lapse, final.loop, final.exp, BREAK){ +# no need to set seed because already done ine the main function loop1 <- trunc(count/(abs(cor.ini) - abs(tempo.cor)) * (abs(tempo.cor) - cor.limit)) # count/(abs(cor.ini) - abs(tempo.cor)) is the number of count per unit of corr. Tis is multiplied by the remaining distance to run ceiling to be over the number of approximate loops in order to reach the cor.limit value if(is.na(loop1) | ! is.finite(loop1)){ tempo.cat <- (paste0("\n\n============\n\nERROR IN ", function.name, ": CODE INCONSISTENCY 1\n\n============\n\n")) @@ -2291,10 +2522,11 @@ loop1 <- 1e9 }else{ loop2 <- 1 } -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FOR LOOP STEP | ROUND: ", round, " | LOOP1: ", format(loop1, big.mark=","), " | LOOP2: ", format(loop2, big.mark=","), " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4), "\n")) if(loop2 == 1 & loop1 < 100000){ BREAK <- TRUE +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "NO FOR LOOP STEP USED BECAUSE LOOP1 AND LOOP2 TOO LOW | LOOP1: ", format(loop1, big.mark=","), " | LOOP2: ", format(loop2, big.mark=","))) }else{ +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "INITIAL SETTINGS BEFORE FOR LOOP STEP | ROUND: ", round, " | LOOP1: ", format(loop1, big.mark=","), " | LOOP2: ", format(loop2, big.mark=","), " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) tempo.date.loop <- Sys.time() tempo.time.loop <- as.numeric(tempo.date.loop) total.loop <- loop1 * loop2 @@ -2320,7 +2552,6 @@ return(list(tempo.pos = tempo.pos, count = count, BREAK = BREAK)) # BEWARE: resp } #end local function # main code -warning <- NULL # code that protects set.seed() in the global environment # see also Protocol 100-rev0 Parallelization in R.docx if(exists(".Random.seed", envir = .GlobalEnv)){ # if .Random.seed does not exists, it means that no random operation has been performed yet in any R environment @@ -2394,7 +2625,7 @@ cor.ini <- cor(x = data1, y = data2, use = "pairwise.complete.obs", method = cor tempo.cor <- cor.ini # correlation that will be modified during loops neg.cor <- FALSE if(tempo.cor < 0){ -tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE cor.limit PARAMETER WILL BE SWITCHED TO THE NEGATIVE EQUIVALENT: ", -cor.limit) +tempo.warnings <- paste0("INITIAL ", toupper(cor.method), " CORRELATION BETWEEN data1 AND data2 HAS BEEN DETECTED AS NEGATIVE: ", tempo.cor, ". THE LOOP STEPS WILL BE PERFORMED USING POSITIVE CORRELATIONS BUT THE FINAL CORRELATION WILL BE NEGATIVE") warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used neg.cor <- TRUE tempo.cor <- abs(tempo.cor) @@ -2412,8 +2643,8 @@ count2[1] <- 1 # 1 and not 0 because already 1 performed just below pos[1] <- sample.int(n = pos.seq.max, size = 1, replace = FALSE) # selection of 1 position tempo.pos[(pos + 1):pos] <- tempo.pos[pos:(pos + 1)] tempo.cor[1] <- abs(cor(x = data1[tempo.pos], y = data2, use = "pairwise.complete.obs", method = cor.method)) -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FIRST WHILE LOOP STEP")) -while(tempo.cor == abs(cor.ini)){ +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FIRST FOR & WHILE LOOP STEP | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) +while(tempo.cor == abs(cor.ini)){ # to be out of equality between tempo.cor and abs(cor.ini) at the beginning (only valid for very long vector) count[1] <- count + 1 count2[1] <- count2 + 1 pos[1] <- sample.int(n = pos.seq.max, size = 1, replace = FALSE) # selection of 1 position @@ -2426,30 +2657,50 @@ tempo.lapse[1] <- round(lubridate::seconds_to_period(tempo.time - ini.time)) cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FIRST WHILE LOOP ", format(count2, big.mark=","), " / ? | ", format(count, big.mark=","), " PERMUTATION IN data1 | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4), " | TIME SPENT: ", tempo.lapse)) } } -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FIRST WHILE LOOP END")) +if(count < 100){ # a small loop to increase the number of count because count is used to estimate the number of loops by the fun_loop function. Thus count must be large enough to be relatively accurate +loop1 <- trunc(count/(abs(cor.ini) - abs(tempo.cor)) * (abs(tempo.cor) - cor.limit)) # count/(abs(cor.ini) - abs(tempo.cor)) is the number of count per unit of corr. Tis is multiplied by the remaining distance to run ceiling to be over the number of approximate loops in order to reach the cor.limit value +if(is.na(loop1) | ! is.finite(loop1)){ +tempo.cat <- (paste0("\n\n============\n\nERROR IN ", function.name, ": CODE INCONSISTENCY 1\n\n============\n\n")) +stop(tempo.cat) +}else if(loop1 > 100000){ # to be sure that 100 more loops will not push tempo.cor below cor.limit +for(i4 in 1:100){ +count[1] <- count + 1 +pos[1] <- sample.int(n = pos.seq.max, size = 1, replace = FALSE) # selection of 1 position +tempo.pos[(pos + 1):pos] <- tempo.pos[pos:(pos + 1)] +} +}else if(loop1 <= 100000){ +BREAK[1] <- TRUE # to inactivate the for loop and go directly to the third while loop +} +} +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FIRST FOR & WHILE LOOP STEP END | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) tempo.cor[1] <- abs(cor(x = data1[tempo.pos], y = data2, use = "pairwise.complete.obs", method = cor.method)) if(tempo.cor < cor.limit){ -tempo.warnings <- paste0("THE FOR LOOP STEPS HAVE BEEN TOO FAR AND THE SECOND WHILE LOOP STEP HAS NOT BEEN USED") +tempo.warnings <- paste0("THE FIRST FOR & WHILE LOOP STEPS HAVE BEEN TOO FAR AND SUBSEQUENT LOOP STEPS HAVE NOT BEEN USED") warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used } tempo.date.loop <- Sys.time() tempo.time.loop <- as.numeric(tempo.date.loop) tempo.cor.loop <- tempo.cor +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "SECOND FOR LOOP STEP | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) while(tempo.cor > cor.limit & BREAK == FALSE){ round[1] <- round + 1 -tempo.res[] <- fun_loop(round = round, count = count, tempo.pos = tempo.pos, pos.seq.max = pos.seq.max, pos = pos, data1 = data1, data2 = data2, cor.method = cor.method, cor.ini = cor.ini, cor.limit = cor.limit, tempo.cor = tempo.cor, tempo.time = tempo.time, tempo.lapse = tempo.lapse, final.loop = final.loop, final.exp = final.exp, BREAK = BREAK) +tempo.res[] <- fun_loop(round, count, tempo.pos, pos.seq.max, pos, data1, data2, cor.method, cor.ini, cor.limit, tempo.cor, tempo.time, tempo.lapse, final.loop, final.exp, BREAK) tempo.pos[] <- tempo.res[[1]] count[1] <- tempo.res[[2]] BREAK[1] <- tempo.res[[3]] tempo.cor[1] <- abs(cor(x = data1[tempo.pos], y = data2, use = "pairwise.complete.obs", method = cor.method)) } -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "FOR LOOP END | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "SECOND FOR LOOP END | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) tempo.cor[1] <- abs(cor(x = data1[tempo.pos], y = data2, use = "pairwise.complete.obs", method = cor.method)) +if(tempo.cor < cor.limit){ +tempo.warnings <- paste0("THE FOR LOOP STEPS HAVE BEEN TOO FAR AND THE THIRD WHILE LOOP STEP HAS NOT BEEN USED") +warnings <- paste0(warnings, ifelse(is.null(warnings), "", "\n"), tempo.warnings) # in fact, abs(tempo.cor) is systematicallu used +} tempo.date.loop <- Sys.time() tempo.time.loop <- as.numeric(tempo.date.loop) tempo.cor.loop <- tempo.cor count4[1] <- 0 -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "SECOND WHILE LOOP STEP")) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "THIRD WHILE LOOP STEP | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) while(tempo.cor > cor.limit){ count[1] <- count + 1 count4[1] <- count4 + 1 @@ -2462,14 +2713,15 @@ tempo.time[1] <- as.numeric(Sys.time()) tempo.lapse[1] <- round(lubridate::seconds_to_period(tempo.time - ini.time)) final.loop[1] <- (tempo.time - tempo.time.loop) / (tempo.cor.loop - tempo.cor) * (tempo.cor - cor.limit) # tempo.cor.loop - tempo.cor always positive and tempo.cor decreases progressively starting from tempo.cor.loop final.exp[1] <- as.POSIXct(final.loop, origin = tempo.date.loop) -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "SECOND WHILE LOOP ", format(count4, big.mark=","), " / ? | ", format(count, big.mark=","), " PERMUTATION IN data1 | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4), " | TIME SPENT: ", tempo.lapse, " | EXPECTED END: ", final.exp)) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "THIRD WHILE LOOP ", format(count4, big.mark=","), " / ? | ", format(count, big.mark=","), " PERMUTATION IN data1 | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4), " | TIME SPENT: ", tempo.lapse, " | EXPECTED END: ", final.exp)) } } -cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "SECOND WHILE LOOP END\n\n")) +cat(paste0("\n", ifelse(text.print == "", "", paste0(text.print, " | ")), "THIRD WHILE LOOP END | LOOP COUNT: ", count, " | CORRELATION LIMIT: ", fun_round(cor.limit, 4), " | TEMPO CORRELATION: ", fun_round(tempo.cor, 4))) } tempo.cor <- ifelse(neg.cor == TRUE, -tempo.cor, tempo.cor) } } +cat("\n\n") if(warn.print == TRUE & ! is.null(warning)){ warning(warning) cat("\n\n") @@ -8325,6 +8577,7 @@ fun_pack <- function(req.package, load = FALSE, path.lib = NULL){ # fun_pack(req.package = "ggplot2", path.lib = "blablabla") # DEBUGGING # req.package = "ggplot2" ; path.lib = "C:/Program Files/R/R-3.5.1/library" +# req.package = "serpentine" ; path.lib = "C:/users/gael/appdata/roaming/python/python36/site-packages" # function name function.name <- paste0(as.list(match.call(expand.dots=FALSE))[[1]], "()") # end function name @@ -8393,7 +8646,7 @@ fun_python_pack <- function(req.package, path.lib = NULL, R.path.lib = NULL){ # fun_python_pack(req.package = "serpentine") # fun_python_pack(req.package = "serpentine", path.lib = "blablabla") # DEBUGGING -# req.package = "serpentine" ; path.lib = "C:/Program Files/R/R-3.5.1/library" ; R.path.lib = NULL +# req.package = "serpentine" ; path.lib = NULL ; R.path.lib = NULL # req.package = "bad" ; path.lib = NULL ; R.path.lib = NULL # function name function.name <- paste0(as.list(match.call(expand.dots=FALSE))[[1]], "()") @@ -8446,12 +8699,12 @@ path.lib <- path.lib$path_lib for(i0 in 1:length(req.package)){ tempo.try <- vector("list", length = length(path.lib)) for(i1 in 1:length(path.lib)){ -tempo.try[[i1]] <- try(suppressWarnings(reticulate::import_from_path(req.package[i0], path = path.lib[i1])), silent = TRUE) +tempo.try[[i1]] <- suppressWarnings(try(reticulate::import_from_path(req.package[i0], path = path.lib[i1]), silent = TRUE)) } if(all(sapply(tempo.try, FUN = grepl, pattern = "[Ee]rror"))){ stop(paste0("\n\n================\n\nERROR IN ", function.name, ": PACKAGE ", req.package[i0], " MUST BE INSTALLED IN THE MENTIONNED DIRECTORY:\n", paste(path.lib, collapse = "\n"), "\n\n================\n\n")) }else{ -assign(req.package[i0], reticulate::import(req.package[i0])) +# suppressWarnings(suppressPackageStartupMessages(assign(req.package[i0], reticulate::import(req.package[i0])))) # not required because try() already evaluates } } } diff --git a/cute_little_R_functions.docx b/cute_little_R_functions.docx index 4d53214d90b0d05a6ce44076cf2aac8683f3a45a..63eccb379a02a3af4d864da37d8828cfce6b2fa0 100644 Binary files a/cute_little_R_functions.docx and b/cute_little_R_functions.docx differ