Commit 01df2bae authored by Gael's avatar Gael
Browse files

release v5.2.0

parent 8dfdf68b
......@@ -155,6 +155,11 @@ Neter J, Kutner MH, Nachtsheim CJ, Wasserman W. Applied Linear Statistical Model
## WHAT'S NEW IN
### v5.2.0
1) config file now present in the result folder
### v5.1.0
1) Bug corrected
......
......@@ -101,16 +101,18 @@ if(interactive() == FALSE){ # if(grepl(x = commandArgs(trailingOnly = FALSE), pa
}
}else if(sys.nframe() == 0L){ # detection of copy-paste/direct execution (for debugging). With script it is also 0, with source, it is 4
run.way <- "DIRECT RUNNING DETECTION"
config.path <- "C:/Users/Gael/Documents/Git_projects/anova_contrasts/anova_contrasts.config"
cat(paste0("\n\n", run.way, "\n"))
source("C:/Users/Gael/Documents/Git_projects/anova_contrasts/anova_contrasts.config", local = .GlobalEnv)
cat(paste0("\n\n============\n\nSTRONG WARNING IN ", script, "\nDIRECT RUNNING EXECUTION USE THIS FILE AS CONFIG FILE:\nC:/Users/Gael/Documents/Git_projects/anova_contrasts/anova_contrasts.config\n\n============\n\n"), call. = FALSE)
source(config.path, local = .GlobalEnv)
cat(paste0("\n\n============\n\nSTRONG WARNING IN ", script, "\nDIRECT RUNNING EXECUTION USE THIS FILE AS CONFIG FILE:\n", config.path, "\n\n============\n\n"), call. = FALSE)
}else{
run.way <- "SOURCE RUNNING DETECTION" # using source(), sys.nframe() is 4
cat(paste0("\n\n", run.way, "\n"))
if( ! file.exists(paste0(dirname(parent.frame(2)$ofile), "/anova_contrasts.config"))){
stop(paste0("\n\n============\n\nERROR IN ANOVA_CONTRASTS\nanova_contrasts.config FILE NOT PRESENT WHERE THE anova_contrast.R SOURCE FILE IS LOCATED:\n", dirname(parent.frame(2)$ofile), "\nPLEASE, DO NOT MODIFY THE NAME OF THE anova_contrasts.config FILE (OR MODIFY THE CODE IN THE anova.contrast_source FILE, CONFIG IMPORT SECTION)\n\n============\n\n"), call. = FALSE)
}else{
source(paste0(dirname(parent.frame(2)$ofile), "/anova_contrasts.config"), local = .GlobalEnv) # source the parameters used below
config.path <- paste0(dirname(parent.frame(2)$ofile), "/anova_contrasts.config")
source(config.path, local = .GlobalEnv) # source the parameters used below
}
}
......@@ -125,7 +127,8 @@ param.list <- c(
"erase.objects",
"erase.graphs",
"script",
"run.way",
"run.way",
"config.path",
if(run.way == "SCRIPT RUNNING DETECTION"){"command"},
"project.name",
"file.name1",
......@@ -585,6 +588,8 @@ fun_report(data = ini.date, path = out.path, output = log.file, vector.cat = TRU
fun_report(data = "\n\n################################ INITIAL SETTINGS AND DATA MODIFICATIONS", path = out.path, output = log.file, sep = 4)
options(contrasts = c(contrast.kind, contrast.kind))
fun_report(path = out.path, data = paste0("ESTIMATES AND CONTRASTS USING (options(contrast = '", contrast.kind, "', '", contrast.kind, "'))"), output = log.file)
file.copy(from = config.path, to = paste0(out.path, "/anova_contrasts.config"), overwrite = TRUE, recursive = FALSE, copy.mode = FALSE, copy.date = TRUE)
fun_report(data = paste0("CONFIG FILE USED HAS BEEN BACKUPED IN:\n", out.path), path = out.path, output = log.file)
################ End ignition
......@@ -619,7 +624,7 @@ user.data.pre <- read.table(paste0(in.path, get("file.name1")), header=TRUE, sep
if(any(user.data.pre == "", na.rm = TRUE)){
stop(paste0("\n\n============\n\nERROR: ", get("file.name1"), "FILE (file.name1 PARAMETER) CONTAINS EMPTY CELLS INDICATED BY \"\" WHICH IS NOT ALLOWED\nEXAMPLE (ROW COL, WITHOUT HEADER): ", paste(which(user.data.pre == "", arr.ind = TRUE)[1, ], collapse = " "), "\n\n============\n\n"), call. = FALSE)
stop(paste0("\n\n============\n\nERROR: ", get("file.name1"), "FILE (file.name1 PARAMETER) CONTAINS EMPTY CELLS INDICATED BY \"\" WHICH IS NOT ALLOWED\nEXAMPLE (ROW COL, WITHOUT HEADER): ", paste(which(user.data.pre == "", arr.ind = TRUE)[1, ], collapse = " "), "\nREPLACING ALL COMMAS BY DOTS SOLVES THIS FOR QUANTI COLUMNS\n\n============\n\n"), call. = FALSE)
}
if( ! all(response.var %in% names(user.data.pre))){
stop(paste0("\n\n============\n\nERROR: response.var PARAMETER MUST INCLUDE A COLUMN NAME OF THE IMPORTED DATASET:\n\n", paste(paste(c("response.var", "COL NAMES OF THE DATASET"), mapply(list(response.var, names(user.data.pre)), FUN = "paste", collapse = " "), sep = ": "), collapse = "\n"), "\n\n============\n\n"), call. = FALSE)
......
#########################################################################
## ##
## anova & contrasts ##
## ##
## Gael A. Millot ##
## ##
## ##
#########################################################################
################################ Parameters that need to be set by the user
################ Mandatory settings
######## File names and locations
file.name1 <- "Internalization.txt" # single character string indicating the name of the data frame file (column names are mandatory). Beware: "space hyphen space" not allowed in column names
in.path <- "C:/Users/Gael/Documents/Git_projects/anova_contrasts/dataset/" # single character string indicating the absolute pathway of the folder containing the input data files (file.name1 and file.name2)
out.path <- "C:/Users/Gael/Desktop/" # absolute pathway of the destination folder that will contain the results (exported data)
######## R packages and cute_little_R_functions file locations
function1.path <- "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/-/raw/ce5910ddf6d10850e79788f7bd05251c2e404b2b/cute_little_R_functions.R" # single character string indicating the file (and absolute pathway) of the required cute_little_R_functions toolbox. With ethernet connection available, this can also be used: "https://gitlab.pasteur.fr/gmillot/cute_little_R_functions/raw/v5.1.0/cute_little_R_functions.R" or local "C:\\Users\\Gael\\Documents\\Git_projects\\cute_little_R_functions\\cute_little_R_functions.R"
######## Model parameters
response.var <- "Internalization.percent" # single character string indicating the name of the response variable (column name of the imported data frame)
response.var.class.rm <- NULL # vector of character string indicating the name of the classes of the response variable to remove before modelling. Write NULL if no class to remove. Not considered if model.kind parameter is "lm"
predict.var <- c("Time", "Bacteria") # vector of character string indicating the name of the predictive variables (column names of the imported data frame), i.e., all the qualitative variables that must be in the data frame used to built the model, plot graphics and make the 2*2 contrast tests. BEWARE: possible to have here variable that are not in the models (like technical replicate column). If quantitative predictive variables are in the models, they must be reported here
predict.var.class.rm <- list(L1 = "30min", L2 = "AS9compCT622") # list of vector of character string indicating the classes of predict.var to remove before modelling. The list must be of same length as predict.var, each compartment of the list corresponding to each variable of predict.var. For each compatement, either write the name of the classes to remove or NULL if no class to remove for this variable. Classes specified in this list will be removed before modelling. Can be useful for PCR analysis, by removing the reference gene. Ignored if NULL
indep.quali.var <- NULL # single character string indicating an independent quali variable. All the anova, graphs, and contrasts tests will be performed for each class of this variable. Thus, must be present in the data frame analyzed. But must neither be present in the models (see below model1, model2 and model3 arguments), nor in the graphics arguments (categ.boxplot, row.facet, col.facet, add), nor in the constrasts argument (contrast.var). Ignored if NULL
indep.quali.var.class.rm <- NULL # vector of character string indicating the classes of indep.quali.var to remove before modelling. Can be useful for PCR analysis, by removing the reference gene. Ignored if NULL
mean.quali <- NULL # vector of character string indicating the variables of predict.var for which class means has to be performed -> a single mean value of response.var per class of mean.quali. Mean on technical replicates for instance. Variables of predict.var not present in mean.quali will disappear (column disappeance) due to averaging. Thus, variables of predict.var not present in mean.quali must neither be present in the models (see below model1, model2 and model3 arguments), nor in the graphics arguments (categ.boxplot, row.facet, col.facet, add), nor in the constrasts argument (contrast.var). Ignored if NULL. Warning: cannot be used is quantitative variables in predict.var (because some values will be lost)
model.kind <- "lm"# single character string indicating the kind of model. Either "lm" or "glm" or "multinom"
model.family <- "binomial" # single character string indicating the glm family. Ignored if model.kind is not "glm". See http://127.0.0.1:15590/library/stats/html/family.html for the different functions for the different choices. Do not write NULL
only.quali <- FALSE # logical. Only qualitative variable as predictive variables (anova) ? If TRUE and any integer variable, will be converted into factor. If TRUE and double variable, will return an ERROR. If FALSE -> ancova
######## Models
# Reminder:
# 1) in balanced design, type I, II and III provide the same anova result
# 2) without interaction, type II and III provide the same anova result
# See protocol 103
# standard (just additive and interaction effects, type I, II, or III)
model1 <- paste0(response.var, " ~ Time * Bacteria") # single character string. Exemple: "Delta.Ct ~ Strain*Gene*Condition". If a Tech.replicate variable exists in the dataset, its variation is in the residual error
# using random effect with Error() (for anova type I and for balanced design only)
model2 <- NULL # single character string. Example: "response.var, " ~ geno * cond + Error(geno/indiv)". This model means indiv is nested into geno but geno is considered as random (which is not the case). BEWARE: "Delta.Ct ~ (1|Tech.replicate) + Strain*Gene*Condition" does not work and "formula = Delta.Ct ~ (1/Tech.replicate) + Strain*Gene*Condition" is equivalent to "Delta.Ct ~ Strain*Gene*Condition"
# using random effect (mixed model, type I, II, or III)
model3 <- NULL # single character string. Example: "response.var, " ~ geno + cond + (1|geno/exp)", 1|geno/exp meaning that exp is nested into geno and geno is a random effect. This can also be written "response.var, " ~ (1|geno) + cond + (1|geno:exp)". "Delta.Ct ~ (1|Tech.replicate) + Strain*Gene*Condition" meaninng that Tech.replicate is random effect
# simple model (to compare with model1, it is just One-way analysis of means not assuming equal variances, i.e., 2 by 2 t tests with welch correction. Log transfo is never applied because this method is robust to normality assumption)
simple.model <- "Bacteria" # single string indicating the qualitative variable used for t tests. If non NULL, formula used is response.var ~ simple.var. If NULL, no simple model is used
######## Graphics
categ.boxplot = c("Time", "Bacteria") # vector of character string of max length 2 indicating the name of the variables to represent in a single boxplot panel (first variable as x-axis and potential second variable as group of boxes). Write NULL if no boxplot wanted
row.facet <- NULL # vector of character string indicating the name of the variables used for multipaneling (each row of the multipanel matrice is crossed classes of the row.facet variables). Ignored if NULL
col.facet <- NULL # vector of character string indicating the name of the variables used for multipaneling (each column of the multipanel matrice is crossed classes of the col.facet variables). Ignored if NULL
######## Contrasts
contrast.var <- c("Bacteria", "Time") # vector of character string indicating the variables used for contrast tests (2x2 class comparison). Estimates averaging will be performed if predictive variables are present in the model but not here . Order matters with several variables, but only for the way the results are presented (with c("Bacteria", "Time"), order is first with Bacteria then with Time). More precisely, 2x2 comparison is performed between classes of a variable, for each class of the other variables. In other words, comparison of 2 classes from 2 variables is not allowed, because to difficult to interpret. As an exemple, with contrast.var <- c("Strain", "Condition"), comparison of "WT" and "Treat" from Strain is performed for each class of Condition, and vice-versa. If NULL, no contrast performed
interaction <- NULL # single character string indicating if interaction contrast comparisons must be added in the contrast analysis. Either "add" (add to the other contrast comparison) or "only" (only perfom this comparison). Write NULL if not required. Interaction comparison means that all the couple of effect differences are compared. For instance, 1h - 2h versus AS9 - WT is compared (diff of Time effect vs diff of Bacteria effect), which is different from AS9 - WT in the 1h class (as obtained with NULL or "add").If not NULL, at least two variable must be present in contrast.var
no.p.value.var <- NULL # vector of character string indicating one or several qualitatives variables that are in the contrast analysis (thus which are in contrast.var) but for which no 2 x 2 intraclass comparison is really required. In fact, this parameter removes some of the lines in all the possible 2 x 2 comparison using all the contrast.var variables. This just allows to decrease the impact of the p value adjustment. If NULL, no lines removed in the 2 x 2 comparison. BEWARE: this is different from removing a variable in contrast.var. Example: using contrast.var <- c("Bacteria", "Time") and no.p.value.var <- NULL, 4 p values are computed (because 2 classes per variable). Using contrast.var <- c("Bacteria") and no.p.value.var <- NULL, 1 p values is computed (because 2 classes per variable Bacteria and estimates averaged according to Time). Using contrast.var <- c("Bacteria", "Time") and no.p.value.var <- "Time", 2 p values are computed (the 2 classes of Bacteria ocmpared of each of the 2 classes of Time). This is also different from using the indep.quali.var because in such case, analysis are completely separated for each class of indep.quali.var
################ End Mandatory settings
################ Optional settings
######## File names and locations
project.name <- "anova_contrasts" # single character string indicating the project name
######## R packages and cute_little_R_functions file locations
lib.path <- NULL # vector of character strings that define the absolute pathway of the folder containing the R packages. Write NULL for the default path. BEWARE: default path is dependent on the system and interface used. For instance, using cygwin64 on windows 7, the path is "C:/Program Files/R/R-3.5.3/library". On the same cmputer using the R classical interface, the paths are [1] "C:/Users/Gael/Documents/R/win-library/3.5" [2] "C:/Program Files/R/R-3.5.3/library"
######## Model parameters
y.log2.trans <- FALSE # logical. Log transform the response (y) values ? WARNING: if negative or 0 values are present, values of the response.var parameter are converted according to y <- y + abs(min(y)) + 1
neg.rm <- FALSE # remove negative or null values of the response var ? This is to allow the log transfo. Ignored if y.log2.trans is FALSE
ss.type <- 3 # type of sum of squares used by model3. Either 2 for type II or 3 for type 3 (recommanded by default). Mixed models (model3) cannot accept 1 (car::Anova() function used in model3). For 1 for type I (R default), use model1 or model2
######## Models
######## Graphics
activate.pdf <- TRUE # logical. Write TRUE for pdf display and FALSE for R display (main graphs)
add <- NULL # single character string for additional features in the boxplot (see add argument of fun_gg_boxplot() function)
optional.text <- "" # single character string that add text in plots
######## Contrasts
contrast.kind <- "contr.sum" # kind of contrast treatment. See http://127.0.0.1:14927/library/stats/html/contrast.html. Not sure that emmeans use this -> to be checked. Example: "contr.treatment" or "contr.sum"
p.adj <- "BH" # correction of p values due to multiple comparisons. One of these c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none"). "BH" and "fdr" are the same
######## Others
warn.secu <- FALSE # logical. Display if internal homemade functions of anova_contrasts and anova_contrasts have variables that are present in other environments?
nrow.disp <- 200 # positive integer indicating the number of rows of dataset over which tables are not displayed in the report file
ncol.disp <- 10 # positive integer indicating the number of columns of dataset over which tables are not displayed in the report file
test.log.cor <- FALSE # logical. If TRUE, test the correction applies during log2 transformation if negative or 0 values are present. This means that values of the response.var parameter are converted according to y <- y + abs(min(y)) + 1
################ End Optional settings
################################ End Parameters that need to be set by the user
Internalization.percent Time Bacteria FAKE_VAR
65.7 1h WT Analyse1
32.24 1h WT Analyse1
43.6 1h WT Analyse1
49.2 1h WT Analyse1
31 1h AS9 Analyse1
23.3 1h AS9 Analyse1
21.7 1h AS9 Analyse1
26.13 1h AS9 Analyse1
60 2h WT Analyse1
60.5 2h WT Analyse1
72.2 2h WT Analyse1
48.6 2h WT Analyse1
47.2 2h AS9 Analyse1
43.2 2h AS9 Analyse1
22.4 2h AS9 Analyse1
38.03 2h AS9 Analyse1
Time Bacteria contrast estimate SE df t.ratio p.value adj.p.value
1h . AS9 - WT -22.1525 7.26096742291733 12 -3.05090199552218 0.0100678262133652 0.0201356524267304
2h . AS9 - WT -22.6175 7.26096742291733 12 -3.11494304858245 0.00893838763173974 0.0201356524267304
. AS9 1h - 2h -12.175 7.26096742291733 12 -1.67677380862126 0.119421678219412 0.119421678219412
. WT 1h - 2h -12.64 7.26096742291733 12 -1.74081486168154 0.107269024007513 0.119421678219412
Bacteria m1 m2 t df p.value
AS9 - WT 31.62 54.005 -3.862066753 13.147177098 0.001923959
contrast estimate SE df t.ratio p.value adj.p.value
AS9 - AS9compCT622 -7.75444444444445 3.90458979254483 24 -1.98598184609566 0.0585718167996552 0.0585718167996552
AS9 - WT -22.6973333333333 3.5708613338574 24 -6.35626287644575 0.00000142839345093559 0.00000428518035280676
AS9compCT622 - WT -14.9428888888889 3.75992479095553 24 -3.97425207143342 0.000562315710030261 0.000843473565045391
contrast estimate SE df t.ratio p.value adj.p.value
AS9 - AS9compCT622 -7.81163990580175 3.66165816443645 28 -2.1333613229306 0.0417966092372082 0.0417966092372082
AS9 - WT -22.850976236352 3.3503858107396 28 -6.82040144842529 0.000000207505607993279 0.000000622516823979837
AS9compCT622 - WT -15.0393363305502 3.53161673408233 28 -4.25848484220021 0.000209372992111463 0.000314059488167195
contrast estimate SE df t.ratio p.value adj.p.value
AS9 - AS9compCT622 -7.76558677187199 3.66192880939093 28.0070105715396 -2.12062745511527 0.0429420549084249 0.0429420549084249
AS9 - WT -22.7659550660201 3.35136924439369 28.0284900002451 -6.7930309690894 0.000000221714600004207 0.000000665143800012622
AS9compCT622 - WT -15.0003682941481 3.53181959062063 28.0053965906149 -4.24720683185071 0.000215793887988249 0.000323690831982374
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment