From 20a22e1e93eddb69a829fff3620ff7f00835718b Mon Sep 17 00:00:00 2001 From: Thomas OBADIA <tobadia@ml24-0166.corp.pasteur.fr> Date: Tue, 12 Nov 2024 17:08:44 +0100 Subject: [PATCH] Following discovery of inconsistencies in REDCap automatic calculations with datediff, implement a curation pipeline as step 02 in the 02_OBSERVATIONAL project. The current curation is for fixing the agey calculation --- .../OBSERVATIONAL_00_R_environment.R | 13 +++ .../OBSERVATIONAL_02_curate_REDCap_raw_data.R | 80 +++++++++++++++++++ ...SERVATIONAL_03_merge_inventory_metadata.R} | 0 3 files changed, 93 insertions(+) create mode 100644 02_OBSERVATIONAL/OBSERVATIONAL_02_curate_REDCap_raw_data.R rename 02_OBSERVATIONAL/{OBSERVATIONAL_02_merge_inventory_metadata.R => OBSERVATIONAL_03_merge_inventory_metadata.R} (100%) diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_00_R_environment.R b/02_OBSERVATIONAL/OBSERVATIONAL_00_R_environment.R index 59bdeb3..ef3c615 100644 --- a/02_OBSERVATIONAL/OBSERVATIONAL_00_R_environment.R +++ b/02_OBSERVATIONAL/OBSERVATIONAL_00_R_environment.R @@ -77,3 +77,16 @@ DATA_EXTRACT_EXPIRY_TIME_D <- 1 ###################################################################### ## A global seed value to be used by set.seed() calls SEED <- 12345 + + + + + +###################################################################### +### CURATION +###################################################################### +## Global flags that define whether a curation rule should be applied to +## the raw data dumped from REDCap or not. 
+# - TRUE : curation rule will be enforced +# - FALSE: curation rule will not be enforced (default) +CURATE_DATA_RECALCULATE_AGEY <- TRUE diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_02_curate_REDCap_raw_data.R b/02_OBSERVATIONAL/OBSERVATIONAL_02_curate_REDCap_raw_data.R new file mode 100644 index 0000000..3100c4b --- /dev/null +++ b/02_OBSERVATIONAL/OBSERVATIONAL_02_curate_REDCap_raw_data.R @@ -0,0 +1,80 @@ +## OBSERVATIONAL_02_curate_REDCap_raw_data.R +## Date : 2024/11/12 +## Author : Thomas Obadia +## +## This script processes the raw data dump from +## OBSERVATIONAL_01_dump_REDCap_database.R and applies a series of +## curation rules. +## This particular script was developed after we realized some fields +## automatically calculated by REDCap may yield inconsistent results +## as a result of bugs that are fixed by the REDCap development team. +## The primary example is the agey variable, for which calculated +## values are inconsistent depending on availability of a true DoB or +## when it is estimated. 
+######################################################################
+
+
+
+
+
+######################################################################
+### SOURCE THE PREVIOUS PIPELINE STEP
+######################################################################
+## This script defines neither dat_observational_raw nor the CURATE_*
+## and DATA_EXTRACT_* globals: they are expected to come from step 01.
+source("./02_OBSERVATIONAL/OBSERVATIONAL_01_dump_REDCap_database.R")
+
+
+
+
+
+######################################################################
+### APPLY CURATION RULES
+######################################################################
+## Start from the raw dataset generated in step 01. When the flag
+## CURATE_DATA_RECALCULATE_AGEY is TRUE, agey is recomputed as the
+## number of days from the date of birth to visdat, over 365.25:
+##  - dob_yn is 1/"Yes" and dob is not NA: date of birth is dob
+##  - dob_yn is 0/"No" or dob is NA: it is calc_dob_from_myob (%d-%m-%Y)
+## Any other row, or a FALSE flag, leaves agey as found in the raw data.
+## NOTE(review): round() gives the nearest year, not completed years.
+dat_observational_curated <- dat_observational_raw %>%
+  mutate(agey = case_when(
+    CURATE_DATA_RECALCULATE_AGEY & dob_yn %in% c(1, "Yes") & !is.na(dob) ~
+      round(as.numeric(difftime(visdat, dob, units = "days")) / 365.25),
+    CURATE_DATA_RECALCULATE_AGEY & (dob_yn %in% c(0, "No") | is.na(dob)) ~
+      round(as.numeric(difftime(visdat,
+                                as.Date(calc_dob_from_myob, format = "%d-%m-%Y"),
+                                units = "days")) / 365.25),
+    .default = agey))
+
+
+
+
+
+######################################################################
+### WRITE CURATED DATA TO OUTPUT DIRECTORY
+######################################################################
+## Name of output file: countries found in the data, then a timestamp
+OBSERVATIONAL_OUT_02_FILENAME <- paste0(
+  "OBSERVATIONAL_OUT_02_raw-curated_data",
+  "_country-", paste(unique(dat_observational_curated$country), collapse = "-"),
+  "_timestamp-", strftime(Sys.time(), format = "%Y%m%d_%H%M%S"), ".csv")
+
+## Write the curated dataset: exporting dat_observational_raw here would
+## leave the recalculated agey out of the output file
+write.table(dat_observational_curated,
+            file = paste0("./02_OBSERVATIONAL/outputs/", OBSERVATIONAL_OUT_02_FILENAME),
+            sep = ",", dec = ".", quote = TRUE,
+            col.names = TRUE, row.names = FALSE)
+
+
+
+
+
+######################################################################
+### UPDATE DATA_EXTRACT_IS_RECENT_OBS
+######################################################################
+## TRUE when the timestamp is at most DATA_EXTRACT_EXPIRY_TIME_D days old; if/else (not ifelse()) keeps its Date/POSIXct class
+DATA_EXTRACT_IS_RECENT_OBS <- as.logical(difftime(time1 = Sys.Date(),
+                                                  time2 = as.Date(if (exists("DATA_EXTRACT_TS_OBS")) DATA_EXTRACT_TS_OBS else DATA_EXTRACT_TS_DEFAULT), units = "days") <= DATA_EXTRACT_EXPIRY_TIME_D)
diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_02_merge_inventory_metadata.R b/02_OBSERVATIONAL/OBSERVATIONAL_03_merge_inventory_metadata.R
similarity index 100%
rename from 02_OBSERVATIONAL/OBSERVATIONAL_02_merge_inventory_metadata.R
rename to 02_OBSERVATIONAL/OBSERVATIONAL_03_merge_inventory_metadata.R
-- 
GitLab