diff --git a/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R b/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R index 15fa9b76c9ca9cfb241dba38f9e1e2868690387a..d032e135587af75bcc7926387c6454e3f564c643 100644 --- a/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R +++ b/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R @@ -1,4 +1,4 @@ -## INVENTORY_05_select_list_function.R +## INVENTORY_05_generate_list_of_participants_for_observational_study.R ## Date : 2024/02/02 ## Author : Eliharintsoa Rajaoranimirana, Thomas Obadia ## diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_01_curated_data_integrity.R b/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_01_curated_data_integrity.R new file mode 100644 index 0000000000000000000000000000000000000000..f3b7fa1ccb5550dd0b419602b30626e0bf7acadc --- /dev/null +++ b/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_01_curated_data_integrity.R @@ -0,0 +1,20 @@ +## OBSERVATIONAL_03_QC_01_curated_data_integrity.R +## Date : 2024/12/04 +## Author : Thomas Obadia +## +## This script processes the curated dataset from +## OBSERVATIONAL_02_curate_REDCap_raw_data.R and applies a series of +## QC rules. +## It returns a distinct dataset with columns corresponding to the +## outcome of each QC rule. Any 'TRUE' in these columns should warrant +## further investigation and clarification by the study team. 
+###################################################################### + + + + + +###################################################################### +### SOURCE THE DATA +###################################################################### +source("./02_OBSERVATIONAL/OBSERVATIONAL_02_curate_REDCap_raw_data.R") \ No newline at end of file diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_02_merging_observational_and_inventory_data_integrity.R b/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_02_merging_observational_and_inventory_data_integrity.R new file mode 100644 index 0000000000000000000000000000000000000000..0f963992a6dd4c8d6991eaa3292e875d8b945413 --- /dev/null +++ b/02_OBSERVATIONAL/OBSERVATIONAL_03_QC_02_merging_observational_and_inventory_data_integrity.R @@ -0,0 +1,82 @@ +## OBSERVATIONAL_03_QC_02_merging_observational_and_inventory_data_integrity.R +## Date : 2024/10/17 +## Author : Thomas Obadia +## +## At the population inventory phase, many descriptors including +## GPS coordinates of houses were taken. +## The subset of individuals enrolled in the observational study +## will make use of these metadata to link with epidemiological and +## clinical data such as lab results, malaria prevalence etc. +## +## This script will amend the data from the observational databases +## with descriptors from the inventory databases, using the ID +## allocated at the inventory phase. 
+###################################################################### + + + + + +###################################################################### +### SOURCE THE DATABASES +###################################################################### +if (!exists("DATA_EXTRACT_IS_RECENT_OBS") || !DATA_EXTRACT_IS_RECENT_OBS) { + cat("Databases are outdated and will be dumped again.\n") + source("./01_INVENTORY/INVENTORY_02_list_all_inventory_participants.R") + source("./01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R") + source("./02_OBSERVATIONAL/OBSERVATIONAL_01_dump_REDCap_database.R") +} + + + + + +###################################################################### +### MERGE INVENTORY AND OBSERVATIONAL DATA +###################################################################### +### The list of individuals from the inventory phase is stored in the +### inventory_list_p table. It merely contains the CensusID (which +### encodes the country, cluster, house, household and subject), +### as well as age and gender. +### As part of the observational study, the same data was collected and +### *should* report the CensusID when it was available. +### This section compares demographics from both studies, to explore +### whether reconciling these two cross-sectional datasets is feasible. +### NOTE(review): consent filter drops unmatched inventory rows (NA) - intended? + +## In the observational data, record_id differs across countries: +## - Ethiopia used consecutive autonumbering +## - Madagascar used censusid +## Check that censusid is actually redundant with record_id in Madagascar +dat_observational_curated %>% + mutate(record_id_is_censusid = (record_id == censusid)) %>% + count(country, record_id_is_censusid, + .drop = FALSE) + +tmp = dat_observational_curated %>% + select(censusid, consent, sex, agey) %>% + # REDCap labels were translated in Madagascar. + # Harmonize them here for now; should this move to the curation stage? 
+ mutate(consent = plyr::mapvalues(x = consent, + from = c("Oui", "Non"), + to = c("Yes", "No")), + sex = plyr::mapvalues(x = sex, + from = c("Féminin", "Masculin"), + to = c("Female", "Male"))) %>% + full_join(inventory_list_p %>% + select(censusid, sex, agey), + by = join_by(censusid == censusid), + suffix = c(".obs", ".inv")) %>% + filter(consent == "Yes") %>% + separate_wider_regex(cols = censusid, + patterns = c(country = "^(?:E|M)", + "-", + clusterid = "\\d{2}", + "-", + "H", + houseid = "\\d{3}", + "-", + nested_hhid = "\\d{2}", + "-", + nested_subjid = "\\d{2}"), too_few = "debug") + diff --git a/02_OBSERVATIONAL/OBSERVATIONAL_03_merge_inventory_metadata.R b/02_OBSERVATIONAL/OBSERVATIONAL_03_merge_inventory_metadata.R deleted file mode 100644 index f112c0c388f26dceefaef8e3897731b4c8a4f6e8..0000000000000000000000000000000000000000 --- a/02_OBSERVATIONAL/OBSERVATIONAL_03_merge_inventory_metadata.R +++ /dev/null @@ -1,36 +0,0 @@ -## OBSERVATIONAL_02_merge_inventory_metadata.R -## Date : 2024/10/17 -## Author : Thomas Obadia -## -## At the population inventory phase, many descriptors including -## GPS coordinates of houses were taken. -## The subset of individuals enrolled in the observational study -## will make use of these metadata to link with epidemiological and -## clinical data such as lab results, malaria prevalence etc. -## -## This script will amend the data from the observational databases -## with descriptors from the inventory databases, using the ID -## allocated at inventory phase. 
-###################################################################### - - - - - -###################################################################### -### SOURCE THE DATABASES -###################################################################### -if (!exists("DATA_EXTRACT_IS_RECENT_OBS") || !DATA_EXTRACT_IS_RECENT_OBS) { - cat("Databases are outdated and will be dumped again.\n") - source("./01_INVENTORY/INVENTORY_02_list_all_inventory_participants.R") - source("./01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R") - source("./02_OBSERVATIONAL/OBSERVATIONAL_01_dump_REDCap_database.R") -} - - - - - -###################################################################### -### -######################################################################