From 8c42f1fa274f8dbf53e8e89d92e16ae50a68c47e Mon Sep 17 00:00:00 2001 From: Thomas OBADIA <thomas.obadia@pasteur.fr> Date: Thu, 17 Oct 2024 15:48:31 +0200 Subject: [PATCH] Fix some more issues introduced by dumping the databases with categorical data as labels and a more major issue where record_id would still be selected in subsetting some lists when it did not exist anymore. Conversely, the censusid does not need to be calculated as part of the get_target_list_for_observational_study() function anymore. --- 01_INVENTORY/INVENTORY_00_R_functions.R | 17 +++-------------- .../INVENTORY_01_dump_REDCap_database.R | 4 ++-- .../INVENTORY_03_list_all_inventory_houses.R | 5 ++--- ...st_of_participants_for_observational_study.R | 4 ++-- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/01_INVENTORY/INVENTORY_00_R_functions.R b/01_INVENTORY/INVENTORY_00_R_functions.R index 9cd9c3f..6f56732 100644 --- a/01_INVENTORY/INVENTORY_00_R_functions.R +++ b/01_INVENTORY/INVENTORY_00_R_functions.R @@ -90,12 +90,7 @@ get_target_list_for_observational_study <- function(x, n_target, n_backup) { # Order sampling list to appear grouped by house, for ease of use observational_list_p_main <- bind_rows(observational_list_p_main) %>% arrange(country, clusterid, hid, nested_hhid, nested_subjid) %>% - mutate(censusid = paste0(country, "-", - sprintf("%02d", clusterid), "-", - "H", sprintf("%03d", hid), "-", - sprintf("%02s", nested_hhid), "-", - sprintf("%02s", nested_subjid)), - list_name = "main") + mutate(list_name = "main") # A backup list is generated after excluding people participants # already sampled in the main list @@ -107,19 +102,13 @@ get_target_list_for_observational_study <- function(x, n_target, n_backup) { "n_target" = sampling_plan$nested_backup_sampling_n, MoreArgs = list("x" = x %>% anti_join(observational_list_p_main %>% - select(record_id, - country, clusterid, hid, nested_hhid, nested_subjid))), + select(country, clusterid, hid, nested_hhid, nested_subjid))), SIMPLIFY = FALSE) # Order sampling list to appear grouped by house, for ease of use observational_list_p_backup <- bind_rows(observational_list_p_backup) %>% arrange(country, clusterid, hid, nested_hhid, nested_subjid) %>% - mutate(censusid = paste0(country, "-", - sprintf("%02d", clusterid), "-", - "H", sprintf("%03d", hid), "-", - sprintf("%02s", nested_hhid), "-", - sprintf("%02s", nested_subjid)), - list_name = "backup") + mutate(list_name = "backup") # Return the full list res <- observational_list_p_main %>% diff --git a/01_INVENTORY/INVENTORY_01_dump_REDCap_database.R b/01_INVENTORY/INVENTORY_01_dump_REDCap_database.R index 6ba3730..ea8dbdf 100644 --- a/01_INVENTORY/INVENTORY_01_dump_REDCap_database.R +++ b/01_INVENTORY/INVENTORY_01_dump_REDCap_database.R @@ -110,8 +110,8 @@ if (!DATA_EXTRACT_IS_RECENT_INV) { mutate(gps_is_manual = case_when(is.na(gps_lat) & is.na(gps_lon) & !is.na(gps_lat_manual) & !is.na(gps_lon_manual) ~ TRUE, !is.na(gps_lat) & !is.na(gps_lon) & is.na(gps_lat_manual) & is.na(gps_lon_manual) ~ FALSE), gps_is_valid = FALSE, - gps_is_valid = case_when(((country == "E" & (gps_lat >= GPS_LAT_MIN_E & gps_lat <= GPS_LAT_MAX_E & gps_lon >= GPS_LON_MIN_E & gps_lon <= GPS_LON_MAX_E)) | - (country == "M" & (gps_lat >= GPS_LAT_MIN_M & gps_lat <= GPS_LAT_MAX_M & gps_lon >= GPS_LON_MIN_M & gps_lon <= GPS_LON_MAX_M))) ~ TRUE)) %>% + gps_is_valid = case_when(((country %in% c("E", "Ethiopia") & (gps_lat >= GPS_LAT_MIN_E & gps_lat <= GPS_LAT_MAX_E & gps_lon >= GPS_LON_MIN_E & gps_lon <= GPS_LON_MAX_E)) | + (country %in% c("M", "Madagascar") & (gps_lat >= GPS_LAT_MIN_M & gps_lat <= GPS_LAT_MAX_M & gps_lon >= GPS_LON_MIN_M & gps_lon <= GPS_LON_MAX_M))) ~ TRUE)) %>% # Add data source for convenience mutate(data_source = RCON) diff --git a/01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R b/01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R index 7748b7e..0b0d946 100644 --- a/01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R +++ b/01_INVENTORY/INVENTORY_03_list_all_inventory_houses.R @@ -40,11 +40,10 @@ inventory_list_h <- dat_inventory_raw %>% # Merge in the number of individuals per house-and-household left_join(inventory_list_p %>% - group_by(record_id, country, clusterid, hid) %>% + group_by(country, clusterid, hid) %>% summarize(n_living_at_house = n(), .groups = "keep"), - by = join_by("record_id" == "record_id", - "country" == "country", + by = join_by("country" == "country", "clusterid" == "clusterid", "hid" == "hid")) diff --git a/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R b/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R index 06284a3..15fa9b7 100644 --- a/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R +++ b/01_INVENTORY/INVENTORY_05_generate_list_of_participants_for_observational_study.R @@ -33,8 +33,8 @@ set.seed(seed = SEED) ### GENERATE SAMPLING LIST ###################################################################### observational_list_p <- get_target_list_for_observational_study(x = inventory_list_p, - n_target = c("E" = 220, - "M" = 110), + n_target = c("Ethiopia" = 220, + "Madagascar" = 110), n_backup = 50) -- GitLab