Skip to content
Snippets Groups Projects
Commit 8c42f1fa authored by Thomas  OBADIA's avatar Thomas OBADIA :speech_balloon:
Browse files

Fix some more issues introduced by dumping the databases with categorical data...

Fix some more issues introduced by dumping the databases with categorical data as labels, as well as a more serious issue where record_id was still being selected when subsetting some lists even though it no longer existed. In addition, censusid no longer needs to be calculated as part of the get_target_list_for_observational_study() function.
parent 8cbd97ad
No related branches found
No related tags found
No related merge requests found
......@@ -90,12 +90,7 @@ get_target_list_for_observational_study <- function(x, n_target, n_backup) {
# Order sampling list to appear grouped by house, for ease of use
observational_list_p_main <- bind_rows(observational_list_p_main) %>%
arrange(country, clusterid, hid, nested_hhid, nested_subjid) %>%
mutate(censusid = paste0(country, "-",
sprintf("%02d", clusterid), "-",
"H", sprintf("%03d", hid), "-",
sprintf("%02s", nested_hhid), "-",
sprintf("%02s", nested_subjid)),
list_name = "main")
mutate(list_name = "main")
# A backup list is generated after excluding participants
# already sampled in the main list
......@@ -107,19 +102,13 @@ get_target_list_for_observational_study <- function(x, n_target, n_backup) {
"n_target" = sampling_plan$nested_backup_sampling_n,
MoreArgs = list("x" = x %>%
anti_join(observational_list_p_main %>%
select(record_id,
country, clusterid, hid, nested_hhid, nested_subjid))),
select(country, clusterid, hid, nested_hhid, nested_subjid))),
SIMPLIFY = FALSE)
# Order sampling list to appear grouped by house, for ease of use
observational_list_p_backup <- bind_rows(observational_list_p_backup) %>%
arrange(country, clusterid, hid, nested_hhid, nested_subjid) %>%
mutate(censusid = paste0(country, "-",
sprintf("%02d", clusterid), "-",
"H", sprintf("%03d", hid), "-",
sprintf("%02s", nested_hhid), "-",
sprintf("%02s", nested_subjid)),
list_name = "backup")
mutate(list_name = "backup")
# Return the full list
res <- observational_list_p_main %>%
......
......@@ -110,8 +110,8 @@ if (!DATA_EXTRACT_IS_RECENT_INV) {
mutate(gps_is_manual = case_when(is.na(gps_lat) & is.na(gps_lon) & !is.na(gps_lat_manual) & !is.na(gps_lon_manual) ~ TRUE,
!is.na(gps_lat) & !is.na(gps_lon) & is.na(gps_lat_manual) & is.na(gps_lon_manual) ~ FALSE),
gps_is_valid = FALSE,
gps_is_valid = case_when(((country == "E" & (gps_lat >= GPS_LAT_MIN_E & gps_lat <= GPS_LAT_MAX_E & gps_lon >= GPS_LON_MIN_E & gps_lon <= GPS_LON_MAX_E)) |
(country == "M" & (gps_lat >= GPS_LAT_MIN_M & gps_lat <= GPS_LAT_MAX_M & gps_lon >= GPS_LON_MIN_M & gps_lon <= GPS_LON_MAX_M))) ~ TRUE)) %>%
gps_is_valid = case_when(((country %in% c("E", "Ethiopia") & (gps_lat >= GPS_LAT_MIN_E & gps_lat <= GPS_LAT_MAX_E & gps_lon >= GPS_LON_MIN_E & gps_lon <= GPS_LON_MAX_E)) |
(country %in% c("M", "Madagascar") & (gps_lat >= GPS_LAT_MIN_M & gps_lat <= GPS_LAT_MAX_M & gps_lon >= GPS_LON_MIN_M & gps_lon <= GPS_LON_MAX_M))) ~ TRUE)) %>%
# Add data source for convenience
mutate(data_source = RCON)
......
......@@ -40,11 +40,10 @@ inventory_list_h <- dat_inventory_raw %>%
# Merge in the number of individuals per house-and-household
left_join(inventory_list_p %>%
group_by(record_id, country, clusterid, hid) %>%
group_by(country, clusterid, hid) %>%
summarize(n_living_at_house = n(),
.groups = "keep"),
by = join_by("record_id" == "record_id",
"country" == "country",
by = join_by("country" == "country",
"clusterid" == "clusterid",
"hid" == "hid"))
......
......@@ -33,8 +33,8 @@ set.seed(seed = SEED)
### GENERATE SAMPLING LIST
######################################################################
observational_list_p <- get_target_list_for_observational_study(x = inventory_list_p,
n_target = c("E" = 220,
"M" = 110),
n_target = c("Ethiopia" = 220,
"Madagascar" = 110),
n_backup = 50)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment