## Origin: 03_INTERVENTIONAL_M0/INTERVENTIONAL_M0_02_QC_01_data_integrity.R
## (recovered from a "new file" git diff; the diff envelope itself is not R)
##
## INTERVENTIONAL_M0_02_QC_01_data_integrity.R
## Date      : 2025/05/19
## Author    : Thomas Obadia
##
## This script takes the raw (so far) dataset produced by the data dump in
## INTERVENTIONAL_M0_01_dump_REDCap_database.R and derives a set of data
## quality indicators from it.
##
## The first section of this script splits the base dataset into several
## smaller tables, each used for QC on one aspect:
## - Screening, eligibility and consent
## - Allocation of unique Participant ID
## - Demographics
## - Malaria diagnostics and sample collection
## - To be continued...
######################################################################





######################################################################
### SOURCE THE DATA
######################################################################
# Running the dump script makes dat_interventional_m0_raw available below.
source("./03_INTERVENTIONAL_M0/INTERVENTIONAL_M0_01_dump_REDCap_database.R")





######################################################################
### DATA SUBSET
######################################################################
## Data subset for QC-ing the screening & eligibility process
# Rows are limited to the baseline (M0) event, matched either by its label
# or by the value "1"; columns are limited to identifiers, the eligibility
# checks, consent, the two Participant ID fields and the enrollment date.
DAT_QC_SCREENING_ELIGIBILITY_CONSENT <- dat_interventional_m0_raw %>%
  filter(
    redcap_event_name %in% c(
      "Baseline Survey (M0) (Arm 1: Intervention)",
      "1"
    )
  ) %>%
  select(
    all_of(c(
      # REDCap record bookkeeping
      "record_id",
      "redcap_event_name",
      "redcap_repeat_instrument",
      "redcap_repeat_instance",
      # Location / household / subject identifiers
      "country",
      "clusterid",
      "hid",
      "subjid",
      # Eligibility checks and consent
      "eligibility_chk_01",
      "eligibility_chk_02",
      "eligibility_chk_03",
      "consent",
      "eligibility_chk_sum",
      # Participant ID, entered twice
      "uniqueid",
      "uniqueid2",
      # Date of enrollment
      "enroldat"
    ))
  )


## Data subset for process of allocating a unique Participant ID
# Keeps only screening rows whose eligibility_chk_sum equals 4.
# NOTE(review): 4 is presumably "all three eligibility checks + consent";
# confirm against the REDCap calculated field.
DAT_QC_ALLOCATING_PARTICIPANT_ID <- filter(
  DAT_QC_SCREENING_ELIGIBILITY_CONSENT,
  eligibility_chk_sum == 4
)


## Data subset for participant demographics
# No row is dropped here. Restriction to properly enrolled participants is
# applied later, when the QC flags are calculated.
DAT_QC_DEMOGRAPHICS <- dat_interventional_m0_raw %>%
  select(
    all_of(c(
      # REDCap record bookkeeping
      "record_id",
      "redcap_event_name",
      "redcap_repeat_instrument",
      "redcap_repeat_instance",
      # Location / household / subject identifiers
      "country",
      "clusterid",
      "hid",
      "subjid",
      "uniqueid",
      # Sex and age-related fields
      "sex",
      "dob_yn",
      "dob",
      "agey_estimate",
      "agem_estimate",
      "agey"
    ))
  )


## Data subset for malaria diagnostics and sample collection
# No row is dropped here. Restriction to properly enrolled participants is
# applied later, when the QC flags are calculated.
DAT_QC_MALARIA_DIAG_AND_SAMPLE <- dat_interventional_m0_raw %>%
  select(
    all_of(c(
      # REDCap record bookkeeping
      "record_id",
      "redcap_event_name",
      "redcap_repeat_instrument",
      "redcap_repeat_instance",
      # Location / household / subject identifiers
      "country",
      "clusterid",
      "hid",
      "subjid",
      "uniqueid",
      # Bednet and malaria history over the past 6 months
      "bednet_yn",
      "bednet_uselastn_yn",
      "malaria_last6m_yn",
      "malaria_last6m_diag",
      "malaria_drugs_last6m_yn",
      "malaria_drugs_last6m_datdd",
      "malaria_drugs_last6m_datmm",
      "malaria_drugs_last6m_datyyyy",
      # Fever and temperature at baseline
      "vs_fever_last2d_bls_yn",
      "vs_temp_bls",
      # RDT, referral and treatment
      "lb_rdt_done_bls_yn",
      "lb_rdt_result_bls_e",
      "lb_rdt_result_bls_m",
      "referred_bls_yn",
      "referred_name_bls",
      "treated_bls_yn"
    )),
    # Treatment checkboxes (one column per option) and free-text detail
    starts_with("treated_name_bls___"),
    all_of("treated_name_other_bls"),
    # Blood sample checkboxes (one column per option) and related fields
    starts_with("lb_bloodsample_collected_bls___"),
    all_of(c(
      "lb_bloodsample_notcollected_det",
      "lb_samplecode_bls",
      "lb_samplecode_bls_checked"
    ))
  )





######################################################################
### PARTICIPANT SCREENING + ELIGIBILITY & CONSENT
######################################################################
### Here we derive row-wise flags that will identify data inconsistencies that
### require clarification from the field team.

## Table with QC queries
# Flag convention used throughout: TRUE = raise a query, FALSE = checked and
# fine, NA = check not applicable to this row.
# Every check is built from vectorised operations (is.na(), %in%, grepl(),
# comparisons), so the flags are computed column-wise in one pass; no
# rowwise() is needed and the result is a plain, ungrouped tibble.
QC_SCREENING_ELIGIBILITY_CONSENT <- DAT_QC_SCREENING_ELIGIBILITY_CONSENT %>%
  mutate(
    # Missing or incorrect country code (NA is not in the set, hence flagged)
    QC_COUNTRY_IS_MISSING_OR_INCORRECT =
      !(country %in% c("E", "Ethiopia",
                       "M", "Madagascar", "Madagasikara")),

    # Missing Cluster ID or Cluster ID not part of those randomized.
    # Each country is validated against its own rows of
    # PVSTATEM_RANDOMIZATION. A country with no rows in that table has every
    # non-missing cluster flagged, which is what the previous placeholder
    # (`clusterid %in% c(NA)`) did for Ethiopia unconditionally.
    # NOTE(review): assumes Ethiopian clusters, when present, are coded
    # country == "E" in PVSTATEM_RANDOMIZATION - confirm.
    QC_CLUSTERID_IS_MISSING_OR_INCORRECT = case_when(
      is.na(clusterid) ~ TRUE,
      country %in% c("E", "Ethiopia") ~
        !(clusterid %in%
            PVSTATEM_RANDOMIZATION$cluster[PVSTATEM_RANDOMIZATION$country %in% "E"]),
      country %in% c("M", "Madagascar", "Madagasikara") ~
        !(clusterid %in%
            PVSTATEM_RANDOMIZATION$cluster[PVSTATEM_RANDOMIZATION$country %in% "M"]),
      # Unknown country: the cluster cannot be validated, so it is flagged
      .default = TRUE
    ),

    # Missing or incorrect House ID (expected: optional "H" + 3 digits)
    QC_HOUSEID_IS_MISSING_OR_INCORRECT =
      is.na(hid) | !grepl(pattern = "^H?\\d{3}$", x = hid),

    # Consent missing but should have been documented
    QC_CONSENT_IS_MISSING_BUT_PATIENT_ELIGIBLE =
      is.na(consent) &
      eligibility_chk_01 %in% REDCAP_LABELS_RADIO_YES &
      eligibility_chk_02 %in% REDCAP_LABELS_RADIO_YES &
      eligibility_chk_03 %in% REDCAP_LABELS_RADIO_YES,

    # Consent obtained when it should not have been.
    # A missing eligibility_chk_sum yields FALSE here, not NA.
    QC_CONSENT_OBTAINED_BUT_PATIENT_NOT_ELIGIBLE = case_when(
      consent %in% REDCAP_LABELS_RADIO_YES & eligibility_chk_sum != 4 ~ TRUE,
      .default = FALSE
    ),

    # Participant consented but Participant ID is missing in at least
    # one of the two fields
    QC_PATIENT_CONSENTED_BUT_UNIQUEID_IS_MISSING = case_when(
      consent %in% REDCAP_LABELS_RADIO_YES ~ is.na(uniqueid) | is.na(uniqueid2),
      .default = NA
    ),

    # Participant consented but Participant ID is not identical across the two
    # uniqueid fields (NA when either field is missing)
    QC_PATIENT_CONSENTED_BUT_UNIQUEID_IS_MISMATCH = case_when(
      consent %in% REDCAP_LABELS_RADIO_YES ~ uniqueid != uniqueid2,
      .default = NA
    ),

    # Participant was properly enrolled, allocated an ID, but date of
    # enrollment is missing or before 2025-05-01
    QC_ENROLDAT_IS_MISSING_OR_INCORRECT = case_when(
      uniqueid == uniqueid2 ~
        is.na(enroldat) | as.Date(enroldat) < as.Date("2025-05-01"),
      .default = NA
    )
  )


## Number of participants screened, found eligible, and possibly enrolled
# Printed for inspection only; the result is not stored.
DAT_QC_SCREENING_ELIGIBILITY_CONSENT %>%
  mutate(
    screened = TRUE,
    # Eligible: all three checks answered "yes".
    # Not eligible: all three checks answered, but not all "yes".
    # NA: at least one check unanswered.
    # (Evaluated per row; the previous version wrapped the second condition
    # in any(), which collapsed the whole column into a single value.)
    eligible = case_when(
      eligibility_chk_01 %in% REDCAP_LABELS_RADIO_YES &
        eligibility_chk_02 %in% REDCAP_LABELS_RADIO_YES &
        eligibility_chk_03 %in% REDCAP_LABELS_RADIO_YES ~ TRUE,
      !is.na(eligibility_chk_01) &
        !is.na(eligibility_chk_02) &
        !is.na(eligibility_chk_03) ~ FALSE,
      .default = NA
    ),
    # Consent recoded to a logical, under a new name so the raw field is not
    # shadowed
    consented = case_when(
      consent %in% REDCAP_LABELS_RADIO_YES ~ TRUE,
      consent %in% REDCAP_LABELS_RADIO_NO ~ FALSE,
      .default = NA
    ),
    # Consented and both Participant ID fields agree
    enrolled_ok = case_when(
      consented & uniqueid == uniqueid2 ~ TRUE,
      .default = NA
    )
  ) %>%
  group_by(country, clusterid) %>%
  summarize(
    n_screened    = sum(screened, na.rm = TRUE),
    n_eligible    = sum(eligible, na.rm = TRUE),
    n_eligible_NA = sum(is.na(eligible)),
    n_consent     = sum(consented, na.rm = TRUE),
    n_consent_NA  = sum(is.na(consented)),
    n_enrolled_ok = sum(enrolled_ok, na.rm = TRUE),
    # Return an ungrouped table (and silence the regrouping message)
    .groups = "drop"
  )





######################################################################
### ALLOCATION OF PARTICIPANT ID
######################################################################
### This section will run quality routines on the allocation of unique
### participant identifiers, therefore covering the set of participants that
### were deemed eligible and should theoretically have received a
### Participant ID.

## Table with QC queries
# Flag convention: TRUE = raise a query, FALSE = checked and fine,
# NA = check not applicable to this row.
# The result overwrites DAT_QC_ALLOCATING_PARTICIPANT_ID (name kept as is).
DAT_QC_ALLOCATING_PARTICIPANT_ID <- local({
  # Expected Participant ID layout: <country>-<cluster>-<3 digits>, limited to
  # the country/cluster pairs listed in PVSTATEM_RANDOMIZATION. Built once
  # here instead of being re-assembled inside every check.
  uniqueid_pattern <- paste0(
    "^(?:",
    paste0(PVSTATEM_RANDOMIZATION$country,
           "-",
           PVSTATEM_RANDOMIZATION$cluster,
           collapse = "|"),
    ")-\\d{3}$"
  )

  DAT_QC_ALLOCATING_PARTICIPANT_ID %>%
    ungroup() %>%
    mutate(
      # Record ID does not match the expected structure ("P-" + 5 digits)
      QC_RECORD_ID_HAS_INCORRECT_STRUCTURE =
        !grepl(pattern = "^P-\\d{5}$", x = record_id),

      # UniqueID is missing in one or more of the two fields
      QC_UNIQUEID_OR_UNIQUEID2_IS_MISSING =
        is.na(uniqueid) | is.na(uniqueid2),

      # UniqueID has incorrect structure in one or more of the two fields.
      # Uses "|" so that a single malformed field raises the flag (the
      # previous "&" returned NA when only one of the two was malformed).
      # A missing ID does not match the pattern and is therefore flagged too.
      QC_UNIQUEID_OR_UNIQUEID2_HAS_INCORRECT_STRUCTURE =
        !grepl(pattern = uniqueid_pattern, x = uniqueid) |
        !grepl(pattern = uniqueid_pattern, x = uniqueid2),

      # The two UniqueID fields differ (NA when either one is missing)
      QC_UNIQUEID_DOES_NOT_MATCH_UNIQUEID2 = uniqueid != uniqueid2
    ) %>%

    # UniqueID is not unique across enrolled participants.
    # Missing IDs are reported by the check above, so they get NA here rather
    # than being grouped together and flagged as duplicates of each other.
    mutate(
      QC_UNIQUEID_IS_NOT_UNIQUE = case_when(is.na(uniqueid) ~ NA,
                                            .default = n() > 1),
      .by = uniqueid
    ) %>%

    # UniqueID2 is not unique across enrolled participants (same NA rule)
    mutate(
      QC_UNIQUEID2_IS_NOT_UNIQUE = case_when(is.na(uniqueid2) ~ NA,
                                             .default = n() > 1),
      .by = uniqueid2
    )
})





######################################################################
### PARTICIPANT DEMOGRAPHICS
######################################################################
### This section will run quality routines on demographics for participants
### that passed screening and were enrolled into the study.
###
### For now, the data used as input is *not* restricted to the subset of
### participants who were allocated a proper, unique Participant ID.
### This table is further restricted to participants who have been allocated
### a Participant ID, *BUT NOT TO THOSE WITH A VALIDATED, UNIQUE PARTICIPANT ID*.
###
### This allows for looking at demographics for all participants enrolled,
### although some of them may have ID-related QC to be solved in the first
### place.
## Table with QC queries
DAT_QC_DEMOGRAPHICS <- QC_SCREENING_ELIGIBILITY_CONSENT %>%
  # The screening table may still be row-wise; joins and the vectorised
  # checks below want a plain table
  ungroup() %>%
  select(record_id,
         redcap_event_name, redcap_repeat_instrument, redcap_repeat_instance,
         enroldat) %>%
  # Brings the enrollment date next to the demographics. The keys are the
  # columns both tables share, now spelled out instead of guessed by dplyr.
  right_join(DAT_QC_DEMOGRAPHICS,
             by = c("record_id",
                    "redcap_event_name",
                    "redcap_repeat_instrument",
                    "redcap_repeat_instance")) %>%
  # Only rows for which a Participant ID was entered
  filter(!is.na(uniqueid)) %>%
  mutate(
    # Sex is missing
    QC_SEX_IS_MISSING = is.na(sex),

    # DOB is unknown and self-reported age is not available
    QC_DOB_UNKNOWN_BUT_ESTIMATED_AGE_IS_MISSING = case_when(
      dob_yn %in% REDCAP_LABELS_RADIO_NO ~ is.na(agey_estimate),
      .default = NA
    ),

    # DOB is presumably known but content is empty or invalid.
    # The pattern accepts yyyy-mm-dd or yyyy/mm/dd with a valid day for the
    # month, and 29 February only for the leap years it enumerates.
    QC_DOB_KNOWN_BUT_DOB_IS_MISSING_OR_INCORRECT = case_when(
      dob_yn %in% REDCAP_LABELS_RADIO_YES ~
        is.na(dob) |
        !grepl(pattern = "^(((\\d{2}([13579][26]|[2468][048]|04|08)|(1600|2[048]00))([-\\/])02(\\6)29)|(\\d{4}([-\\/])((0[1-9]|1[012])(\\9)(0[1-9]|1\\d|2[0-8])|((0[13-9]|1[012])(\\9)(29|30))|((0[13578]|1[02])(\\9)31))))$",
               x = dob),
      .default = NA
    ),

    # Age is self-reported but months are missing in a young child (< 5 y).
    # Under-5s with months present now get FALSE (previously NA).
    QC_AGEM_IS_MISSING = case_when(
      !is.na(agey_estimate) & agey_estimate < 5 ~ is.na(agem_estimate),
      !is.na(agey_estimate) & agey_estimate >= 5 ~ FALSE,
      .default = NA
    ),

    # Age is not available
    QC_AGEY_IS_MISSING = is.na(agey),

    # Age is available but too high or too low (NA when age is missing)
    QC_AGEY_IS_OUT_OF_BOUNDS = agey >= 100 | agey < 1,

    # Age is available but does not correspond to known DOB.
    # The elapsed time between DOB and enrollment is converted from days to
    # years before the comparison (the previous version compared years to
    # days). Both sides are compared at one decimal, with a tolerance instead
    # of exact floating-point equality.
    # NOTE(review): assumes agey is stored in years with one decimal and that
    # 365.25 days/year matches the REDCap calculation - confirm.
    QC_AGEY_IS_INCORRECT = case_when(
      !is.na(dob) ~
        abs(agey -
              round(as.numeric(difftime(time1 = enroldat,
                                        time2 = dob,
                                        units = "days")) / 365.25,
                    digits = 1)) > 0.05,
      .default = NA
    )
  )





######################################################################
### MALARIA DIAGNOSTIC & SAMPLE COLLECTION
######################################################################
# Flag convention: TRUE = raise a query, FALSE = checked and fine,
# NA = check not applicable to this row.
DAT_QC_MALARIA_DIAG_AND_SAMPLE <- QC_SCREENING_ELIGIBILITY_CONSENT %>%
  ungroup() %>%
  select(record_id,
         redcap_event_name, redcap_repeat_instrument, redcap_repeat_instance) %>%
  # NOTE(review): the left-hand side only carries the join keys, so this join
  # adds no column; kept so the row set is built exactly as before.
  right_join(DAT_QC_MALARIA_DIAG_AND_SAMPLE,
             by = c("record_id",
                    "redcap_event_name",
                    "redcap_repeat_instrument",
                    "redcap_repeat_instance")) %>%
  # Only rows for which a Participant ID was entered
  filter(!is.na(uniqueid)) %>%
  mutate(
    ## ---- Helper columns, removed again at the end of the pipeline ----
    # Temperature as a number, so that range checks are numeric even when the
    # field was imported as text
    .tmp_temp = as.numeric(vs_temp_bls),

    # Fever either self-reported or measured (>= 37.5)
    .tmp_fever = vs_fever_last2d_bls_yn %in% REDCAP_LABELS_RADIO_YES |
      .tmp_temp >= 37.5,

    # Pf/pan RDT (field lb_rdt_result_bls_m) shows one of the positive results
    .tmp_pan_pos = lb_rdt_result_bls_m %in% c(REDCAP_LABELS_RADIO_RDT_PF_PAN_110,
                                              REDCAP_LABELS_RADIO_RDT_PF_PAN_101,
                                              REDCAP_LABELS_RADIO_RDT_PF_PAN_111),

    # Either RDT shows one of the positive results
    .tmp_rdt_pos = .tmp_pan_pos |
      lb_rdt_result_bls_e %in% c(REDCAP_LABELS_RADIO_RDT_PF_PV_110,
                                 REDCAP_LABELS_RADIO_RDT_PF_PV_101,
                                 REDCAP_LABELS_RADIO_RDT_PF_PV_111),

    # Treatment checkboxes: at least one ticked / all explicitly unticked.
    # These are not complements: a missing checkbox value is neither.
    .tmp_trt_any_checked =
      treated_name_bls___bs_01 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___bs_02 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___bs_03 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___ls_01 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___98 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___99 %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      treated_name_bls___unk %in% REDCAP_LABELS_CHECKBOX_CHECKED,
    .tmp_trt_all_unchecked =
      treated_name_bls___bs_01 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___bs_02 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___bs_03 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___ls_01 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___98 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___99 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      treated_name_bls___unk %in% REDCAP_LABELS_CHECKBOX_UNCHECKED,

    # Blood sample checkboxes (the six "samp_*" options): at least one
    # ticked / all explicitly unticked
    .tmp_samp_any_checked =
      lb_bloodsample_collected_bls___samp_0100_dbs %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      lb_bloodsample_collected_bls___samp_0500_microt %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      lb_bloodsample_collected_bls___samp_0009_bsmear %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      lb_bloodsample_collected_bls___samp_2500_venb %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      lb_bloodsample_collected_bls___samp_2000_venb %in% REDCAP_LABELS_CHECKBOX_CHECKED |
      lb_bloodsample_collected_bls___samp_2500_venbhep_2500 %in% REDCAP_LABELS_CHECKBOX_CHECKED,
    .tmp_samp_all_unchecked =
      lb_bloodsample_collected_bls___samp_0100_dbs %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      lb_bloodsample_collected_bls___samp_0500_microt %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      lb_bloodsample_collected_bls___samp_0009_bsmear %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      lb_bloodsample_collected_bls___samp_2500_venb %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      lb_bloodsample_collected_bls___samp_2000_venb %in% REDCAP_LABELS_CHECKBOX_UNCHECKED &
      lb_bloodsample_collected_bls___samp_2500_venbhep_2500 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED,

    # "unk" option and "0" option of the blood sample checkbox field.
    # NOTE(review): "0" is presumably the "not collected" option, as the
    # checks below pair it with lb_bloodsample_notcollected_det - confirm.
    .tmp_samp_unk_checked =
      lb_bloodsample_collected_bls___unk %in% REDCAP_LABELS_CHECKBOX_CHECKED,
    .tmp_samp_unk_unchecked =
      lb_bloodsample_collected_bls___unk %in% REDCAP_LABELS_CHECKBOX_UNCHECKED,
    .tmp_samp_none_checked =
      lb_bloodsample_collected_bls___0 %in% REDCAP_LABELS_CHECKBOX_CHECKED,
    .tmp_samp_none_unchecked =
      lb_bloodsample_collected_bls___0 %in% REDCAP_LABELS_CHECKBOX_UNCHECKED,

    ## ---- QC flags ----
    # Availability of bednet is missing
    QC_BEDNET_AVAILABILITY_IS_MISSING = is.na(bednet_yn),

    # Bednet is available but participant did not report on usage last night
    QC_BEDNET_AVAILABLE_BUT_LAST_USAGE_IS_MISSING = case_when(
      bednet_yn %in% REDCAP_LABELS_RADIO_YES ~ is.na(bednet_uselastn_yn),
      .default = NA
    ),

    # Report of a malaria episode over the past 6 months is missing
    QC_MALARIA_REPORT_OVER_PAST_6_MONTHS_IS_MISSING = is.na(malaria_last6m_yn),

    # Patient had malaria over the past 6 months but information about
    # diagnostic is missing
    QC_MALARIA_REPORTED_OVER_PAST_6_MONTHS_BUT_DIAG_IS_MISSING = case_when(
      malaria_last6m_yn %in% REDCAP_LABELS_RADIO_YES ~ is.na(malaria_last6m_diag),
      .default = NA
    ),

    # Patient had malaria over the past 6 months but did not report on any
    # date (flag raised only when day, month and year are all missing).
    # NOTE(review): the date fields are named "malaria_drugs_last6m_*" while
    # the condition is on malaria_last6m_yn; malaria_drugs_last6m_yn is
    # selected but never used - confirm which field should gate this check.
    QC_MALARIA_REPORTED_OVER_PAST_6_MONTHS_BUT_DATE_IS_MISSING = case_when(
      malaria_last6m_yn %in% REDCAP_LABELS_RADIO_YES ~
        is.na(malaria_drugs_last6m_datdd) &
        is.na(malaria_drugs_last6m_datmm) &
        is.na(malaria_drugs_last6m_datyyyy),
      .default = NA
    ),

    # Patient did not report on possible fever in the preceding 2 days
    QC_SELF_REPORTED_FEVER_IS_MISSING = is.na(vs_fever_last2d_bls_yn),

    # Patient temperature is missing or is not a number
    QC_TEMPERATURE_IS_MISSING = is.na(.tmp_temp),

    # Patient temperature is outside of [33, 42]; NA when there is no usable
    # temperature. Compared as a number, not as the raw field.
    QC_TEMPERATURE_IS_OUT_OF_BOUNDS = .tmp_temp < 33 | .tmp_temp > 42,

    # Malaria RDT was not done or is missing in patient with a fever and should
    # have been administered.
    # Fever with an RDT done now gets FALSE (previously NA).
    QC_FEVER_REPORTED_BUT_RDT_IS_MISSING_OR_NOT_DONE = case_when(
      .tmp_fever &
        (is.na(lb_rdt_done_bls_yn) |
           lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_NO) ~ TRUE,
      .tmp_fever &
        lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_YES ~ FALSE,
      vs_fever_last2d_bls_yn %in% REDCAP_LABELS_RADIO_NO &
        .tmp_temp < 37.5 &
        lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_NO ~ FALSE,
      .default = NA
    ),

    # Malaria RDT was done but Pf/pan result is missing (Madagascar only)
    QC_RDT_PF_PAN_DONE_BUT_RESULT_IS_MISSING = case_when(
      lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_YES &
        country %in% REDCAP_LABELS_RADIO_COUNTRY_M ~ is.na(lb_rdt_result_bls_m),
      .default = NA
    ),

    # Malaria Pf/pan RDT was positive but Pf/Pv result is missing (Madagascar
    # only)
    QC_RDT_PF_PAN_POSITIVE_BUT_RDT_PF_PV_RESULT_IS_MISSING = case_when(
      lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_YES &
        country %in% REDCAP_LABELS_RADIO_COUNTRY_M &
        .tmp_pan_pos ~ is.na(lb_rdt_result_bls_e),
      .default = NA
    ),

    # Malaria RDT was done but Pf/Pv result is missing. Applies to Madagascar
    # after a positive Pf/pan RDT, and to Ethiopia whenever an RDT was done.
    # Both outcomes now test the Pf/pan field against the Pf/pan labels (the
    # FALSE outcome previously used the Pf/Pv labels on that field).
    QC_RDT_PF_PV_DONE_BUT_RESULT_IS_MISSING = case_when(
      lb_rdt_done_bls_yn %in% REDCAP_LABELS_RADIO_YES &
        ((country %in% REDCAP_LABELS_RADIO_COUNTRY_M & .tmp_pan_pos) |
           country %in% REDCAP_LABELS_RADIO_COUNTRY_E) ~
        is.na(lb_rdt_result_bls_e),
      .default = NA
    ),

    # Patient had a positive malaria RDT but detail about referral are missing
    QC_RDT_POSITIVE_BUT_REFERRAL_STATUS_IS_MISSING = case_when(
      .tmp_rdt_pos ~ is.na(referred_bls_yn),
      .default = NA
    ),

    # Patient was referred after a positive malaria RDT but referral location
    # is missing
    QC_PATIENT_REFERRED_BUT_REFERRAL_LOCATION_IS_MISSING = case_when(
      referred_bls_yn %in% REDCAP_LABELS_RADIO_YES ~ is.na(referred_name_bls),
      .default = NA
    ),

    # Patient had a positive malaria RDT but detail about treatment are missing
    QC_RDT_POSITIVE_BUT_TREATMENT_STATUS_IS_MISSING = case_when(
      .tmp_rdt_pos ~ is.na(treated_bls_yn),
      .default = NA
    ),

    # Patient was treated after a positive malaria RDT but detail about
    # treatment administered are missing (no treatment option ticked)
    QC_PATIENT_TREATED_BUT_TREATMENT_DESC_IS_MISSING = case_when(
      treated_bls_yn %in% REDCAP_LABELS_RADIO_YES & .tmp_trt_all_unchecked ~ TRUE,
      treated_bls_yn %in% REDCAP_LABELS_RADIO_YES & .tmp_trt_any_checked ~ FALSE,
      .default = NA
    ),

    # Patient was treated after a positive malaria RDT and detail indicate
    # a treatment to be further documented, but such description is missing
    QC_PATIENT_TREATED_BUT_OTHER_DESCRIPTION_IS_MISSING = case_when(
      treated_bls_yn %in% REDCAP_LABELS_RADIO_YES &
        treated_name_bls___99 %in% REDCAP_LABELS_CHECKBOX_CHECKED ~
        is.na(treated_name_other_bls),
      .default = NA
    ),

    # Information about blood sample collection is missing (no option ticked
    # at all, including "unk" and "0")
    QC_BLOOD_SAMPLE_COLLECTION_IS_MISSING = case_when(
      .tmp_samp_all_unchecked &
        .tmp_samp_unk_unchecked &
        .tmp_samp_none_unchecked ~ TRUE,
      .tmp_samp_any_checked |
        .tmp_samp_unk_checked |
        .tmp_samp_none_checked ~ FALSE,
      .default = NA
    ),

    # Blood sample was not collected from patient and no reason was recorded.
    # "Not collected, reason recorded" now gets FALSE (previously NA).
    QC_BLOOD_SAMPLE_COLLECTION_WAS_NOT_DONE_WITHOUT_DOCUMENTATION = case_when(
      .tmp_samp_all_unchecked &
        .tmp_samp_unk_unchecked &
        .tmp_samp_none_checked ~ is.na(lb_bloodsample_notcollected_det),
      (.tmp_samp_any_checked | .tmp_samp_unk_checked) &
        (.tmp_samp_none_unchecked |
           (.tmp_samp_none_checked &
              !is.na(lb_bloodsample_notcollected_det))) ~ FALSE,
      .default = NA
    ),

    # Blood sample collection is inconsistent, reporting both collected and
    # not collected status.
    # "Collected, and '0' not ticked" now gets FALSE (previously NA).
    QC_BLOOD_SAMPLE_COLLECTION_IS_INCORRECT = case_when(
      (.tmp_samp_any_checked | .tmp_samp_unk_checked) &
        .tmp_samp_none_checked ~ TRUE,
      (.tmp_samp_any_checked | .tmp_samp_unk_checked) &
        .tmp_samp_none_unchecked ~ FALSE,
      .tmp_samp_all_unchecked &
        .tmp_samp_unk_unchecked &
        .tmp_samp_none_checked ~ FALSE,
      .default = NA
    ),

    # Blood sample collection was properly conducted but sample code was not
    # entered or scanned and is missing
    QC_BLOOD_SAMPLE_COLLECTION_DONE_BUT_SAMPLE_CODE_IS_MISSING = case_when(
      .tmp_samp_any_checked ~ is.na(lb_samplecode_bls),
      .default = NA
    ),

    # Blood sample collection was properly conducted and sample code was
    # recorded, but the sample code entered does not match the Participant ID
    # and may be erroneous. The trailing sample/visit suffix is stripped from
    # the code before comparing; a code that does not follow the expected
    # layout is left untouched and therefore reported as a mismatch.
    # NOTE(review): this pattern allows 3 or 4 trailing digits in the ID part,
    # whereas the Participant ID check allows exactly 3 - confirm.
    QC_BLOOD_SAMPLE_COLLECTION_DONE_BUT_SAMPLE_CODE_IS_MISMATCH = case_when(
      !is.na(lb_samplecode_bls) ~
        gsub(pattern = "^((?:E|M)-\\d{2}-\\d{3,4})-(?:A|B|C|D|M(?:0|6|12|18)|PCD)$",
             replacement = "\\1",
             x = lb_samplecode_bls) != uniqueid,
      .default = NA
    ),

    # Blood sample collection was properly conducted and sample code was
    # recorded, but information about double-checking the code is missing or
    # says it was not double-checked
    QC_SAMPLE_CODE_CHECK_IS_MISSING_OR_INCORRECT = case_when(
      !is.na(lb_samplecode_bls) &
        (is.na(lb_samplecode_bls_checked) |
           lb_samplecode_bls_checked %in% REDCAP_LABELS_RADIO_NO) ~ TRUE,
      !is.na(lb_samplecode_bls) &
        lb_samplecode_bls_checked %in% REDCAP_LABELS_RADIO_YES ~ FALSE,
      .default = NA
    )
  ) %>%
  # Drop the helper columns so only source fields and QC flags remain
  select(-starts_with(".tmp_"))






######################################################################
### WRITE QC DATA TO OUTPUT DIRECTORY
######################################################################
## Name of output files
# (not implemented yet)


## Write to output files
# (not implemented yet)





######################################################################
### CLEANUP
######################################################################
# Placeholder: rm() without arguments removes nothing.
rm()





######################################################################
### UPDATE DATA_EXTRACT_IS_RECENT_OBS
######################################################################
# TRUE when the data extract is at most DATA_EXTRACT_EXPIRY_TIME_D days old.
# The extract timestamp is DATA_EXTRACT_TS_INT_M0 when that object exists,
# and DATA_EXTRACT_TS_DEFAULT otherwise.
# A plain if/else is used for this single yes/no choice: ifelse() would strip
# the Date/POSIXct class from the timestamp and hand a bare number to
# as.Date().
DATA_EXTRACT_IS_RECENT_OBS <- as.logical(
  difftime(
    time1 = Sys.Date(),
    time2 = as.Date(
      if (exists("DATA_EXTRACT_TS_INT_M0")) {
        DATA_EXTRACT_TS_INT_M0
      } else {
        DATA_EXTRACT_TS_DEFAULT
      }
    ),
    units = "days"
  ) <= DATA_EXTRACT_EXPIRY_TIME_D
)