# 01_INVENTORY/INVENTORY_05_select_list_function.R
#
# Draws a random, proportionally allocated sample of inventory rows for one or
# more clusters and writes each cluster's sample to a CSV file.
#
# This script expects dplyr (including the %>% pipe) to be attached and a data
# frame named `inventory_list_p` (columns used here: country, clusterid, sex,
# ageClass) to exist in the calling environment; neither is set up in this
# file.

# Compute how many rows to sample from one stratum of a cluster.
#
# n_observ  Number of rows in the stratum.
# n_cluster Total number of rows in the cluster the stratum belongs to.
# n_sample  Number of rows wanted for the whole cluster.
#
# Returns `n_observ` (take the whole stratum) when the cluster holds no more
# rows than requested; otherwise the stratum's proportional share of
# `n_sample`, rounded to a whole number. Because each stratum is rounded on
# its own, the shares of a cluster do not necessarily add up to `n_sample`.
size_sample <- function(n_observ, n_cluster, n_sample) {
  if (n_cluster <= n_sample) {
    return(n_observ)
  }
  round((n_observ / n_cluster) * n_sample, 0)
}

#### function select list randomly
# Randomly pick `limit` rows of `inventory_list_p` that belong to the given
# cluster / sex / age-class stratum.
#
# clusterFilter, sexFilter, ageClassFilter  Values matched with `==` against
#   the clusterid, sex and ageClass columns.
# limit  Number of rows to draw (without replacement).
#
# Returns the sampled rows as a data frame.
extract_list_sample <- function(clusterFilter, sexFilter, ageClassFilter, limit) {
  inventory_list_p %>%
    filter(
      clusterid == clusterFilter,
      sex == sexFilter,
      ageClass == ageClassFilter
    ) %>%
    # sample_n() already returns a uniformly random subset in random order, so
    # no separate shuffle of the stratum is needed beforehand.
    sample_n(limit)
}

#### function extract final selected list randomly
# Sample one cluster, stratified by country / sex / age class, and write the
# sampled rows to "outputs/sample_cluster_<clusterid>_<timestamp>.csv".
#
# clusteridFilter  Id of the cluster to sample.
# sampleNeeded     Number of rows wanted for that cluster.
#
# Returns the value of write.csv2() (NULL); called for its side effect.
myfunction <- function(clusteridFilter, sampleNeeded) {
  # Restrict to the requested cluster first so that no random samples are
  # drawn (and then thrown away) for every other cluster.
  cluster_rows <- inventory_list_p %>%
    filter(clusterid == clusteridFilter)
  pop_cluster_n <- nrow(cluster_rows)

  if (pop_cluster_n == 0) {
    warning(
      "No inventory rows found for cluster ", clusteridFilter,
      "; writing an empty sample list.",
      call. = FALSE
    )
    sample_list <- cluster_rows
  } else {
    # NOTE(review): count() also creates strata for NA sex / ageClass values,
    # but the `==` filters in extract_list_sample() never match NA, so such a
    # stratum cannot be sampled -- confirm the inventory has no missing values.
    strata <- cluster_rows %>%
      count(country, clusterid, sex, ageClass, name = "sexage_n") %>%
      mutate(
        pop_cluster = pop_cluster_n,
        n_sample = mapply(size_sample, sexage_n, pop_cluster, sampleNeeded),
        list = mapply(
          extract_list_sample,
          clusterid, sex, ageClass, n_sample,
          SIMPLIFY = FALSE
        )
      )
    sample_list <- bind_rows(strata$list)
  }

  # write.csv2() fails if the target directory is missing.
  if (!dir.exists("outputs")) {
    dir.create("outputs", recursive = TRUE)
  }
  file_name <- paste0(
    "outputs/sample_cluster_",
    clusteridFilter, "_",
    strftime(Sys.time(), format = "%Y%m%d_%H%M%S"), ".csv"
  )
  write.csv2(sample_list, file_name)
}

myfunction(32, 20)

# if we need more clusters
# Run myfunction() once per cluster.
#
# listClusterid       Vector of cluster ids.
# numberSampleNeeded  Sample size per cluster, in the same order as
#                     `listClusterid`; a single value is reused for every
#                     cluster.
#
# Returns NULL invisibly; called for the files written by myfunction().
save_multiple_cluster <- function(listClusterid, numberSampleNeeded) {
  n_clusters <- length(listClusterid)
  if (length(numberSampleNeeded) == 1L) {
    numberSampleNeeded <- rep(numberSampleNeeded, n_clusters)
  }
  if (length(numberSampleNeeded) < n_clusters) {
    stop(
      "`numberSampleNeeded` must supply one sample size per cluster id.",
      call. = FALSE
    )
  }

  # seq_along() yields an empty sequence for empty input, unlike 1:length(x).
  for (index in seq_along(listClusterid)) {
    myfunction(listClusterid[index], numberSampleNeeded[index])
  }
  invisible(NULL)
}

save_multiple_cluster(c(3, 4), c(10, 5))