Method:
library(dplyr)
library(Syndemics)
library(doParallel)
results <- list()
unique_years <- unique(data$year)
n_cores <- detectCores() - 1
registerDoParallel(n_cores)
for(i in unique_years) {
estimates <- foreach(j = 1:1000, .combine = 'rbind', .packages = c("dplyr", "Syndemics")) %dopar% {
set.seed(j)
output <- data %>%
mutate(N_ID = ifelse(N_ID == -1, round(runif(nrow(.), 1, 10)), N_ID)) %>%
filter(year == i) %>%
select(-c(final_re, final_sex, age_grp_ten)) %>%
unique()
Syndemics::crc(output, "N_ID", c("Casemix", "APCD", "BSAS", "PMP", "Matris", "Death"),
formula.selection = "stepwise", method = "poisson",
opts.stepwise = list(direction = "forward",
verbose = FALSE))$estimate
}
results[[as.character(i)]] <- list(crcMean = mean(estimates), crcMinimum = min(estimates), crcMaximum = max(estimates))
}
stopImplicitCluster()
output <- do.call(rbind, results) %>%
as.data.frame() %>%
tibble::rownames_to_column(var = "year") %>%
mutate(year = as.integer(year)) %>%
left_join(., data %>%
select(-c(final_re, final_sex, age_grp_ten)) %>%
unique() %>%
group_by(year) %>%
summarise(., known = sum(N_ID))
) %>%
rowwise() %>%
mutate(estimate = crcMean + known,
estimateMin = crcMinimum + known,
estimateMax = crcMaximum + known)