From 93f9a7d2a67394b9c66762944e37a9230d897ae5 Mon Sep 17 00:00:00 2001
From: Ian <ian.c.bacher@gmail.com>
Date: Mon, 16 Dec 2024 09:13:38 -0500
Subject: [PATCH] Fix calculation of week to get thresholds from

---
 docker-resources/plumber.R | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/docker-resources/plumber.R b/docker-resources/plumber.R
index 3ad65e3..364850c 100644
--- a/docker-resources/plumber.R
+++ b/docker-resources/plumber.R
@@ -200,11 +200,8 @@ function(
     select(predicted_prob_disengage_7day = Disengaged)
 
   # for the case where we need this, it should be safe to assume
-  # that the start week has the correct values
-  # a cohort is the predictions generated for a given week
-  # note that we use the week _before_ the start date; this is because the calls
-  # should be made a week before the RTC date
-  cohort <- clock::date_format(clock::add_weeks(start_date, -1), format="%Y-W%U")
+  # that the start week has the correct valuese
+  prediction_week <- clock::date_format(start_of_week, format="%Y-W%U")
 
   # the h2o result dataframes do not have the person_id, encounter_id, or location_id
   # as these are not used in generating predictions, so here we add those back in
@@ -221,7 +218,7 @@ function(
     # reduce data frame and rename the result
     select(person_id, encounter_id, location_id, rtc_date, predicted_prob_disengage, predicted_prob_disengage_7day) %>%
     # calculate the patient's risk category
-    predict_risk(cohort, "adults") %>%
+    predict_risk(prediction_week, "adults") %>%
     # add per-row metadata about the run
     mutate(
       prediction_generated_date = Sys.time(),
@@ -240,7 +237,7 @@ function(
     # reduce data frame and rename the result
     select(person_id, encounter_id, location_id, rtc_date, predicted_prob_disengage, predicted_prob_disengage_7day) %>%
     # calculate the patient's risk category
-    predict_risk(cohort, "minors") %>%
+    predict_risk(prediction_week, "minors") %>%
     # add per-row metadata about the run
     mutate(
       prediction_generated_date = Sys.time(),
@@ -292,7 +289,7 @@ get_week_number <- function(date) {
 }
 
 # this is a utility function that mostly handles the risk thresholding
-predict_risk <- function(.data, cohort, age_category) {
+predict_risk <- function(.data, prediction_week, age_category) {
   # arbitrary cut-off, but we expect one big batch per week
   # and several small batches; small batches are handled by this if
   if (nrow(.data) < 50) {
@@ -301,7 +298,7 @@ predict_risk <- function(.data, cohort, age_category) {
       DBI::sqlInterpolate(
         my_pool,
         ifelse(age_category == "minors", minor_risk_threshold_query, adult_risk_threshold_query),
-        week = cohort
+        week = prediction_week
       )
     )
 
@@ -328,16 +325,16 @@ predict_risk <- function(.data, cohort, age_category) {
           group_by(location_id) %>%
           mutate(
             hrisk_threshold = high_risk %>%
-              filter(location_id == cur_group()$location_id) %>%
+              filter(location_id == cur_group() %>% pull(location_id)) %>%
               select(probability_threshold) %>% pull,
             hrisk_threshold_7day = high_risk_7day %>%
-              filter(location_id == cur_group()$location_id) %>%
+              filter(location_id == cur_group() %>% pull(location_id)) %>%
               select(probability_threshold) %>% pull,
             mrisk_threshold = medium_risk %>%
-              filter(location_id == cur_group()$location_id) %>%
+              filter(location_id == cur_group() %>% pull(location_id)) %>%
               select(probability_threshold) %>% pull,
             mrisk_threshold_7day = medium_risk_7day %>%
-              filter(location_id == cur_group()$location_id) %>%
+              filter(location_id == cur_group() %>% pull(location_id)) %>%
               select(probability_threshold) %>% pull,
             predicted_risk =
               case_when(