Skip to content

Commit

Permalink
Implement V9
Browse files Browse the repository at this point in the history
  • Loading branch information
ibacher committed Apr 24, 2024
1 parent 95e1af9 commit f5086a7
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 212 deletions.
16 changes: 6 additions & 10 deletions .github/workflows/docker-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,23 @@ name: docker ci
on:
push:
branches:
- 'main'
- "main"

jobs:
docker:
runs-on: ubuntu-latest
steps:
-
name: Set up QEMU
- name: Set up QEMU
uses: docker/setup-qemu-action@v2
-
name: Set up Docker Buildx
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
-
name: Login to Docker Hub
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
-
name: Build and push
- name: Build and push
uses: docker/build-push-action@v4
with:
push: true
tags: ampathke/ampath-iit-prediction-model-v7:latest
tags: ampathke/ampath-iit-prediction-model-v9:latest
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ RUN install2.r --error --skipinstalled \
RUN Rscript -e "remotes::install_version('h2o', '3.42.0.2')"

# Add the prediction model to the app
COPY IIT-Prediction/model/V7 /app/model
COPY IIT-Prediction/model/V9 /app/model
# Add the production extraction query to the app
COPY SQL/iit_prod_data_extract.sql /app/iit_prod_data_extract.sql

Expand Down
210 changes: 13 additions & 197 deletions SQL/iit_prod_data_extract.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,84 +22,6 @@ with num_1day_defaults_last_3_visits as (
left join predictions.flat_ml_days_defaulted dd3
on dd3.person_id = dd2.person_id
and dd3.visit_number = dd2.visit_number - 1
),
num_7day_defaults_last_3_visits as (
select
dd1.person_id,
dd1.encounter_id,
dd1.visit_number,
case
when dd1.days_defaulted_last_encounter is null or
dd2.days_defaulted_last_encounter is null or
dd3.days_defaulted_last_encounter is null
then null
else
if(dd1.days_defaulted_last_encounter >= 7, 1, 0) +
if(dd2.days_defaulted_last_encounter >= 7, 1, 0) +
if(dd3.days_defaulted_last_encounter >= 7, 1, 0)
end as num_7day_defaults_last_3_visits
from predictions.flat_ml_days_defaulted dd1
left join predictions.flat_ml_days_defaulted dd2
on dd2.person_id = dd1.person_id
and dd2.visit_number = dd1.visit_number - 1
left join predictions.flat_ml_days_defaulted dd3
on dd3.person_id = dd2.person_id
and dd3.visit_number = dd2.visit_number - 1
),
num_2wk_defaults_last_3_visits as (
select
dd1.person_id,
dd1.encounter_id,
dd1.visit_number,
case
when dd1.days_defaulted_last_encounter is null or
dd2.days_defaulted_last_encounter is null or
dd3.days_defaulted_last_encounter is null
then null
else
if(dd1.days_defaulted_last_encounter >= 14, 1, 0) +
if(dd2.days_defaulted_last_encounter >= 14, 1, 0) +
if(dd3.days_defaulted_last_encounter >= 14, 1, 0)
end as num_2wks_defaults_last_3visits
from predictions.flat_ml_days_defaulted dd1
left join predictions.flat_ml_days_defaulted dd2
on dd2.person_id = dd1.person_id
and dd2.visit_number = dd1.visit_number - 1
left join predictions.flat_ml_days_defaulted dd3
on dd3.person_id = dd2.person_id
and dd3.visit_number = dd2.visit_number - 1
),
num_1month_defaults_last_3_visits as (
select
dd1.person_id,
dd1.encounter_id,
dd1.visit_number,
case
when dd1.days_defaulted_last_encounter is null or
dd2.days_defaulted_last_encounter is null or
dd3.days_defaulted_last_encounter is null
then null
else
if(dd1.days_defaulted_last_encounter >= 30, 1, 0) +
if(dd2.days_defaulted_last_encounter >= 30, 1, 0) +
if(dd3.days_defaulted_last_encounter >= 30, 1, 0)
end as num_1month_defaults_last_3_visits
from predictions.flat_ml_days_defaulted dd1
left join predictions.flat_ml_days_defaulted dd2
on dd2.person_id = dd1.person_id
and dd2.visit_number = dd1.visit_number - 1
left join predictions.flat_ml_days_defaulted dd3
on dd3.person_id = dd2.person_id
and dd3.visit_number = dd2.visit_number - 1
),
defaults_by_days as (
select
dd.person_id,
dd.encounter_id,
encounter_date,
max(dd.days_defaulted_last_encounter) as days_defaulted
from predictions.flat_ml_days_defaulted dd
group by dd.person_id, encounter_date
)
-- describe the columns we need
select
Expand All @@ -111,16 +33,6 @@ select
timestampdiff(YEAR, p.birthdate, fs.encounter_datetime) as Age,
if(p.birthdate is null, 1, 0) as Age_NA,
p.gender as Gender,
null as Marital_status,
timestampdiff(year,
if(year(fs.arv_first_regimen_start_date) != 1900, -- 1900 indicates junk data
date(fs.arv_first_regimen_start_date),
null
),
date(fs.encounter_datetime)
) as Duration_in_HIV_care,
if(fs.arv_first_regimen_start_date is null or year(fs.arv_first_regimen_start_date) = 1900,
1, 0) as Duration_in_HIV_care_NA,
-- BMI = wt / (ht / 100)^2
-- BMI < 5.0 or over 60.0 are considered errors, usually errors in the underlying data
case
Expand All @@ -131,7 +43,7 @@ select
when round(fs.weight / ((fs.height / 100) * (fs.height / 100)), 2) > 60.0
then null
else round(fs.weight / ((fs.height / 100) * (fs.height / 100)), 2)
end as BMI,
end as BMI,
case
when fs.weight is null or fs.height is null or fs.weight < 1 or fs.height < 1
then 1
Expand All @@ -140,113 +52,38 @@ select
when round(fs.weight / ((fs.height / 100) * (fs.height / 100)), 2) > 60.0
then 1
else 0
end as BMI_NA,
null as Travel_time,
fs.cur_who_stage as WHO_staging,
if(fs.cur_who_stage is null, 1, 0) as WHO_staging_NA,
end as BMI_NA,
log10(fs.vl_resulted + 1) as Viral_Load_log10,
if(fs.vl_resulted is null, 1, 0) as Viral_Load_log10_NA,
if(fs.vl_resulted < 1000, 1, 0) as VL_suppression,
timestampdiff(DAY, fs.encounter_datetime, fs.vl_resulted_date) as Days_Since_Last_VL,
fs.hiv_status_disclosed as HIV_disclosure,
if(fs.hiv_status_disclosed is null, 1, 0) as HIV_disclosure_NA,
-- Regimen Line data frequently differs from the training data
fs.cur_arv_line as Regimen_Line,
if(fs.cur_arv_line is null, 1, 0) as Regimen_Line_NA,
coalesce(fs.is_pregnant, 0) as Pregnancy,
-- manual look-up table for site characteristics
case
when fs.location_id in (
-- Dumisha
55, 315, 19, 230, 26, 23, 319, 130, 313, 9, 342, 78, 310, 20, 312, 12, 321, 8, 341, 19, 230,
-- Uzima
1, 13, 14, 15, 197, 198, 17, 227, 214, 306, 11, 229, 421, 422, 423, 420,
-- April 2024 Cohort
211, 140, 69, 208, 11, 229
)
then 'Urban'
when fs.location_id in (
65, 314, 64, 83, 316, 90, 135, 106, 86, 336, 91, 320, 74, 76, 79, 100, 311, 75,
-- April 2024 Cohort
60, 323, 4, 322, 351, 352
)
then 'Rural'
end as Clinic_Location,
null as TB_Comorbidity,
if(fs.vl_resulted_date is null, 1, 0) as Days_Since_Last_VL_NA,
fs.cd4_resulted as CD4,
if(fs.cd4_resulted is null, 1, 0) as CD4_NA,
datediff(fs.encounter_datetime, fs.cd4_resulted_date) as Days_Since_Last_CD4,
null as Entry_Point,
case
when et.name in ('ADULTINITIAL', 'PEDSINITIAL', 'YOUTHINITIAL') then 'Initial'
when et.name in ('ADULTRETURN', 'PEDSRETURN', 'YOUTHRETURN') then 'Return'
else 'Other'
end as Encounter_Type_Class,
null as Education_Level,
null as Occupation,
null as Adherence_Counselling_Sessions,
l.name as Clinic_Name,
replace(etl.get_arv_names(fs.cur_arv_meds), '##', '+') as ART_regimen,
if(fs.cd4_resulted_date is null, 1, 0) as Days_Since_Last_CD4_NA,
-- flat_hiv_summary has a visit_number value, but it's a total counter
-- the model is trained on data from 2021, so we recalculate the visit number from the
-- default data
dd.visit_number as Visit_Number,
days_defaulted_last_encounter as Days_defaulted_in_prev_enc,
if(days_defaulted_last_encounter is null, 1, 0) as Days_defaulted_in_prev_enc_NA,
num_1day_defaults_last_3_visits as num_1day_defaults_last_3visits,
if(num_1day_defaults_last_3_visits is null, 1, 0) as num_1day_defaults_last_3visits_NA,
num_7day_defaults_last_3_visits as num_7days_defaults_last_3visits,
if(num_7day_defaults_last_3_visits is null, 1, 0) as num_7days_defaults_last_3visits_NA,
num_2wks_defaults_last_3visits,
if(num_2wks_defaults_last_3visits is null, 1, 0) as num_2wks_defaults_last_3visits_NA,
num_1month_defaults_last_3_visits as num_1month_defaults_last_3visits,
if(num_1month_defaults_last_3_visits is null, 1, 0) as num_1month_defaults_last_3visits_NA,
coalesce(any_30d_defaults_1yr, 0) as ever_defaulted_by_1m_in_last_1year,
if(any_30d_defaults_1yr is null, 1, 0) as ever_defaulted_by_1m_in_last_1year_NA,
coalesce(any_30d_defaults_2yr, 0) as ever_defaulted_by_1m_in_last_2year,
if(any_30d_defaults_2yr is null, 1, 0) as ever_defaulted_by_1m_in_last_2year_NA,
Age_baseline,
Gender_baseline,
Marital_status_baseline,
BMI_baseline,
Travel_time_baseline,
WHO_staging_baseline,
VL_suppression_baseline,
Viral_Load_log10_baseline,
HIV_disclosure_baseline,
Regimen_Line_baseline,
Pregnancy_baseline,
Clinic_Location_baseline,
TB_Comorbidity_baseline,
CD4_baseline,
Education_Level_baseline,
Occupation_baseline,
Adherence_Counselling_Sessions_baseline,
Clinic_Name_baseline,
ART_regimen_baseline,
fs.cur_arv_adherence as ART_Adherence,
coalesce(fs.hiv_disclosure_status_value, 'Not Done') as HIV_disclosure_stage,
l.state_province as Clinic_County,
l.name as Clinic_Name,
program.name as Program_Name,
coalesce(fs.tb_screen, 0) as TB_screening,
fs.tb_test_result as TB_Test_Result,
fs.on_tb_tx as On_TB_TX,
coalesce(fs.on_ipt, 0) as On_IPT,
coalesce(fs.ca_cx_screen, 0) as CA_CX_Screening,
fs.ca_cx_screening_result as CA_CX_Screening_Result,
convert(month(date(fs.rtc_date)), char) as 'Month'
convert(month(date(fs.rtc_date)), char) as 'Month',
mfm.clinic_county as Current_Clinc_County,
mfm.size_enrollments_log10 as Size_Enrollments_Log10,
mfm.volume_visits_log10 as Volume_Visits_Log10,
mfm.care_programme as 'Care Programme',
mfm.facility_type as 'Facility Type',
program.name as 'Program Name'
from etl.flat_hiv_summary_v15b as fs
left join predictions.flat_ml_baseline_visit baseline
on fs.person_id = baseline.person_id
left join predictions.flat_ml_days_defaulted dd
on dd.encounter_id = fs.encounter_id
and dd.person_id = fs.person_id
join amrs.person p on p.person_id = fs.person_id
left join amrs.encounter_type et on fs.encounter_type = et.encounter_type_id
left join amrs.location l
on fs.location_id = l.location_id
and l.retired = 0
left join predictions.ml_facility_metadata mfm
on fs.location_id = mfm.location_id
-- If a patient is enrolled in PMTCT, they are also enrolled in antenatal care
-- Currently, we only keep the PMTCT record
left join etl.program_visit_map pvm
Expand All @@ -260,27 +97,6 @@ from etl.flat_hiv_summary_v15b as fs
left join num_1day_defaults_last_3_visits 1day_defaults
on 1day_defaults.person_id = fs.person_id
and 1day_defaults.encounter_id = fs.encounter_id
left join num_7day_defaults_last_3_visits 7day_defaults
on 7day_defaults.person_id = fs.person_id
and 7day_defaults.encounter_id = fs.encounter_id
left join num_2wk_defaults_last_3_visits 2wk_defaults
on 2wk_defaults.person_id = fs.person_id
and 2wk_defaults.encounter_id = fs.encounter_id
left join num_1month_defaults_last_3_visits 1month_defaults
on 1month_defaults.person_id = fs.person_id
and 1month_defaults.encounter_id = fs.encounter_id
left join (
select person_id, if(days_defaulted >= 30, 1, 0) as any_30d_defaults_1yr
from defaults_by_days
where encounter_date between date_sub(?startDate, interval 1 year) and ?startDate
group by person_id
) as 1yr on 1yr.person_id = fs.person_id
left join (
select person_id, if(days_defaulted >= 30, 1, 0) as any_30d_defaults_2yr
from defaults_by_days
where encounter_date between date_sub(?startDate, interval 2 year) and ?startDate
group by person_id
) as 2yr on 2yr.person_id = fs.person_id
left join predictions.ml_weekly_predictions mlp
on mlp.encounter_id = fs.encounter_id
where
Expand Down
8 changes: 4 additions & 4 deletions docker-resources/plumber.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ h2o.init()
dbConfig <- config::get()

# Update this when the model version changes
ml_model_version <- "V7"
ml_model_version <- "V9"

# this is the adult model; we only load it once
ml_model_adult <- h2o.loadModel(
"/app/model/y0_1days_adult_IIT/1_StackedEnsemble_BestOfFamily_1_AutoML_1_20230812_150159_auc_0.775/StackedEnsemble_BestOfFamily_1_AutoML_1_20230812_150159"
"/app/model/y0_1days_adult_IIT/2_StackedEnsemble_BestOfFamily_1_AutoML_8_20240411_135528_auc_0.739/StackedEnsemble_BestOfFamily_1_AutoML_8_20240411_135528"
)

# this is the peds model
ml_model_minor <- h2o.loadModel(
"/app/model/y0_1day_minor_IIT/1_StackedEnsemble_BestOfFamily_1_AutoML_2_20230813_03957_auc_0.734/StackedEnsemble_BestOfFamily_1_AutoML_2_20230813_03957"
"/app/model/y0_1days_minor_IIT/1_StackedEnsemble_AllModels_1_AutoML_6_20240329_151542_auc_0.721/StackedEnsemble_AllModels_1_AutoML_6_20240329_151542"
)

# here we also load the SQL script we use to extract data
Expand Down Expand Up @@ -194,7 +194,7 @@ function(
prediction_result <- bind_rows(prediction_results_adults, prediction_results_minors)

# add the rows from the prediction_result to the ml_weekly_predictions table
DBI::dbAppendTable(my_pool, SQL('predictions.ml_weekly_predictions'), prediction_result)
DBI::dbAppendTable(my_pool, SQL('predictions.ml_weekly_predictions_test'), prediction_result)

# return the result so the API returns *something*
prediction_result
Expand Down

0 comments on commit f5086a7

Please sign in to comment.