Analysis pipeline: Joint models quantify associations between T-cell kinetics and allo-immunological events after allogeneic stem cell transplantation and subsequent donor lymphocyte infusion
Edouard Bonneville and Eva Koster
The analysis pipeline for this work can be regenerated by rendering this file,
rmarkdown::render("_targets.Rmd")
The pipeline can then be run using,
tar_make()
The complete pipeline can be visualised using,
tar_visnetwork()
We first load the targets
package and remove the potentially outdated
workflow.
library("targets")
library("tarchetypes")
library("future")
library("future.callr")
library("here")
tar_unscript()
We now define shared global options across our workflow and load R
functions from the R
folder.
# Use capsule/renv eventually docker (!!)
options(tidyverse.quiet = TRUE)
# Global objects
cell_lines <- list("cell_line" = paste0("CD", c(3, 4, 8)))
# Load packages and functions necessary for pipeline
source(here::here("packages.R"))
source(here::here("data-raw/prepare-raw-data.R"))
functions <- list.files(here("R"), full.names = TRUE)
invisible(lapply(functions, source))
rm("functions")
# Run whole pipeline in parallel
plan(callr)
# Other pipeline options
tar_option_set(error = "continue")
#> Establish _targets.R and _targets_r/globals/globals.R.
- First, we pre-process the raw data
list(
tar_target(
lymphocytes_raw,
data.table(readRDS(here("data-raw/2022-02-24_v9/lymphocytes.rds")))
),
tar_target(
variables_raw,
data.table(readRDS(here("data-raw/2022-02-24_v9/variables.rds")))
),
tar_target(dat_merged, prepare_raw_data(lymphocytes_raw, variables_raw))
)
#> Establish _targets.R and _targets_r/targets/process-raw-data.R.
- We add reference values for the different cell lines, for further plotting later in the pipeline
tar_target(reference_values, {
cbind.data.frame(
"cell_type" = c("CD3", "CD4", "CD8", "NK", "CD19"),
"lower_limit" = c(860, 560, 260, 40, 60),
"upper_limit" = c(2490, 1490, 990, 390, 1000)
)
})
#> Define target reference_values from chunk code.
#> Establish _targets.R and _targets_r/targets/reference_values.R.
- Next, we prepare the long and wide datasets for the alloSCT and post DLI models
list(
tar_target(
NMA_preDLI_datasets,
get_preDLI_datasets(
dat_merged[TCD2 %in% c("NMA RD: ALT", "UD: ALT + ATG")],
admin_cens = 6
)
),
tar_target(
NMA_postDLI_datasets,
get_postDLI_datasets(
dat_merged = dat_merged[TCD2 %in% c("NMA RD: ALT", "UD: ALT + ATG")],
admin_cens_dli = 3
)
)
)
#> Establish _targets.R and _targets_r/targets/prepare-data.R.
- Current value
list(
# -- Pre-DLI Cox model
tar_target(
preDLI_cox,
run_preDLI_cox(
form = Surv(time, status) ~
ATG.1 + hirisk.1 + # GVHD
ATG.2 + hirisk.2 + # Relapse
ATG.3 + # NRF other
strata(trans),
dat_wide = NMA_preDLI_datasets$wide
)
),
# -- Longitudinal models - correlated reffs, fixed intercept
tar_map(
values = cell_lines,
tar_target(
preDLI_long_corr,
run_preDLI_longitudinal(
cell_line = paste0(cell_line, "_abs_log"),
form_fixed = "ns(intSCT2_7, 3) * hirisk * ATG + CMV_PD",
form_random = ~ 0 + ns(intSCT2_7, 3) | IDAA,
dat = NMA_preDLI_datasets$long
)
)
),
# Not many events for we lower default flexibility of baseline hazard (from 5 to 3 internal knots)
tar_target(preDLI_basehaz_knots, 3L),
# -- Joint models, correlated reffs
tar_target(
preDLI_JM_value_corr_CD3,
jointModel(
lmeObject = preDLI_long_corr_CD3,
survObject = preDLI_cox,
CompRisk = TRUE,
method = "spline-PH-aGH",
timeVar = "intSCT2_7",
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
iter.qN = 1000L,
lng.in.kn = preDLI_basehaz_knots,
numeriDeriv = "cd",
eps.Hes = 1e-04
)
),
tar_target(
preDLI_JM_value_corr_CD4,
jointModel(
lmeObject = preDLI_long_corr_CD4,
survObject = preDLI_cox,
CompRisk = TRUE,
method = "spline-PH-aGH",
timeVar = "intSCT2_7",
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
iter.qN = 1000L,
lng.in.kn = preDLI_basehaz_knots,
numeriDeriv = "cd",
eps.Hes = 1e-04
)
),
tar_target(
preDLI_JM_value_corr_CD8,
jointModel(
lmeObject = preDLI_long_corr_CD8,
survObject = preDLI_cox,
CompRisk = TRUE,
method = "spline-PH-aGH",
timeVar = "intSCT2_7",
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
iter.qN = 1000L,
lng.in.kn = preDLI_basehaz_knots,
numeriDeriv = "cd",
eps.Hes = 1e-04,
verbose = TRUE
)
)
)
#> Establish _targets.R and _targets_r/targets/preDLI-current-value.R.
- Time-dependent slopes
list(
# Store the deriv form (since same for all mods)
tar_target(
derivForm_preDLI,
list(
fixed = ~ 0 + dns(intSCT2_7, 3) +
dns(intSCT2_7, 3):as.numeric(hirisk == "yes") +
dns(intSCT2_7, 3):as.numeric(ATG == "UD(+ATG)") +
dns(intSCT2_7, 3):as.numeric(hirisk == "yes"):as.numeric(ATG == "UD(+ATG)"),
random = ~ 0 + dns(intSCT2_7, 3),
indFixed = c(2:4, 8:13, 15:17),
indRandom = c(1:3)
)
),
tar_target(
preDLI_JM_both_corr_CD3,
update(
preDLI_JM_value_corr_CD3,
lmeObject = preDLI_long_corr_CD3,
survObject = preDLI_cox,
parameterization = "both",
interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
derivForm = derivForm_preDLI,
lng.in.kn = preDLI_basehaz_knots
)
),
# This one gets stuck local minima.. update iter.EM to 500
tar_target(
preDLI_JM_both_corr_CD4,
update(
preDLI_JM_value_corr_CD4,
lmeObject = preDLI_long_corr_CD4,
survObject = preDLI_cox,
parameterization = "both",
interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
derivForm = derivForm_preDLI,
lng.in.kn = preDLI_basehaz_knots,
iter.EM = 500L,
verbose = TRUE
)
),
tar_target(
preDLI_JM_both_corr_CD8,
update(
preDLI_JM_value_corr_CD8,
lmeObject = preDLI_long_corr_CD8,
survObject = preDLI_cox,
parameterization = "both",
interFact = list("value" = ~ strata(trans) - 1, "slope" = ~ strata(trans) - 1),
derivForm = derivForm_preDLI,
lng.in.kn = preDLI_basehaz_knots
)
)
)
#> Establish _targets.R and _targets_r/targets/preDLI-slopes.R.
list(
# -- Longitudinal models - correlated reffs, random intercept + slope
tar_map(
values = cell_lines,
tar_target(
postDLI_long_corr,
run_preDLI_longitudinal(
cell_line = paste0(cell_line, "_abs_log"),
form_fixed = "ns(intDLI1, 2) * ATG + CMV_PD",
form_random = ~ ns(intDLI1, 2) | IDAA,
dat = NMA_postDLI_datasets$long
)
)
),
# -- Post-DLI cox:
tar_target(
postDLI_cox,
run_postDLI_cox(
Surv(time, status) ~ ATG.1 + strata(trans), # ATG.2 wanted but not enough events
dat_wide = NMA_postDLI_datasets$wide
)
),
# 3L used pre-DLI, consider now 2L since only on portion of follow-up
tar_target(postDLI_basehaz_knots, 2L),
# -- Joint models, correlated reffs
tar_target(
postDLI_JM_corr_CD3,
jointModel(
lmeObject = postDLI_long_corr_CD3,
survObject = postDLI_cox,
CompRisk = TRUE,
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
method = "spline-PH-aGH",
timeVar = "intDLI1",
lng.in.kn = postDLI_basehaz_knots,
iter.EM = 1000L,
numeriDeriv = "cd",
eps.Hes = 1e-04,
verbose = TRUE
)
),
tar_target(
postDLI_JM_corr_CD4,
jointModel(
lmeObject = postDLI_long_corr_CD4,
survObject = postDLI_cox,
CompRisk = TRUE,
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
method = "spline-PH-aGH",
timeVar = "intDLI1",
lng.in.kn = postDLI_basehaz_knots,
iter.EM = 1000L,
numeriDeriv = "cd",
eps.Hes = 1e-04,
verbose = TRUE
)
),
tar_target(
postDLI_JM_corr_CD8,
jointModel(
lmeObject = postDLI_long_corr_CD8,
survObject = postDLI_cox,
CompRisk = TRUE,
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
method = "spline-PH-aGH",
timeVar = "intDLI1",
lng.in.kn = postDLI_basehaz_knots,
iter.EM = 1000L,
numeriDeriv = "cd",
eps.Hes = 1e-04,
verbose = TRUE
)
)
)
#> Establish _targets.R and _targets_r/targets/postDLI-value.R.
Add here targets for:
- Statistical supplement rmd (with corresponding figures)
- Manuscript figures rmd
list(
tar_target(
preDLI_long_corr_NK,
run_preDLI_longitudinal(
cell_line = "NK_abs_log",
form_fixed = "ns(intSCT2_7, 3) * hirisk * ATG + CMV_PD",
form_random = ~ 0 + ns(intSCT2_7, 3) | IDAA,
dat = NMA_preDLI_datasets$long
)
),
tar_target(
preDLI_JM_value_corr_NK,
jointModel(
lmeObject = preDLI_long_corr_NK,
survObject = preDLI_cox,
CompRisk = TRUE,
method = "spline-PH-aGH",
timeVar = "intSCT2_7",
parameterization = "value",
interFact = list("value" = ~ strata(trans) - 1),
iter.qN = 1000L,
lng.in.kn = preDLI_basehaz_knots,
numeriDeriv = "cd",
eps.Hes = 1e-04
)
),
tar_target(
preDLI_tdc_dataset,
make_tdc_dataset(
dat_wide = NMA_preDLI_datasets$wide,
dat_long = NMA_preDLI_datasets$long
)
)
)
#> Establish _targets.R and _targets_r/targets/NK-analyses.R.