Updates 15

awamaeva · Feb 6, 2024 · 9849720 · 9849720
1 parent 5a84b29
commit 9849720
Show file tree

Hide file tree

Showing 39 changed files with 192 additions and 677 deletions.
diff --git a/.DS_Store b/.DS_Store
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,8 @@
 Package: trajmsm
 Type: Package
 Title: Marginal Structural Models with Latent Class Growth Analysis of Treatment Trajectories
-Version: 0.1.2
-Authors@R: c(
-    person("Awa", "Diop", email = "awa.diop.2@ulaval.ca", role = c("aut", "cre")),
-    person("Denis", "Talbot", role = "aut"))
+Version: 0.1.0
+Authors: Awa Diop, Denis Talbot
 Maintainer: Awa Diop <awa.diop.2@ulaval.ca>
 Description: Implements marginal structural models combined with latent class growth analysis framework for assessing the causal effect of treatment trajectories. Based on the approach described in "Marginal Structural Models with Latent Class Growth Analysis of Treatment Trajectories" (https://journals.sagepub.com/doi/pdf/10.1177/09622802231202384).
 License: GPL (>= 3)
@@ -17,12 +15,9 @@ Imports:
     geepack,
     ggplot2,
     survival,
-    sandwich
+    sandwich,
+    utils
 LazyData: true
-Suggests: 
-    knitr,
-    rmarkdown
-VignetteBuilder: knitr
 RoxygenNote: 7.2.0
 URL: https://github.com/awamaeva/R-package-trajmsm
 BugReports: https://github.com/awamaeva/R-package-trajmsm/issues 
diff --git a/LICENSE b/LICENSE
diff --git a/Meta/.DS_Store b/Meta/.DS_Store
diff --git a/Meta/README.md b/Meta/README.md
@@ -1,23 +1,17 @@
-# R-package-trajMSM
-The package trajMSM is based on the paper Marginal Structural Models with Latent Class Growth
-Analysis of Treatment Trajectories: https://doi.org/10.48550/arXiv.2105.12720. Latent class growth
-analysis (LCGA) are increasingly proposed as a solution to summarize the observed longitudinal
-treatment in a few distinct groups. When combined with standard approaches like Cox proportional
-hazards models, LCGA can fail to control time-dependent confounding bias because of timevarying
-covariates that have a double role of confounders and mediators. We propose to use LCGA
-to classify individuals into a few latent classes based on their medication adherence pattern, then
-choose a working marginal structural model (MSM) that relates the outcome to these groups. The
-parameter of interest is nonparametrically defined as the projection of the true MSM onto the chosen
-working model. The combination of LCGA with MSM (LCGA-MSM) is a convenient way
-to describe treatment adherence and can effectively control time-dependent confounding. Several
-approaches exist to estimate the parameters of a MSM and one of the most popular is the inverse
-probability weighting (IPW). The IPW mimics a random assignment of the treatment by creating
-a pseudo-population where the treated and the untreated groups are comparable. In longitudinal
-settings, IPW can appropriately adjust for time-varying covariates affected by prior exposure and
-selection bias. In this first version, we proposed to estimate parameters of the LCGA-MSM using
-the IPW, g-computation and pooled LTMLE. We proposed an extension of the LCGA-MSM to a time-dependent outcome.
-We called this approach LCGA-HRMSM for LCGA and history-rectricted HRMSM. The same three estimators: IPW, g-computation
-and pooled LTMLE are proposed to estimate parameters of LCGA-HRMSMs. 
+# R-package-trajmsm
 
-To access the R codes: https://github.com/awamaeva/R-package-trajMSM.
+<p align="center">
+  <img src="vignettes/trajmsm_logo.png" alt="" width="200">
+</p>
 
+The `trajmsm` package is inspired by the paper "Marginal Structural Models with Latent Class Growth Analysis of Treatment Trajectories," published in *Statistical Methods for Medical Research*. [Read the paper](https://journals.sagepub.com/doi/pdf/10.1177/09622802231202384). 
+
+Latent Class Growth Analysis (LCGA) is increasingly used to summarize longitudinal treatment data into distinct groups. However, combining LCGA with standard methods like Cox proportional hazards models often fails to control time-dependent confounding due to covariates acting as both confounders and mediators. Our solution employs LCGA to classify individuals into latent classes based on their medication adherence patterns. We then utilize a Marginal Structural Model (MSM) to relate outcomes to these groups. The key parameter is nonparametrically defined, projecting the true MSM onto the selected working model.
+
+Integrating LCGA with MSM (termed LCGA-MSM) offers an effective way to describe treatment adherence and control time-dependent confounding. Common methods to estimate MSM parameters include Inverse Probability Weighting (IPW), which creates a pseudo-population for balanced treatment groups. In longitudinal settings, IPW adjusts for time-varying covariates impacted by previous exposures and selection bias.
+
+In this initial version of `trajmsm`, we estimate LCGA-MSM parameters using IPW, g-computation, and pooled Longitudinal Targeted Maximum Likelihood Estimation (LTMLE). We also introduce an extension for time-dependent outcomes, termed LCGA-History-Restricted MSM (LCGA-HRMSM), with the same three estimators.
+
+For access to the R codes, visit our [GitHub repository](https://github.com/awamaeva/R-package-trajMSM).
+
+For additional insights on trajectory analysis, check out the [trajectory_analysis repository](https://github.com/awamaeva/trajectory_analysis).
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,18 +8,15 @@ export(inverse_probability_weighting)
 export(pltmle)
 export(predict_traj)
 export(split_data)
-export(stabilized_ipcw)
-export(stabilized_iptw)
 export(trajhrmsm_gform)
 export(trajhrmsm_ipw)
 export(trajhrmsm_pltmle)
 export(trajmsm_gform)
 export(trajmsm_ipw)
 export(trajmsm_pltmle)
-export(unstabilized_ipcw)
-export(unstabilized_iptw)
 import(e1071)
 import(flexmix)
+import(geepack)
 import(ggplot2)
 import(sandwich)
 importFrom(stats,aggregate)
@@ -34,8 +31,11 @@ importFrom(stats,plogis)
 importFrom(stats,pnorm)
 importFrom(stats,qlogis)
 importFrom(stats,quantile)
+importFrom(stats,quasibinomial)
 importFrom(stats,rbinom)
 importFrom(stats,relevel)
 importFrom(stats,reshape)
 importFrom(stats,sd)
+importFrom(stats,var)
 importFrom(survival,coxph)
+importFrom(utils,combn)
diff --git a/R/.DS_Store b/R/.DS_Store
diff --git a/R/gendata.R b/R/gendata.R
@@ -74,7 +74,6 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),
 
 
     if (include_censor && format == "long"){
-      obsdata = obsdata_long
       obsdata$censor <- as.vector(censor)
     }
 
@@ -129,7 +128,6 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),
 
 
       if (include_censor  && format == "long") {
-        obsdata = obsdata_long
         obsdata$censor <- as.vector(censor)
       }
 

diff --git a/R/gformula.R b/R/gformula.R
@@ -1,16 +1,31 @@
-#' @title Counterfactual means for g-formula.
-#' @description Get the counterfactual means for the g-formula.
+#' @title Counterfactual Means via G-Formula
+#' @description Calculates counterfactual means using the g-formula approach.
 #' @name gformula
 #' @param formula specification of the model for the outcome to be fitted.
 #' @param baseline name of the baseline covariates.
 #' @param covariates list of the names of the time-varying covariates.
 #' @param treatment name of the time-varying treatment.
-#' @param Y outcome variable.
+#' @param outcome name of the outcome variable.
 #' @param ntimes_interval length of a time-interval (s).
 #' @param obsdata observed data in wide format.
 #' @returns  \item{list_gform_countermeans}{Counterfactual means obtained with g-formula.}
 #' @import e1071
 #' @export
+#' @examples
+#' obsdata = gendata(n = 1000, format = "wide", total_followup = 6, seed = 945)
+#' years <- 2011:2016
+#' baseline_var <- c("age","sex")
+#' variables <- c("hyper", "bmi")
+#' var_cov <- c("statins","hyper", "bmi")
+#' covariates <- lapply(years, function(year) {
+#' paste0(variables, year)})
+#' treatment_var <- paste0("statins", 2011:2016)
+#' formula = paste0("y ~", paste0(treatment_var,collapse = "+"), "+",
+#'                 paste0(unlist(covariates), collapse = "+"),"+",
+#'                 paste0(baseline_var, collapse = "+"))
+#'res_gform <- gformula(formula = formula, baseline = baseline_var, covariates = covariates,
+#'treatment = treatment_var, outcome = "y", ntimes_interval = 6, obsdata =   obsdata )
+
 #' @author Awa Diop, Denis Talbot
 
 

diff --git a/R/ggtraj.R b/R/ggtraj.R
@@ -1,15 +1,13 @@
 #' @title ggplot Trajectory
-#' @description Use \code{"ggplot2"} to plot trajectory groups produced by the function \code{"build_traj"} using the observed treatment.
-#' @param obsdata Data to plot trajectory groups.
-#' @param treatment Name of the time-varying treatment.
-#' @param time Name of the variable measurements of time.
-#' @param identifier Name of the identifier variable.
+#' @description use \code{"ggplot2"} to plot trajectory groups produced by the function \code{"build_traj"} using the observed treatment.
+#' @param treatment name of the time-varying treatment.
+#' @param time name of the time variable.
+#' @param identifier name of the identifier variable.
 #' @param class Name of the trajectory groups.
-#' @param fun Specify what statistics to display, by default calculate the mean.
-#' @param data_post Matrix of the posterior probabilities and the trajectory groups.
-#' @param traj_data Merged datasets containing observed data in long format and trajectory groups.
-#' @param \dots Additional arguments to be passed to ggplot functions.
-#' @return A ggplot object representing the trajectory groups using the observed treatment.
+#' @param FUN specify what statistics to display, by default calculate the mean.
+#' @param traj_data merged datasets containing observed data in long format and trajectory groups.
+#' @param \dots additional arguments to be passed to ggplot functions.
+#' @return a ggplot object representing the trajectory groups using the observed treatment.
 #' @import ggplot2 flexmix
 #' @export ggtraj
 #' @examples
@@ -23,8 +21,8 @@
 #'     Aggtraj_data <- aggregate(AggFormula, data = traj_data_long, FUN = mean)
 #'     Aggtraj_data
 #' #Aggtraj_data with labels
-#' traj_data_long[ , "traj_group"] <- factor(ifelse(traj_data_long[ , "class"] == "2" ,"Group3" ,
-#' ifelse (traj_data_long[ , "class"]== "1" , "Group1" ,"Group2")))
+#' traj_data_long[ , "traj_group"] <- factor(ifelse(traj_data_long[ , "class"] == "3" ,"Group1" ,
+#' ifelse (traj_data_long[ , "class"]== "1" , "Group2" ,"Group3")))
 #' AggFormula <- as.formula(paste("statins", "~", "time", "+", "traj_group"))
 #' Aggtraj_data <- aggregate(AggFormula, data = traj_data_long, FUN = mean)
 #' ggtraj(traj_data =  Aggtraj_data,
@@ -39,9 +37,9 @@ ggtraj <- function(traj_data, treatment, time, identifier, class, FUN = mean, ..
     geom_point(size = 3.1) +
     geom_line(size = 1.1) +
     scale_color_brewer(palette = "Set1") +
-    labs(title = "Observed Treatment Over Time by Trajectory Group",
+    labs(title = "Mean Marker Stress Over Time by Trajectory Group",
          x = "Time",
-         y = "Adherence Probability",
+         y = "Mean Marker Stress ",
          color = "Trajectory Group",
          shape = "Trajectory Group",
          linetype = "Trajectory Group") +

diff --git a/R/pltmle.R b/R/pltmle.R
@@ -6,11 +6,14 @@
 #' @param baseline name of baseline covariates.
 #' @param covariates covariates.
 #' @param treatment time-varying treatment.
+#' @param outcome name of the outcome variable.
 #' @param time name of the time variable.
 #' @param time_values measuring times.
 #' @param total_followup number of measuring times per interval.
 #' @param ntimes_interval length of a time-interval (s).
+#' @param traj matrix of indicators for trajectory groups.
 #' @param number_traj an integer to choose the number of trajectory groups.
+#' @param treshold for weight truncation.
 #' @param obsdata observed data in wide format.
 #' @returns  \item{list_pltmle_countermeans}{Counterfactual means and influence functions with the pooled ltmle.}
 #' \item{D}{Influence functions}
@@ -106,7 +109,7 @@ pltmle <- function(formula, outcome, treatment, covariates, baseline, ntimes_int
   Hs = do.call(rbind, Hs.all)
 
   # Update the risk for each regime of treatment
-  modEs = glm(as.formula(paste0(outcome, "~", "-1 + offset(qlogis(unlist(Qs))) + Hs")), family = binomial, data = obsdata.all2);;
+  modEs = glm(as.formula(paste0(outcome, "~", "-1 + offset(qlogis(unlist(Qs))) + Hs")), family = binomial, data = obsdata.all2)
 
   coef_Es = ifelse(is.na(coef(modEs)), 0, coef(modEs))
 

diff --git a/R/split_data.R b/R/split_data.R
@@ -4,7 +4,8 @@
 #' @name split_data
 #' @param obsdata observed data in wide format.
 #' @param identifier identifier of individuals.
-#' @param ntimes_length number of measuring times per interval.
+#' @param ntimes_interval number of measuring times per interval.
+#' @param time_values measuring times.
 #' @param total_followup total length of follow-up.
 #' @param time name of the time variable.
 #' @return \item{all_df}{all subsets, list of time intervals.}

diff --git a/R/stabilized_ipcw.R b/R/stabilized_ipcw.R
@@ -10,7 +10,6 @@
 #' @return Stabilized Inverse Probability of Censoring Weights
 #' @keywords internal
 #' @noRd
-#' @export
 #' @author Awa Diop, Denis Talbot
 #' @note This function requires data in a wide format.
 #' @examples
@@ -74,8 +73,8 @@ stabilized_ipcw <- function(identifier, treatment, covariates, baseline, censor,
 
   # Formulating denominator model for treatment and censoring at t=1
   # Including only the baseline covariates since it's the first time point
-  form_denom_treatment_t1 <- as.formula(paste0(treatment[1], "~", paste0(baseline, collapse = "+")))
-  form_denom_censor_t1 <- as.formula(paste0(censor[1], "~", paste0(baseline, collapse = "+")))
+  form_denom_treatment_t1 <- as.formula(paste0(treatment[1], "~", paste0(unlist(covariates[1]),collapse = "+"), "+", paste0(baseline, collapse = "+")))
+  form_denom_censor_t1 <- as.formula(paste0(censor[1], "~", paste0(unlist(covariates[1]),collapse = "+"), "+", paste0(baseline, collapse = "+")))
 
   # Fitting models for numerator at t=1
   fit_num_treatment_t1 <- glm(form_num_treatment_t1, family = binomial(link = "logit"), data = obsdata)

diff --git a/R/stabilized_iptw.R b/R/stabilized_iptw.R
@@ -9,7 +9,6 @@
 #' @return Stabilized inverse of treatment probabilities
 #' @keywords internal
 #' @noRd
-#' @export
 #' @note This function requires data in a wide format.
 #' @author Awa Diop, Denis Talbot
 #' @examples
@@ -55,6 +54,7 @@ stabilized_iptw <- function(identifier, treatment, covariates, baseline, obsdata
 
   first_time_covariates <- paste0(covariates[grep("1", covariates)], collapse = "+")
   form_num_t1 <- as.formula(paste0(treatment[1], "~ 1"))
+
   form_denom_t1 <- as.formula(paste0(treatment[1], "~", paste0(unlist(covariates[1]),collapse = "+"), "+", paste0(baseline, collapse = "+")))
 
   fit_num_t1 <- glm(form_num_t1, family = binomial(link = "logit"), data = obsdata)

diff --git a/R/trajhrmsm_gform.R b/R/trajhrmsm_gform.R
@@ -2,17 +2,16 @@
 #' @description Estimate parameters of LCGA-HRMSM Using g-formula.
 #'  and bootstrap to get standard errors.
 #' @name trajhrmsm_gform
-#' @param formula specification of the model for the outcome to be fitted for a binomial or gaussian distribution.
 #' @param family specification of the error distribution and link function to be used in the model.
 #' @param degree_traj to specify the polynomial degree for modelling the time-varying treatment.
 #' @param identifier  name of the column for unique identifiant.
 #' @param baseline name of baseline covariates.
 #' @param covariates names of time-varying covariates in a wide format.
 #' @param treatment name of time-varying treatment.
+#' @param outcome name of the outcome variable.
 #' @param var_cov names of the time-varying covariates in a long format.
 #' @param ntimes_interval length of a time-interval (s).
 #' @param total_followup total length of follow-up.
-#' @param censor name of the censoring variable.
 #' @param number_traj number of trajectory groups.
 #' @param rep number of repetition for the bootstrap.
 #' @param obsdata data in a long format.
@@ -27,6 +26,8 @@
 #'   }
 #' @importFrom stats na.omit rbinom plogis qlogis  reshape glm
 #' binomial coef as.formula ave aggregate relevel pnorm sd quantile model.matrix
+#' quasibinomial var
+#' @importFrom utils combn
 #' @export
 #' @author Awa Diop Denis Talbot
 #' @examples

diff --git a/R/trajhrmsm_ipw.R b/R/trajhrmsm_ipw.R
@@ -1,20 +1,24 @@
 #' @title History Restricted MSM and Latent Class of Growth Analysis estimated with IPW.
 #' @description Estimate parameters of LCGA-HRMSM Using a Pooled LTMLE.
 #' @name trajhrmsm_ipw
-#' @param formula specification of the model for the outcome to be fitted for a binomial or gaussian distribution.
 #' @param family specification of the error distribution and link function to be used in the model.
+#' @param numerator To choose between stabilized and unstabilized weights.
 #' @param degree_traj to specify the polynomial degree for modelling the time-varying treatment.
 #' @param identifier  name of the column for unique identifiant.
 #' @param baseline name of baseline covariates.
 #' @param covariates names of time-varying covariates in a wide format.
 #' @param treatment name of time-varying treatment.
+#' @param outcome name of the outcome variable.
 #' @param var_cov names of the time-varying covariates in a long format.
 #' @param ntimes_interval length of a time-interval (s).
 #' @param total_followup total length of follow-up.
 #' @param censor name of the censoring variable.
+#' @param include_censor Logical, if TRUE, includes censoring.
 #' @param number_traj number of trajectory groups.
 #' @param weights a vector of estimated weights. If NULL, the weights are computed by the function.
 #' @param time_values values of the time variable.
+#' @param time name of the time variable.
+#' @param treshold for weight truncation.
 #' @param obsdata data in a long format.
 #' @return A list containing the following components:
 #'   \itemize{
@@ -24,11 +28,11 @@
 #'   \item{mean_adh}{Mean adherence per trajectory group.}
 #'   }
 #' @author Awa Diop, Denis Talbot
+#' @import sandwich
+#' @import flexmix
 #' @importFrom stats na.omit rbinom plogis qlogis  reshape glm
 #' binomial coef as.formula ave aggregate relevel pnorm sd quantile model.matrix
 #' @export trajhrmsm_ipw
-#' @import sandwich
-#' @import flexmix
 #' @examples
 #' obsdata_long = gendata(n = 1000, format = "long", total_followup = 8,
 #' timedep_outcome = TRUE,  seed = 945)
@@ -83,7 +87,7 @@ trajhrmsm_ipw <- function(degree_traj = c("linear","quadratic","cubic"),
 
   treatment_names <- sub("\\d+", "", treatment)
   treatment_name <- unique(treatment_names)[1]
-  #Choice of degree for the polynomial form to buil the trajectory groups
+  #Choice of degree for the polynomial form to build the trajectory groups
   if(degree_traj == "linear"){
 
   restraj = build_traj(obsdata  = na.omit(dat_sub[,c(treatment_name,"time2","identifier2")]),
@@ -119,7 +123,7 @@ trajhrmsm_ipw <- function(degree_traj = c("linear","quadratic","cubic"),
     dat_final[ ,"ipw_group"] <- relevel(factor(dat_final[ ,"ipw_group"]), ref = ref)
 
 
-    mod_glm = glm(formula =as.formula(paste(outcome, "~ factor(ipw_group) + factor(Interv)")), weights = IPW,family = family,data=dat_final);
+    mod_glm = glm(formula =as.formula(paste(outcome, "~ factor(ipw_group) + factor(Interv)")), weights = dat_final[,"IPW"],family = family,data=dat_final);
     coefs <- summary(mod_glm)$coefficients[1:number_traj,1];
     se   <- sqrt(diag(vcovCL(mod_glm, cluster = cluster_formula)))[1:number_traj];
     pvalue <- round(coef(summary(mod_glm))[1:number_traj,4],4)