Updates 14

awamaeva · Jan 30, 2024 · 5a84b29 · 5a84b29
1 parent f6ee72d
commit 5a84b29
Show file tree

Hide file tree

Showing 24 changed files with 330 additions and 86 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -10,6 +10,7 @@ Description: Implements marginal structural models combined with latent class gr
 License: GPL (>= 3)
 Encoding: UTF-8
 Imports: 
+    stats,
     class,
     e1071,
     flexmix,

diff --git a/NAMESPACE b/NAMESPACE
@@ -12,6 +12,7 @@ export(stabilized_ipcw)
 export(stabilized_iptw)
 export(trajhrmsm_gform)
 export(trajhrmsm_ipw)
+export(trajhrmsm_pltmle)
 export(trajmsm_gform)
 export(trajmsm_ipw)
 export(trajmsm_pltmle)
@@ -21,4 +22,20 @@ import(e1071)
 import(flexmix)
 import(ggplot2)
 import(sandwich)
+importFrom(stats,aggregate)
+importFrom(stats,as.formula)
+importFrom(stats,ave)
+importFrom(stats,binomial)
+importFrom(stats,coef)
+importFrom(stats,glm)
+importFrom(stats,model.matrix)
+importFrom(stats,na.omit)
+importFrom(stats,plogis)
+importFrom(stats,pnorm)
+importFrom(stats,qlogis)
+importFrom(stats,quantile)
+importFrom(stats,rbinom)
+importFrom(stats,relevel)
+importFrom(stats,reshape)
+importFrom(stats,sd)
 importFrom(survival,coxph)
diff --git a/R/IPW.R b/R/IPW.R
@@ -6,7 +6,6 @@
 #' @param baseline Name of the baseline covariates.
 #' @param covariates Name of the time-varying covariates.
 #' @param treatment Name of the time-varying treatment.
-#' @param total_followup Total length of follow-up.
 #' @param include_censor Logical value TRUE/FALSE to include or not a censoring variable.
 #' @param censor Name of the censoring variable.
 #' @param obsdata Observed data in wide format.
@@ -21,11 +20,11 @@
 #' treatment_var <- c("statins2011","statins2012","statins2013")
 #' stabilized_weights = inverse_probability_weighting(numerator = "stabilized",
 #' identifier = "id", covariates = covariates, treatment = treatment_var,
-#' baseline = baseline_var, total_followup = 3,obsdata = obsdata)
+#' baseline = baseline_var, obsdata = obsdata)
 
 inverse_probability_weighting <- function(numerator = c("stabilized", "unstabilized"), identifier,
                                           baseline, covariates, treatment,
-                                          total_followup, include_censor = FALSE, censor,
+                                           include_censor = FALSE, censor,
                                           obsdata){
   if (!is.data.frame(obsdata)) {
     stop("obsdata must be a data frame in wide format")
@@ -36,11 +35,16 @@ inverse_probability_weighting <- function(numerator = c("stabilized", "unstabili
 
   ipw_result <- switch(numerator,
                        "unstabilized" = ifelse(include_censor,
-                                               unstabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_followup, obsdata),
-                                               unstabilized_iptw(identifier, treatment, covariates, baseline, total_followup, obsdata)),
+                                               unstabilized_ipcw(identifier = identifier, treatment = treatment,
+                                                                 covariates = covariates, baseline = baseline,
+                                                                 censor = censor, obsdata = obsdata),
+                                               unstabilized_iptw(identifier = identifier, treatment = treatment,
+                                                                 covariates = covariates, baseline = baseline,  obsdata = obsdata)),
                        "stabilized" = ifelse(include_censor,
-                                             stabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_followup, obsdata),
-                                             stabilized_iptw(identifier, treatment, covariates, baseline, total_followup, obsdata))
+                                             stabilized_ipcw(identifier = identifier, treatment = treatment,
+                                                             covariates = covariates, baseline = baseline, censor = censor,  obsdata = obsdata),
+                                             stabilized_iptw(identifier = identifier, treatment = treatment, covariates = covariates,
+                                                             baseline = baseline,  obsdata = obsdata))
   )
 
   class(ipw_result) <- "IPW"

diff --git a/R/gendata.R b/R/gendata.R
@@ -3,13 +3,18 @@
 #' @param include_censor Logical, if TRUE, includes censoring.
 #' @param format Character, either "long" or "wide" for the format of the output data frame.
 #' @param timedep_outcome Logical, if TRUE, includes a time-dependent outcome.
+#' @param start_year baseline year.
+#' @param total_followup Number of measuring times.
 #' @param seed, use a specific seed value to ensure the simulated data is replicable.
+#' @importFrom stats na.omit rbinom plogis qlogis  reshape glm
+#' binomial coef as.formula
 #' @export gendata
 #' @return A data frame with generated trajectories.
 #' @examples
 #' gendata(n = 100, include_censor = FALSE, format = "wide",total_followup = 3, seed = 945)
 
-gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_year = 2011, total_followup, timedep_outcome = FALSE, seed) {
+gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),
+                   start_year = 2011, total_followup, timedep_outcome = FALSE, seed) {
   set.seed(seed)
 
   # Common variables for all scenarios
@@ -33,16 +38,6 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
     }
   }
 
-  # Apply censoring
-  if (include_censor) {
-    for (i in 2:total_followup) {
-      statins[, i][censor[, i-1] == 1] <- NA
-      hyper[, i][censor[, i-1] == 1] <- NA
-      bmi[, i][censor[, i-1] == 1] <- NA
-      censor[, i][censor[, i-1] == 1] <- 1
-    }
-  }
-
   if(timedep_outcome == FALSE){
   y <- rbinom(n, 1, plogis(-2.5 + rowSums(sapply(1:total_followup, function(i)
  -0.5* statins[, i] + 0.25 * hyper[, i] + 0.25* bmi[, i]))))
@@ -55,6 +50,16 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
 
   colnames(obsdata)[5:ncol(obsdata)] <- coln
 
+  # Apply censoring
+  if (include_censor) {
+    for (i in 2:total_followup) {
+      statins[, i][censor[, i-1] == 1] <- NA
+      hyper[, i][censor[, i-1] == 1] <- NA
+      bmi[, i][censor[, i-1] == 1] <- NA
+      censor[, i][censor[, i-1] == 1] <- 1
+    }
+  }
+
 
   if (format == "long") {
 
@@ -66,6 +71,8 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
                             v.names = "bmi", direction = "long")
     obsdata = data.frame(id = statins_long$id, time = statins_long$time, statins = statins_long$statins, hyper = hyper_long$hyper, bmi = bmi_long$bmi,age = age, sex=sex, y = y)
   }
+
+
     if (include_censor && format == "long"){
       obsdata = obsdata_long
       obsdata$censor <- as.vector(censor)
@@ -78,9 +85,11 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
     obsdata <- cbind(obsdata, censor)
   }
 
+
 }
 
   if(timedep_outcome == TRUE){
+
     y <- matrix(NA, nrow = n, ncol = total_followup)
     y[, 1] <- rbinom(n, 1, plogis(-2.5 -0.5 * statins[, 1] + 0.25* hyper[, 1] + 0.25 * bmi[, 1]))
 
@@ -89,6 +98,16 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
     y[,i][y[, i-1] == 1] <- 1
     }
 
+    # Apply censoring
+    if (include_censor) {
+      for (i in 2:total_followup) {
+        statins[, i][censor[, i-1] == 1] <- NA
+        hyper[, i][censor[, i-1] == 1] <- NA
+        bmi[, i][censor[, i-1] == 1] <- NA
+        censor[, i][censor[, i-1] == 1] <- 1
+      }
+    }
+
     obsdata <- data.frame(id = id, age = age, sex = sex,statins = statins, hyper = hyper, bmi = bmi,y = y)
 
     coln <- paste0(rep(c("statins", "hyper", "bmi","y"), each = total_followup), seq(start_year, start_year+total_followup-1, by = 1))
@@ -108,6 +127,7 @@ gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_y
 
     }
 
+
       if (include_censor  && format == "long") {
         obsdata = obsdata_long
         obsdata$censor <- as.vector(censor)

diff --git a/R/pltmle.R b/R/pltmle.R
@@ -3,9 +3,11 @@
 #' @name pltmle
 #' @param formula specification of the model for the outcome to be fitted.
 #' @param identifier  name of the column for unique identifiant.
+#' @param baseline name of baseline covariates.
 #' @param covariates covariates.
 #' @param treatment time-varying treatment.
 #' @param time name of the time variable.
+#' @param time_values measuring times.
 #' @param total_followup number of measuring times per interval.
 #' @param ntimes_interval length of a time-interval (s).
 #' @param number_traj an integer to choose the number of trajectory groups.
@@ -30,7 +32,7 @@
 #'     AggTrajData <- aggregate(AggFormula, data = trajmsm_long, FUN = mean)
 #'     AggTrajData
 #' trajmsm_long[ , "traj_group"] <- trajmsm_long[ , "class"]
-#' trajmsm_wide = reshape(trajmsm_long, direction = "wide", idvar = "id",
+#' obsdata= reshape(trajmsm_long, direction = "wide", idvar = "id",
 #' v.names = c("statins","bmi","hyper"), timevar = "time", sep ="")
 #' formula =  as.formula(" y ~ statins2011 + statins2012 + statins2013 +
 #' hyper2011 + bmi2011 + hyper2012 + bmi2012 +
@@ -42,7 +44,7 @@
 #' traj[,1]=1
 #' res_pltmle = pltmle(formula = formula, outcome = "y",treatment = treatment_var,
 #' covariates = covariates, baseline = baseline_var, ntimes_interval = 3, number_traj = 3,
-#'  time =  "time",time_values = time_values,identifier = "id",obsdata = trajmsm_wide,
+#'  time =  "time",time_values = time_values,identifier = "id",obsdata = obsdata,
 #' traj=traj, treshold = 0.99)
 #' res_pltmle$counter_means
 #' @author Awa Diop, Denis Talbot
@@ -87,8 +89,8 @@ pltmle <- function(formula, outcome, treatment, covariates, baseline, ntimes_int
   # Compute the weights for all different regimes of treatment
   Weights = inverse_probability_weighting(identifier = identifier, covariates = covariates,
                                           treatment = treatment, baseline = baseline,
-                                          total_followup = total_followup, numerator = "unstabilized",
-                                          include_censor = FALSE, censor = censor,obsdata = obsdata)[[1]];
+                                          numerator = "unstabilized",
+                                          include_censor = FALSE, obsdata = obsdata)[[1]];
 
   weights_trunc <- sapply(1:ntimes_interval, function(x){
   weights <- ifelse(quantile(Weights[, x], treshold, na.rm = TRUE)> Weights[, x], quantile(Weights[, x], treshold, na.rm = TRUE), Weights[, x])

diff --git a/R/stabilized_ipcw.R b/R/stabilized_ipcw.R
@@ -5,21 +5,32 @@
 #' @param treatment Time-varying treatment.
 #' @param covariates List of time-varying covariates.
 #' @param baseline Baseline covariates.
-#' @param total_follow_up Total length of follow-up.
 #' @param censor Name of the censoring variable.
 #' @param obsdata Observed data in wide format.
 #' @return Stabilized Inverse Probability of Censoring Weights
+#' @keywords internal
 #' @noRd
 #' @export
 #' @author Awa Diop, Denis Talbot
 #' @note This function requires data in a wide format.
-
-stabilized_ipcw <- function(identifier, treatment, covariates, baseline, censor, total_follow_up, obsdata) {
+#' @examples
+#' obsdata = gendata(n = 1000, format = "wide",include_censor = TRUE,seed = 945)
+#' baseline_var <- c("age","sex")
+#' covariates <- list(c("hyper2011", "bmi2011"),
+#' c("hyper2012", "bmi2012"),c("hyper2013", "bmi2013"))
+#' treatment_var <- c("statins2011","statins2012","statins2013")
+#' censor_var <- c("censor2011", "censor2012","censor2013")
+#' stabilized_weights = stabilized_ipcw(
+#' identifier = "id", covariates = covariates, treatment = treatment_var,
+#' baseline = baseline_var, censor = censor_var,obsdata = obsdata)
+#' summary(stabilized_weights[[1]])
+
+stabilized_ipcw <- function(identifier, treatment, covariates, baseline, censor, obsdata) {
   if (!is.data.frame(obsdata)) {
     stop("obsdata must be a data frame in wide format")
   }
 
-  required_args <- c("identifier", "baseline", "covariates", "treatment", "total_follow_up", "censor", "obsdata")
+  required_args <- c("identifier", "baseline", "covariates", "treatment", "censor", "obsdata")
   missing_args <- setdiff(required_args, names(match.call()))
   if (length(missing_args) > 0) {
     stop(paste(missing_args, collapse = ", "), " not specified")
@@ -49,10 +60,11 @@ stabilized_ipcw <- function(identifier, treatment, covariates, baseline, censor,
     ps_censor <- predict(fit_num_censor, type = "response")
 
 
-    sw_treatment_temp[, i][obsdata[,censor[i-1]] == 0] <-  ((1 - obsdata[, treatment[i]][obsdata[,censor[i-1]] == 0]) * (1 - ps_treatment[obsdata[,censor[i-1]] == 0])) / (1 - predict(fit_denom_treatment, type = "response")) +
-      (obsdata[, treatment[i]][obsdata[,censor[i-1]] == 0] * ps_treatment[obsdata[,censor[i-1]] == 0]) / predict(fit_denom_treatment, type = "response")
+    sw_treatment_temp[, i][obsdata[,censor[i-1]] == 0] <-  ((1 - obsdata[, treatment[i]][obsdata[,censor[i-1]] == 0]) * (1 -
+                                      ps_treatment[obsdata[,censor[i-1]] == 0])) / (1 - predict(fit_denom_treatment, type = "response")[obsdata[,censor[i-1]] == 0]) +
+      (obsdata[, treatment[i]][obsdata[,censor[i-1]] == 0] * ps_treatment[obsdata[,censor[i-1]] == 0]) / predict(fit_denom_treatment, type = "response")[obsdata[,censor[i-1]] == 0]
 
-    sw_censor_temp[, i][obsdata[,censor[i-1]] == 0] <- ((1 - ps_censor[obsdata[,censor[i-1]] == 0]) / predict(fit_denom_censor, type = "response"))
+    sw_censor_temp[, i][obsdata[,censor[i-1]] == 0] <- ((1 - ps_censor[obsdata[,censor[i-1]] == 0]) / predict(fit_denom_censor, type = "response")[obsdata[,censor[i-1]] == 0])
   }
 
   # Calculate stabilized weights for the first time point

diff --git a/R/stabilized_iptw.R b/R/stabilized_iptw.R
@@ -5,20 +5,30 @@
 #' @param treatment Time-varying treatment.
 #' @param covariates Time-varying covariates.
 #' @param baseline Baseline covariates.
-#' @param total_followup Total length of follow-up.
 #' @param obsdata Observed data in wide format.
 #' @return Stabilized inverse of treatment probabilities
+#' @keywords internal
 #' @noRd
 #' @export
 #' @note This function requires data in a wide format.
 #' @author Awa Diop, Denis Talbot
+#' @examples
+#' obsdata = gendata(n = 1000, format = "wide",total_followup =3, seed = 945)
+#' baseline_var <- c("age","sex")
+#' covariates <- list(c("hyper2011", "bmi2011"),
+#' c("hyper2012", "bmi2012"),c("hyper2013", "bmi2013"))
+#' treatment_var <- c("statins2011","statins2012","statins2013")
+#' stabilized_weights = stabilized_iptw(
+#' identifier = "id", covariates = covariates, treatment = treatment_var,
+#' baseline = baseline_var,obsdata = obsdata)
+#' summary(stabilized_weights[[1]])
 
-stabilized_iptw <- function(identifier, treatment, covariates, baseline, total_followup, obsdata) {
+stabilized_iptw <- function(identifier, treatment, covariates, baseline, obsdata) {
   if (!is.data.frame(obsdata)) {
     stop("obsdata needs to be a data frame")
   }
 
-  required_args <- c("identifier", "baseline", "covariates", "treatment", "total_followup", "obsdata")
+  required_args <- c("identifier", "baseline", "covariates", "treatment", "obsdata")
   missing_args <- setdiff(required_args, names(match.call()))
   if (length(missing_args) > 0) {
     stop(paste(missing_args, collapse = ", "), " not specified")

diff --git a/R/trajhrmsm_gform.R b/R/trajhrmsm_gform.R
@@ -18,7 +18,15 @@
 #' @param obsdata data in a long format.
 #' @param time name of the time variable.
 #' @param time_values measuring times.
-#' @return \item{trajhrmsm_gform }{trajhrmsm_gform lcga}
+#' @return A list containing the following components:
+#'   \itemize{
+#'   \item{results_hrmsm_gform}{Estimates of LCGA-HRMSM.}
+#'   \item{result_coef_boot}{Estimates obtained with bootstrap.}
+#'   \item{restraj}{Fitted trajectory model.}
+#'   \item{mean_adh}{Mean adherence per trajectory group.}
+#'   }
+#' @importFrom stats na.omit rbinom plogis qlogis  reshape glm
+#' binomial coef as.formula ave aggregate relevel pnorm sd quantile model.matrix
 #' @export
 #' @author Awa Diop Denis Talbot
 #' @examples
@@ -191,7 +199,7 @@ trajhrmsm_gform <- function(degree_traj = c("linear","quadratic","cubic"),
   results = cbind(coefs ,se,pvalue, lo.ci, up.ci);
   colnames(results) =  c("Estimate", "Std.Error", "Pvalue", "Lower CI", "Upper CI");
   results_hrmsm_gform = results
-  return(list(  results_hrmsm_gform = results_hrmsm_gform, result_coef_boot = result_coef_boot, restraj = restraj, mean_adh = mean_adh));
+  return(list(results_hrmsm_gform = results_hrmsm_gform, result_coef_boot = result_coef_boot, restraj = restraj, mean_adh = mean_adh));
   }
 
 }
diff --git a/R/trajhrmsm_ipw.R b/R/trajhrmsm_ipw.R
@@ -16,12 +16,22 @@
 #' @param weights a vector of estimated weights. If NULL, the weights are computed by the function.
 #' @param time_values values of the time variable.
 #' @param obsdata data in a long format.
+#' @return A list containing the following components:
+#'   \itemize{
+#'   \item{results_hrmsm_ipw}{Estimates of LCGA-HRMSM.}
+#'   \item{result_coef_boot}{Estimates obtained with bootstrap.}
+#'   \item{restraj}{Fitted trajectory model.}
+#'   \item{mean_adh}{Mean adherence per trajectory group.}
+#'   }
 #' @author Awa Diop, Denis Talbot
+#' @importFrom stats na.omit rbinom plogis qlogis  reshape glm
+#' binomial coef as.formula ave aggregate relevel pnorm sd quantile model.matrix
 #' @export trajhrmsm_ipw
 #' @import sandwich
 #' @import flexmix
 #' @examples
-#' obsdata_long = gendata(n = 1000, format = "long", total_followup = 8, timedep_outcome = TRUE,  seed = 945)
+#' obsdata_long = gendata(n = 1000, format = "long", total_followup = 8,
+#' timedep_outcome = TRUE,  seed = 945)
 #' baseline_var <- c("age","sex")
 #' years <- 2011:2018
 #' variables <- c("hyper", "bmi")
@@ -30,8 +40,10 @@
 #' treatment_var <- paste0("statins", 2011:2018)
 #' var_cov <- c("statins","hyper", "bmi","y")
 #' reshrmsm_ipw <- trajhrmsm_ipw(degree_traj = "linear", numerator = "stabilized",
-#' identifier = "id", baseline = baseline_var, covariates = covariates, treatment = treatment_var,
-#' outcome = "y", var_cov= var_cov,include_censor = FALSE, ntimes_interval = 6,total_followup = 8, time = "time", time_values = 2011:2018,
+#' identifier = "id", baseline = baseline_var,
+#' covariates = covariates, treatment = treatment_var,
+#' outcome = "y", var_cov= var_cov,include_censor = FALSE,
+#'  ntimes_interval = 6,total_followup = 8, time = "time", time_values = 2011:2018,
 #' family = "poisson", number_traj = 3, obsdata = obsdata_long, treshold = 0.999)
 #' reshrmsm_ipw$res_trajhrmsm_ipw
 
@@ -49,7 +61,7 @@ trajhrmsm_ipw <- function(degree_traj = c("linear","quadratic","cubic"),
     # Compute the weights for all different regimes of treatment
     Weights = inverse_probability_weighting(identifier = identifier, covariates = covariates,
                                             treatment = treatment, baseline = baseline,
-                                            total_followup = total_followup, numerator = numerator,
+                                            numerator = numerator,
                                             include_censor = include_censor, censor = censor,obsdata = obsdata_wide)[[1]];
 
       weights_trunc <- sapply(1:total_followup, function(x){