Updates 11

awamaeva · Jan 29, 2024 · 13ddf78 · 13ddf78
1 parent 2cf0c4e
commit 13ddf78
Show file tree

Hide file tree

Showing 14 changed files with 486 additions and 337 deletions.
diff --git a/R/.DS_Store b/R/.DS_Store
diff --git a/R/IPW.R b/R/IPW.R
@@ -6,7 +6,7 @@
 #' @param baseline Name of the baseline covariates.
 #' @param covariates Name of the time-varying covariates.
 #' @param treatment Name of the time-varying treatment.
-#' @param total_follow_up Total length of follow-up.
+#' @param total_followup Total length of follow-up.
 #' @param include_censor Logical value TRUE/FALSE to include or not a censoring variable.
 #' @param censor Name of the censoring variable.
 #' @param obsdata Observed data in wide format.
@@ -20,11 +20,11 @@
 #' treatment_var <- c("statins2011","statins2012","statins2013")
 #' stabilized_weights = inverse_probability_weighting(numerator = "stabilized", identifier = "id",
 #'         covariates = covar, treatment = treatment_var, baseline = baseline_var,
-#'           total_follow_up = 3,obsdata = Obsdata)
+#'           total_followup = 3,obsdata = Obsdata)
 
 inverse_probability_weighting <- function(numerator = c("stabilized", "unstabilized"), identifier,
                                           baseline, covariates, treatment,
-                                          total_follow_up, include_censor = FALSE, censor,
+                                          total_followup, include_censor = FALSE, censor,
                                           obsdata){
   if (!is.data.frame(obsdata)) {
     stop("obsdata must be a data frame in wide format")
@@ -35,11 +35,11 @@ inverse_probability_weighting <- function(numerator = c("stabilized", "unstabili
 
   ipw_result <- switch(numerator,
                        "unstabilized" = ifelse(include_censor,
-                                               unstabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_follow_up, obsdata),
-                                               unstabilized_iptw(identifier, treatment, covariates, baseline, total_follow_up, obsdata)),
+                                               unstabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_followup, obsdata),
+                                               unstabilized_iptw(identifier, treatment, covariates, baseline, total_followup, obsdata)),
                        "stabilized" = ifelse(include_censor,
-                                             stabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_follow_up, obsdata),
-                                             stabilized_iptw(identifier, treatment, covariates, baseline, total_follow_up, obsdata))
+                                             stabilized_ipcw(identifier, treatment, covariates, baseline, censor, total_followup, obsdata),
+                                             stabilized_iptw(identifier, treatment, covariates, baseline, total_followup, obsdata))
   )
 
   class(ipw_result) <- "IPW"

diff --git a/R/gendata.R b/R/gendata.R
@@ -0,0 +1,128 @@
+#' @title Generate Data Trajectories for MSM
+#' @param n Number of observations to generate.
+#' @param include_censor Logical, if TRUE, includes censoring.
+#' @param format Character, either "long" or "wide" for the format of the output data frame.
+#' @param timedep_outcome Logical, if TRUE, includes a time-dependent outcome.
+#' @param seed, use a specific seed value to ensure the simulated data is replicable.
+#' @export gendata
+#' @return A data frame with generated trajectories.
+#' @examples
+#' gendata(n = 100, include_censor = FALSE, format = "wide",total_followup = 3)
+
+gendata<- function(n, include_censor = FALSE, format = c("long", "wide"),start_year = 2011, total_followup, timedep_outcome = FALSE, seed) {
+  set.seed(seed)
+
+  # Common variables for all scenarios
+  id <- 1:n
+  age <- rbinom(n, 1, 0.5)
+  sex <- rbinom(n, 1, 0.5)
+
+  # Initialize variables
+  statins <- hyper <- bmi <- censor <- matrix(NA, nrow = n, ncol = total_followup)
+  bmi[, 1] <-   rbinom(n, 1, plogis(0.15 * age + 0.7 * sex))
+  hyper[, 1] <-  rbinom(n, 1, plogis(.15 * age + 0.7 * sex + 0.1 * bmi[, 1]))
+  statins[, 1] <- rbinom(n, 1, plogis(-.15 + 0.4 * age + 0.25* sex - 0.1 * bmi[, 1] - 0.2*hyper[, 1]))
+  censor[, 1] <-  rbinom(n, 1, plogis(-2 + 0.2 * age + 0.01 * sex + 0.1 * bmi[, 1] - 0.2*hyper[, 1] - 0.5*statins[, 1]))
+  # Generate data based on conditions
+  for (i in 2:total_followup) {
+    bmi[, i] <- rbinom(n, 1, plogis(0.15 * age + 0.7 * sex - 0.25 * statins[, i-1]))
+    hyper[, i] <- rbinom(n, 1, plogis(0.15 * age + 0.7 * sex + 0.1 * bmi[, i] - 0.35 * statins[, i-1]))
+    statins[, i] <- rbinom(n, 1, plogis(-0.15 + 0.1 * age + 0.1 * sex - 0.1 * bmi[, i] - 0.2*hyper[, i] + (seq(0.15,1,length.out = total_followup)[i-1])* statins[, i-1] ))
+    if (include_censor) {
+      censor[, i] <- rbinom(n, 1, plogis(-2 + 0.02 * age + 0.01 * sex - 0.5 * statins[, i] + 0.1 * hyper[, i] + 0.2 * bmi[, i]))
+    }
+  }
+
+  # Apply censoring
+  if (include_censor) {
+    for (i in 2:total_followup) {
+      statins[, i][censor[, i-1] == 1] <- NA
+      hyper[, i][censor[, i-1] == 1] <- NA
+      bmi[, i][censor[, i-1] == 1] <- NA
+      censor[, i][censor[, i-1] == 1] <- 1
+    }
+  }
+
+  if(timedep_outcome == FALSE){
+  y <- rbinom(n, 1, plogis(-2.5 + rowSums(sapply(1:total_followup, function(i)
+ -0.5* statins[, i] + 0.25 * hyper[, i] + 0.25* bmi[, i]))))
+
+  obsdata <- data.frame(id = id, age = age, sex = sex,y = y,statins = statins, hyper = hyper, bmi = bmi)
+
+  #coln <- paste0(rep(c("statins", "hyper", "bmi"), each = total_followup), seq(start_year, start_year+total_followup-1, by = 1))
+  coln <- paste0(rep(c("statins", "hyper", "bmi"), each = total_followup),
+                 rep(seq(start_year, start_year + total_followup - 1), times = 3))
+
+  colnames(obsdata)[5:ncol(obsdata)] <- coln
+
+
+  if (format == "long") {
+
+    statins_long <- reshape(data.frame(statins), varying = 1:total_followup,
+                            v.names = "statins", times = seq(start_year, start_year+total_followup-1, by = 1), direction = "long")
+    hyper_long <- reshape(data.frame(hyper), varying = 1:total_followup,
+                            v.names = "hyper", direction = "long")
+    bmi_long <- reshape(data.frame(bmi), varying = 1:total_followup,
+                            v.names = "bmi", direction = "long")
+    obsdata = data.frame(id = statins_long$id, time = statins_long$time, statins = statins_long$statins, hyper = hyper_long$hyper, bmi = bmi_long$bmi,age = age, sex=sex, y = y)
+  }
+    if (include_censor && format == "long"){
+      obsdata = obsdata_long
+      obsdata$censor <- as.vector(censor)
+    }
+
+
+
+  if (include_censor && format == "wide"){
+    colnames(censor) <- paste0("censor", seq(start_year, start_year+total_followup-1, by = 1))
+    obsdata <- cbind(obsdata, censor)
+  }
+
+}
+
+  if(timedep_outcome == TRUE){
+    y <- matrix(NA, nrow = n, ncol = total_followup)
+    y[, 1] <- rbinom(n, 1, plogis(-2.5 -0.5 * statins[, 1] + 0.25* hyper[, 1] + 0.25 * bmi[, 1]))
+
+    for (i in 2:total_followup) {
+    y[,i] <- rbinom(n, 1, plogis(-2.5 -0.5 * statins[, i] + 0.25* hyper[, i] + 0.25 * bmi[, i]))
+    y[,i][y[, i-1] == 1] <- 1
+    }
+
+    obsdata <- data.frame(id = id, age = age, sex = sex,statins = statins, hyper = hyper, bmi = bmi,y = y)
+
+    coln <- paste0(rep(c("statins", "hyper", "bmi","y"), each = total_followup), seq(start_year, start_year+total_followup-1, by = 1))
+    colnames(obsdata)[4:ncol(obsdata)] <- coln
+
+    if (format == "long") {
+      statins_long <- reshape(data.frame(statins), varying = 1:total_followup,
+                              v.names = "statins",times =seq(start_year, start_year+total_followup-1, by = 1),  direction = "long")
+      hyper_long <- reshape(data.frame(hyper), varying = 1:total_followup,
+                            v.names = "hyper", direction = "long")
+      bmi_long <- reshape(data.frame(bmi), varying = 1:total_followup,
+                          v.names = "bmi", direction = "long")
+      y_long <- reshape(data.frame(y), varying = 1:total_followup,
+                          v.names = "y", direction = "long")
+      obsdata = data.frame(id = statins_long$id, time = statins_long$time, statins = statins_long$statins,
+                          hyper = hyper_long$hyper, bmi = bmi_long$bmi,age = age, sex=sex, y = y_long$y)
+
+    }
+
+      if (include_censor  && format == "long") {
+        obsdata = obsdata_long
+        obsdata$censor <- as.vector(censor)
+      }
+
+
+    if (include_censor && format == "wide") {
+      colnames(censor) <- paste0("censor", seq(start_year, start_year+total_followup-1, by = 1))
+      obsdata <- cbind(obsdata, censor)
+    }
+
+
+  }
+return(obsdata)
+}
+
+obsdata <- gendata(n = 100, format = "long",timedep_outcome = FALSE, total_followup = 8, seed = 945)
+obsdata <- gendata(n = 100, format = "long",timedep_outcome = TRUE, total_followup = 8, seed = 945)
diff --git a/R/gendata_trajmsm.R b/R/gendata_trajmsm.R
diff --git a/R/ggtraj.R b/R/ggtraj.R
@@ -13,17 +13,17 @@
 #' @import ggplot2 flexmix
 #' @export ggtraj
 #' @examples
-#' Obsdata = gendata(n = 1000, format = "long", total_followup = 12, seed = 945)
-#' restraj = build_traj(obsdata = Obsdata, number_traj = 3, formula = as.formula(cbind(statins, 1 - statins) ~ time), identifier = "id")
+#' obsdata_long = gendata(n = 1000, format = "long", total_followup = 12, seed = 945)
+#' restraj = build_traj(obsdata = obsdata_long, number_traj = 3, formula = as.formula(cbind(statins, 1 - statins) ~ time), identifier = "id")
 #' datapost = restraj$data_post
 #' head(datapost)
-#' traj_data_long <- merge(Obsdata, datapost, by = "id")
+#' traj_data_long <- merge(obsdata_long, datapost, by = "id")
 #'     AggFormula <- as.formula(paste("statins", "~", "time", "+", "class"))
 #'     Aggtraj_data <- aggregate(AggFormula, data = traj_data_long, FUN = mean)
 #'     Aggtraj_data
 #' #Aggtraj_data with labels
-#' traj_data_long[ , "traj_group"] <- factor(ifelse(traj_data_long[ , "class"] == "2" ,"Group1" ,
-#' ifelse (traj_data_long[ , "class"]== "1" , "Group2" ,"Group3")))
+#' traj_data_long[ , "traj_group"] <- factor(ifelse(traj_data_long[ , "class"] == "2" ,"Group3" ,
+#' ifelse (traj_data_long[ , "class"]== "1" , "Group1" ,"Group2")))
 #' AggFormula <- as.formula(paste("statins", "~", "time", "+", "traj_group"))
 #' Aggtraj_data <- aggregate(AggFormula, data = traj_data_long, FUN = mean)
 #' ggtraj(traj_data =  Aggtraj_data,

diff --git a/R/pltmle.R b/R/pltmle.R
@@ -82,7 +82,7 @@ pltmle <- function(formula, outcome, treatment, covariates, baseline, ntimes_int
   # Compute the weights for all different regimes of treatment
   Weights = inverse_probability_weighting(identifier = identifier, covariates = covariates,
                                           treatment = treatment, baseline = baseline,
-                                          total_follow_up = total_followup, numerator = "unstabilized",
+                                          total_followup = total_followup, numerator = "unstabilized",
                                           include_censor = include_censor, censor = censor,obsdata = obsdata)[[1]];
 
   weights_trunc <- sapply(1:ntimes_interval, function(x){
@@ -142,23 +142,24 @@ pltmle <- function(formula, outcome, treatment, covariates, baseline, ntimes_int
     Ds = lapply(1:nregimes,function(x) sapply(1:number_traj,function(y)
       as.matrix(Hs.all[[x]][,y]*(Qstar[[x]]-Qstarm1[[x]]))))
     D_list[i]<-list(Ds)
+    Qstar = Qstarm1;
   }
 
 
 
   # Aggregate the results
   Q0 = unlist(Qstar)
-  obsdata0.all2$outcome = Q0
+  obsdata0.all2$Y = Q0
   obsdata0.all2$atraj = apply(obsdata0.all2[, treatment], 1, paste0, collapse = "")
-  obsdataT2 = aggregate(outcome ~ atraj, data = obsdata0.all2, FUN = mean)
-  obsdataT2$outcome = as.numeric(as.character(obsdataT2$outcome))
+  obsdataT2 = aggregate(Y ~ atraj, data = obsdata0.all2, FUN = mean)
+  obsdataT2$Y = as.numeric(as.character(obsdataT2$Y))
 
   #Influence curves
   D = lapply(1:nregimes,function(x) as.matrix(Reduce('+',lapply(1:number_traj, function(y){
     comps = as.matrix(D_list[[y]][[x]] +  as.numeric(Qstar[[x]] - mean(Qstar[[x]])))
     return(comps)
   }))))
-  list_pltmle_countermeans = list(counter_means = obsdataT2$outcome, D = D)
+  list_pltmle_countermeans = list(counter_means = obsdataT2$Y, D = D)
   return(list_pltmle_countermeans)
 }
 

diff --git a/R/predict_traj.R b/R/predict_traj.R
@@ -12,15 +12,15 @@
 #' @export predict_traj
 #' @author Awa Diop, Denis Talbot
 
-predict_traj <- function(identifier, total_followup, treatment, time, time_values, trajmodel,obsdata) {
+predict_traj <- function(identifier, total_followup, treatment, time, trajmodel) {
   data_combn <- bincombinations(total_followup)
 
   # Treatment regime in a long format
   rdata_counter <- reshape(data.frame(data_combn), direction = "long", varying = 1:total_followup, sep = "")
   colnames(rdata_counter) <- c(time, treatment, identifier)
-  rdata_counter[,time] <- time_values[rdata_counter[,time]]
 
-  post_probs <- posterior(trajmodel, newdata = data.frame(rdata_counter))
+  post_probs <- posterior(trajmodel, newdata = rdata_counter)
+
   rdata_counter$post_class <- apply(post_probs, 1, which.max)
 
   data_class <- aggregate(as.formula(paste0("post_class ~ ", identifier)), FUN = unique, data = rdata_counter)