hta-pharma · MikeJSeo · Sep 1, 2025 · Aug 29, 2025 · Aug 29, 2025 · Sep 1, 2025
diff --git a/design/explore_normalization/binary.R b/design/explore_normalization/binary.R
@@ -0,0 +1,97 @@
+# setwd("~/GitHub/maicplus")
+source("design/explore_normalization/normalize_weights.R")
+
+library(maicplus)
+library(dplyr)
+
+data(centered_ipd_sat)
+data(adrs_sat)
+
+centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
+centered_colnames <- paste0(centered_colnames, "_CENTERED")
+
+# get dummy binary pseudo IPD
+pseudo_adrs <- get_pseudo_ipd_binary(
+  binary_agd = data.frame(
+    ARM = "B",
+    RESPONSE = c("YES", "NO"),
+    COUNT = c(280, 120)
+  ),
+  format = "stacked"
+)
+
+weighted_data <- estimate_weights(
+  data = centered_ipd_sat,
+  centered_colnames = centered_colnames
+)
+weighted_data_SW1 <- weighted_data_SWN <- weighted_data_SWESS <- weighted_data
+weighted_data_SW1$data$weights <- normalize_weights(weighted_data$data$weights, method = "SW1")
+weighted_data_SWN$data$weights <- normalize_weights(weighted_data$data$weights, method = "SWN")
+weighted_data_SWESS$data$weights <- normalize_weights(weighted_data$data$weights, method = "SWESS")
+
+
+### OW
+result_OW <- maic_unanchored(
+  weights_object = weighted_data,
+  ipd = adrs_sat,
+  pseudo_ipd = pseudo_adrs,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_type = "binary",
+  endpoint_name = "Binary Endpoint",
+  eff_measure = "OR",
+  # binary specific args
+  binary_robust_cov_type = "HC3"
+)
+
+### SW1
+result_SW1 <- maic_unanchored(
+  weights_object = weighted_data_SW1,
+  ipd = adrs_sat,
+  pseudo_ipd = pseudo_adrs,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_type = "binary",
+  endpoint_name = "Binary Endpoint",
+  eff_measure = "OR",
+  # binary specific args
+  binary_robust_cov_type = "HC3"
+)
+
+### SWN
+result_SWN <- maic_unanchored(
+  weights_object = weighted_data_SWN,
+  ipd = adrs_sat,
+  pseudo_ipd = pseudo_adrs,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_type = "binary",
+  endpoint_name = "Binary Endpoint",
+  eff_measure = "OR",
+  # binary specific args
+  binary_robust_cov_type = "HC3"
+)
+
+### SWESS
+result_SWESS <- maic_unanchored(
+  weights_object = weighted_data_SWESS,
+  ipd = adrs_sat,
+  pseudo_ipd = pseudo_adrs,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_type = "binary",
+  endpoint_name = "Binary Endpoint",
+  eff_measure = "OR",
+  # binary specific args
+  binary_robust_cov_type = "HC3"
+)
+
+# results are exactly the same
+result_OW$inferential$summary
+result_SW1$inferential$summary
+result_SWN$inferential$summary
+result_SWESS$inferential$summary
diff --git a/design/explore_normalization/flexsurv_trial_with_weights.R b/design/explore_normalization/flexsurv_trial_with_weights.R
@@ -0,0 +1,56 @@
+# R Script to fit a weighted parametric survival model using the flexsurvreg function
+# directly, without a separate function definition.
+#
+# This script performs the following steps:
+# 1. Loads the required 'flexsurv' package.
+# 2. Generates sample survival data.
+# 3. Creates a numeric vector of case weights.
+# 4. Fits a weighted Weibull model using flexsurvreg.
+# 5. Prints a summary of the fitted weighted model.
+# 6. For comparison, fits and summarizes an unweighted model.
+#
+# To run this script, you must have the 'flexsurv' package installed.
+# You can install it with: install.packages("flexsurv")
+
+# Load the flexsurv package.
+library(flexsurv)
+
+# --- Step 1: Generate some sample survival data ---
+set.seed(42)
+n <- 200
+sample_data <- data.frame(
+  time = rexp(n, rate = 0.1),
+  status = rbinom(n, 1, 0.8),
+  group = factor(rep(c("A", "B"), each = n / 2)),
+  age = rnorm(n, 50, 10)
+)
+
+# --- Step 2: Create a weighting variable ---
+# Let's say observations from group 'B' are twice as important.
+weights_vector <- ifelse(sample_data$group == "A", 1, 2)
+weights_vector_SW1 <- weights_vector / sum(weights_vector)
+
+# --- Step 3: Fit the weighted Weibull model directly using flexsurvreg ---
+weighted_weibull_model <- flexsurvreg(
+  formula = Surv(time, status) ~ group + age,
+  data = sample_data,
+  weights = weights_vector,
+  dist = "weibull"
+)
+
+weighted_weibull_model_SW1 <- flexsurvreg(
+  formula = Surv(time, status) ~ group + age,
+  data = sample_data,
+  weights = weights_vector_SW1,
+  dist = "weibull"
+)
+
+# --- Step 4: Print a summary of the fitted weighted model ---
+print("Summary of the weighted Weibull model:")
+weighted_weibull_model
+weighted_weibull_model_SW1
+
+print(summary(weighted_weibull_model)[[1]][1:10, ])
+print(summary(weighted_weibull_model_SW1)[[1]][1:10, ])
+
+## Conclusion: estimates are the same with different normalization
diff --git a/design/explore_normalization/normalize_weights.R b/design/explore_normalization/normalize_weights.R
@@ -0,0 +1,29 @@
+normalize_weights <- function(weights, method) {
+  n <- length(weights)
+
+  # Handle edge case of zero weights
+  if (sum(weights) == 0) {
+    return(rep(1, n))
+  }
+
+  switch(method,
+    # Original weights (no normalization)
+    "OW" = weights,
+
+    # Sum to one (standard normalization)
+    # Manuscript: w̃_i = w_i / Σw_j
+    "SW1" = weights / sum(weights),
+
+    # Sum to N (preserve sample size interpretation)
+    # Manuscript: w̃_i = n × w_i / Σw_j
+    "SWN" = weights * n / sum(weights),
+
+    # Sum to ESS (balance between SW1 and SWN)
+    # Manuscript: w̃_i = ESS(w) × w_i / Σw_j
+    "SWESS" = {
+      ess <- sum(weights)^2 / sum(weights^2)
+      weights * ess / sum(weights)
+    },
+    stop("Unknown normalization method")
+  )
+}
diff --git a/design/explore_normalization/survival.R b/design/explore_normalization/survival.R
@@ -0,0 +1,96 @@
+# setwd("~/GitHub/maicplus")
+source("design/explore_normalization/normalize_weights.R")
+
+library(maicplus)
+library(dplyr)
+
+data(centered_ipd_sat)
+data(adtte_sat)
+data(pseudo_ipd_sat)
+
+centered_colnames <- c("AGE", "AGE_SQUARED", "SEX_MALE", "ECOG0", "SMOKE", "N_PR_THER_MEDIAN")
+centered_colnames <- paste0(centered_colnames, "_CENTERED")
+
+weighted_data <- estimate_weights(
+  data = centered_ipd_sat,
+  centered_colnames = centered_colnames
+)
+weighted_data_SW1 <- weighted_data_SWN <- weighted_data_SWESS <- weighted_data
+weighted_data_SW1$data$weights <- normalize_weights(weighted_data$data$weights, method = "SW1")
+weighted_data_SWN$data$weights <- normalize_weights(weighted_data$data$weights, method = "SWN")
+weighted_data_SWESS$data$weights <- normalize_weights(weighted_data$data$weights, method = "SWESS")
+
+
+### OW
+result_OW <- maic_unanchored(
+  weights_object = weighted_data,
+  ipd = adtte_sat,
+  pseudo_ipd = pseudo_ipd_sat,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_name = "Overall Survival",
+  endpoint_type = "tte",
+  eff_measure = "HR",
+  time_scale = "month",
+  km_conf_type = "log-log"
+)
+
+### SW1
+result_SW1 <- maic_unanchored(
+  weights_object = weighted_data_SW1,
+  ipd = adtte_sat,
+  pseudo_ipd = pseudo_ipd_sat,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_name = "Overall Survival",
+  endpoint_type = "tte",
+  eff_measure = "HR",
+  time_scale = "month",
+  km_conf_type = "log-log"
+)
+
+### SWN
+result_SWN <- maic_unanchored(
+  weights_object = weighted_data_SWN,
+  ipd = adtte_sat,
+  pseudo_ipd = pseudo_ipd_sat,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_name = "Overall Survival",
+  endpoint_type = "tte",
+  eff_measure = "HR",
+  time_scale = "month",
+  km_conf_type = "log-log"
+)
+
+### SWESS
+result_SWESS <- maic_unanchored(
+  weights_object = weighted_data_SWESS,
+  ipd = adtte_sat,
+  pseudo_ipd = pseudo_ipd_sat,
+  trt_ipd = "A",
+  trt_agd = "B",
+  normalize_weight = FALSE,
+  endpoint_name = "Overall Survival",
+  endpoint_type = "tte",
+  eff_measure = "HR",
+  time_scale = "month",
+  km_conf_type = "log-log"
+)
+
+# results are not the same
+result_OW$inferential$summary
+result_SW1$inferential$summary
+result_SWN$inferential$summary
+result_SWESS$inferential$summary
+
+# baseline hazards are different
+library(survival)
+basehaz(result_OW$inferential$fit$model_after)[1:10, ]
+basehaz(result_SW1$inferential$fit$model_after)[1:10, ]
+
+basehaz(result_OW$inferential$fit$model_before)[1:10, ]
+basehaz(result_SW1$inferential$fit$model_before)[1:10, ]