more work on the documentation

StatisticsHealthEconomics · Dec 14, 2023 · ddc5e4c · ddc5e4c
1 parent 985922c
commit ddc5e4c
Show file tree

Hide file tree

Showing 22 changed files with 266 additions and 41 deletions.
diff --git a/Basic_example.R b/Basic_example.R
@@ -0,0 +1,50 @@
+## ----include = FALSE------------------------------------------------------
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+
+
+## ----setup, warning=FALSE, message=FALSE----------------------------------
+library(boot)      # non-parametric bootstrap in MAIC and ML G-computation
+library(copula)    # simulating BC covariates from Gaussian copula
+library(rstanarm)  # fit outcome regression, draw outcomes in Bayesian G-computation
+library(mimR)
+
+
+## ----load-data------------------------------------------------------------
+set.seed(555)
+
+AC.IPD <- read.csv(here::here("data", "AC_IPD.csv"))  # AC patient-level data
+BC.ALD <- read.csv(here::here("data", "BC_ALD.csv"))  # BC aggregate-level data
+
+
+## -------------------------------------------------------------------------
+head(AC.IPD)
+
+
+## -------------------------------------------------------------------------
+BC.ALD
+
+
+## ----hat_Delta_stats_maic-------------------------------------------------
+hat_Delta_stats_maic <- hat_Delta_stats(AC.IPD, BC.ALD, strategy = strategy_maic())
+
+
+## ----hat_Delta_stats_maic-print-------------------------------------------
+hat_Delta_stats_maic
+
+
+## ----hat_Delta_stats_stc--------------------------------------------------
+hat_Delta_stats_stc <- hat_Delta_stats(AC.IPD, BC.ALD, strategy = strategy_stc())
+hat_Delta_stats_stc
+
+
+## ----hat_Delta_stats_gcomp_ml---------------------------------------------
+# hat_Delta_stats_gcomp_ml <- hat_Delta_stats(AC.IPD, BC.ALD, strategy = strategy_gcomp_ml())
+
+
+## ----hat_Delta_stats_gcomp_stan-------------------------------------------
+hat_Delta_stats_gcomp_stan <- hat_Delta_stats(AC.IPD, BC.ALD, strategy = strategy_gcomp_stan())
+hat_Delta_stats_gcomp_stan
+
diff --git a/R/IPD_stats.R b/R/IPD_stats.R
@@ -1,5 +1,5 @@
 
-# create class for each approach
+# create S3 class for each approach
 
 #' @rdname strategy
 #' 
@@ -12,9 +12,8 @@
 #' target population are estimated by taking a weighted average of the
 #' outcomes \eqn{Y} of the \eqn{N} individuals in arm \eqn{t} of the _AB_ population
 #' 
-#' Used to compare marginal
-#' treatment effects where there are cross-trial differences in effect modifiers
-#' and limited patient-level data.
+#' Used to compare marginal treatment effects where there are cross-trial
+#' differences in effect modifiers and limited patient-level data.
 #' 
 #' \deqn{
 #' \hat{Y}_{} = \frac{\sum_{i=1}^{N} Y_{it(AB)} w_{it}}{\sum _{i=1}^{N} w_{it}}
@@ -81,6 +80,33 @@ strategy_stc <- function(formula =
 #' 
 #' @section G-computation maximum likelihood:
 #'
+#' G-computation marginalizes the conditional estimates by separating the regression modelling
+#' from the estimation of the marginal treatment effect for _A_ versus _C_.
+#' First, a regression model of the observed outcome \eqn{y} on the covariates \eqn{x} and treatment \eqn{z} is fitted to the _AC_ IPD:
+#' 
+#' \deqn{
+#' g(\mu_n) = \beta_0 + \boldsymbol{x}_n \boldsymbol{\beta_1} + (\beta_z + \boldsymbol{x_n^{EM}} \boldsymbol{\beta_2}) \mbox{I}(z_n=1)
+#' }
+#' In the context of G-computation, this regression model is often called the “Q-model.”
+#' Having fitted the Q-model, the regression coefficients are treated as nuisance parameters.
+#' The parameters are applied to the simulated covariates \eqn{x*} to predict hypothetical outcomes
+#' for each subject under both possible treatments. Namely, a pair of predicted outcomes,
+#' also called potential outcomes, under _A_ and under _C_, is generated for each subject.
+#'  
+#' By plugging treatment _C_ into the regression fit for every simulated observation,
+#' we predict the marginal outcome mean in the hypothetical scenario in which all units are under treatment _C_:
+#'
+#' \deqn{
+#' \hat{\mu}_0 = \int_{x^*} g^{-1} (\hat{\beta}_0 + x^* \hat{\beta}_1 ) p(x^*) dx^*
+#' }
+#' To estimate the marginal or population-average treatment effect for A versus C in the linear predictor scale,
+#' one back-transforms to this scale the average predictions, taken over all subjects on the natural outcome scale,
+#' and calculates the difference between the average linear predictions:
+#'  
+#' \deqn{
+#' \hat{\Delta}^{(2)}_{10} = g(\hat{\mu}_1) - g(\hat{\mu}_0)
+#' }
+#' 
 #' The default formula is
 #' \deqn{
 #'  y = X_3 + X_4 + \beta_{t}X_1 + \beta_{t}X_2
@@ -103,7 +129,32 @@ strategy_gcomp_ml <- function(formula =
 #' @rdname strategy
 #' 
 #' @section G-computation Bayesian:
+#' The difference between Bayesian G-computation and its maximum-likelihood
+#' counterpart is in the estimated distribution of the predicted outcomes. The
+#' Bayesian approach also marginalizes, integrates or standardizes over the
+#' joint posterior distribution of the conditional nuisance parameters of the
+#' outcome regression, as well as the joint covariate distribution.
 #'
+#' Draw a vector of size \eqn{N*} of predicted outcomes \eqn{y*z} under each set
+#' intervention \eqn{z* \in \{0, 1\}} from its posterior predictive distribution
+#' under the specific treatment. This is defined as \eqn{p(y*_{z*} |
+#' \mathcal{D}_{AC}) = \int_{\beta} p(y*_{z*} | \beta) p(\beta | \mathcal{D}_{AC}) d\beta}
+#' where \eqn{p(\beta | \mathcal{D}_{AC})} is the
+#' posterior distribution of the outcome regression coefficients \eqn{\beta},
+#' which encode the predictor-outcome relationships observed in the _AC_ trial IPD.
+#' 
+#' This is given by:
+#'
+#' \deqn{
+#' p(y*_{z*} | \mathcal{D}_{AC}) = \int_{x*} p(y* | z*, x*, \mathcal{D}_{AC}) p(x* | \mathcal{D}_{AC}) dx*
+#' }
+#' 
+#' \deqn{
+#' = \int_{x*} \int_{\beta} p(y* | z*, x*, \beta) p(x* | \beta) p(\beta | \mathcal{D}_{AC}) d\beta dx*
+#' }
+#' In practice, the integrals above can be approximated numerically, using full Bayesian
+#' estimation via Markov chain Monte Carlo (MCMC) sampling.
+#' 
 #' The default formula is
 #' \deqn{
 #'  y = X_3 + X_4 + \beta_{t}X_1 + \beta_{t}X_2
@@ -141,11 +192,11 @@ new_strategy <- function(strategy, ...) {
 #' 
 #' @description
 #' This is the main, top-level wrapper for `hat_Delta_stats()`.
+#' Methods taken from
+#' \insertCite{RemiroAzocar2022}{mimR}.
 #' 
-#' \insertCite{RemiroAzocar2022}{mimR}
-#' 
-#' @param AC.IPD Individual-level patient data. Suppose between studies A and C.
-#' @param BC.ALD Aggregate-level data. Suppose between studies B and C. 
+#' @param AC.IPD Individual-level patient data. Suppose between studies _A_ and _C_.
+#' @param BC.ALD Aggregate-level data. Suppose between studies _B_ and _C_. 
 #' @param strategy Computation strategy function. These can be
 #'    `strategy_maic()`, `strategy_stc()`, `strategy_gcomp_ml()` and  `strategy_gcomp_stan()`
 #' @param CI Confidence interval; between 0,1
@@ -160,6 +211,22 @@ new_strategy <- function(strategy, ...) {
 #' \insertRef{RemiroAzocar2022}{mimR}
 #' 
 #' @export
+#' @examples
+#' 
+#' data(AC_IPD)  # AC patient-level data
+#' data(BC_ALD)  # BC aggregate-level data
+#' 
+#' # matching-adjusted indirect comparison
+#' hat_Delta_stats_maic <- hat_Delta_stats(AC_IPD, BC_ALD, strategy = strategy_maic())
+#' 
+#' # simulated treatment comparison
+#' hat_Delta_stats_stc <- hat_Delta_stats(AC_IPD, BC_ALD, strategy = strategy_stc())
+#' 
+#' # G-computation with maximum likelihood
+#' # hat_Delta_stats_gcomp_ml <- hat_Delta_stats(AC_IPD, BC_ALD, strategy = strategy_gcomp_ml())
+#' 
+#' # G-computation with Bayesian inference
+#' hat_Delta_stats_gcomp_stan <- hat_Delta_stats(AC_IPD, BC_ALD, strategy = strategy_gcomp_stan())
 #' 
 hat_Delta_stats <- function(AC.IPD, BC.ALD, strategy, CI = 0.95, ...) {
 

diff --git a/R/gcomp_ml.R b/R/gcomp_ml.R
@@ -2,12 +2,13 @@
 #' G-computation maximum likelihood bootstrap
 #' 
 #' Using bootstrap resampling, calculates the log odds ratio.
-#' 
+#'     
 #' @param data Trial data 
 #' @param indices Indices sampled from rows of `data`
 #' @param formula Linear regression formula; default \eqn{y = X_3 + X_4 + \beta_t X_1 + \beta_t X_2} 
 #'
 #' @return Mean difference in expected log-odds
+#' @seealso [strategy_gcomp_ml()], [gcomp_ml_log_odds_ratio()]
 #' @export
 #' 
 gcomp_ml.boot <- function(data, indices,
@@ -24,13 +25,15 @@ gcomp_ml.boot <- function(data, indices,
 #'
 #' \eqn{\log(\hat{\mu}_A/(1 - \hat{\mu}_A)) - \log(\hat{\mu}_C/(1 - \hat{\mu}_C))}
 #'
-#' @param formula Linear regression formula
+#' @param formula Linear regression `formula` object
 #' @param dat Trial data
 #'
 #' @return Difference of log-odds 
+#' @seealso [strategy_gcomp_ml()], [gcomp_ml.boot()]
 #' @export
 #'
 gcomp_ml_log_odds_ratio <- function(formula, dat) {
+  browser()  ## what is type of data?...
 
   treat_name <- get_treatment_name(formula)
   covariate_names <- get_covariate_names(formula)
@@ -62,7 +65,6 @@ gcomp_ml_log_odds_ratio <- function(formula, dat) {
                               sd = dat[[sd_names[i]]]))
   }
 
-  browser()
   # sample covariates from approximate joint distribution using copula
   mvd <- mvdc(copula = cop,
               margins = rep("norm", n_covariates),  # Gaussian marginals

diff --git a/R/maic.R b/R/maic.R
@@ -1,6 +1,7 @@
 
 #' Estimate MAIC weights 
 #' 
+#' Matching-adjusted indirect comparison weights.
 #' Method is taken from
 #' \insertCite{Signorovitch2010}{mimR}.
 #' 
@@ -31,14 +32,17 @@ maic_weights <- function(X_EM) {
 }
 
 
-#' MAIC bootstrap
+#' MAIC bootstrap sample
+#' 
+#' Matching-adjusted indirect comparison bootstrap sampling.
 #' 
 #' @template args-ipd
 #' @param indices Vector of indices, same length as original,
 #'   which define the bootstrap sample
 #' @param formula Linear regression formula
 #' @template args-ald
 #' @return Fitted treatment coefficient is marginal effect for _A_ vs _C_
+#' @seealso [IPD_stats.maic()]
 #' 
 maic.boot <- function(ipd, indices, formula, ald) {
 

diff --git a/R/marginal_stats.R b/R/marginal_stats.R
@@ -18,7 +18,11 @@ marginal_variance <- function(ald, treatments = list("B", "C")) {
 #' Marginal treatment effect from reported event counts
 #' 
 #' Calculate
-#' \deqn{\log(n_B n_{\bar{C}}) - log(n_C n_{\bar{B}})}.
+#' \deqn{
+#' \log\left( \frac{n_B/(N_B-n_B)}{n_C/(N_B-n_{B})} \right) = \log(n_B n_{\bar{C}}) - log(n_C n_{\bar{B}})
+#' }
+#' where \eqn{\bar{C}} is the compliment of \eqn{C}
+#' so e.g. \eqn{n_{\bar{C}} = N_C - n_c}.
 #'
 #' @param ald Aggregate-level data
 #' @param treatments Treatment labels; default _B_ vs _C_

diff --git a/man/gcomp_ml.boot.Rd b/man/gcomp_ml.boot.Rd
diff --git a/man/gcomp_ml_log_odds_ratio.Rd b/man/gcomp_ml_log_odds_ratio.Rd
diff --git a/man/gen_data.Rd b/man/gen_data.Rd
diff --git a/man/get_covariate_names.Rd b/man/get_covariate_names.Rd
diff --git a/man/get_mean_names.Rd b/man/get_mean_names.Rd
diff --git a/man/get_sd_names.Rd b/man/get_sd_names.Rd
diff --git a/man/hat_Delta_stats.Rd b/man/hat_Delta_stats.Rd