diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml new file mode 100644 index 0000000..12b061b --- /dev/null +++ b/.github/workflows/check.yml @@ -0,0 +1,49 @@ +--- +name: Check + +on: + pull_request: + types: + - opened + - synchronize + - reopened + - ready_for_review + branches: + - master + - development + push: + branches: + - master + - development + + workflow_dispatch: + +jobs: + style: + if: github.event_name == 'pull_request' + name: Style Check + uses: insightsengineering/r.pkg.template/.github/workflows/style.yaml@main + secrets: + REPO_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + auto-update: false + + unit-test: + runs-on: ubuntu-latest + container: + image: ghcr.io/insightsengineering/rstudio + steps: + - + name: Checkout repo + uses: actions/checkout@v4.1.1 + - + name: Build + run: | + Rscript -e "devtools::install()" + - + name: Test + run: | + Rscript -e "testthat::test_local()" + + + diff --git a/.gitignore b/.gitignore index 2263322..3234e42 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ docker rstudio *.o *.so +RcppExports.cpp +.vscode diff --git a/NAMESPACE b/NAMESPACE index 9d02f22..29a1604 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,10 @@ # Generated by roxygen2: do not edit by hand -S3method(predict,brm) -S3method(print,brm) export(brm) -export(getProbRD) -export(getProbRR) -export(getProbScalarRD) -export(getProbScalarRR) +export(get_prob_rd) +export(get_prob_rr) +export(predict_brm) +export(print_brm) exportPattern("ˆ[[:alpha:]]+") importFrom(Rcpp,evalCpp) useDynLib(brm) diff --git a/R/1.1_MLE_Point.R b/R/1.1_MLE_Point.R deleted file mode 100644 index 354b75f..0000000 --- a/R/1.1_MLE_Point.R +++ /dev/null @@ -1,63 +0,0 @@ - -max.likelihood = function(param, y, x, va, vb, alpha.start, beta.start, weights, - max.step, thres, pa, pb) { - - startpars = c(alpha.start, beta.start) - - getProb = if (param == "RR") getProbRR else getProbRD - - ## negative log likelihood function - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weights[x == 1])) - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] + - (y[x==0])*log(p0[x==0])*weights[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] + - (y[x==1])*log(p1[x==1])*weights[x==1])) - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weights[x==0] + - (y[x==0])*log(p0[x==0])*weights[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weights[x==1] + - (y[x==1])*log(p1[x==1])*weights[x==1])) - } - - - ## Optimization - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start; beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = 
max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - - opt = list(par = c(alpha,beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha,beta)), step = step) - - return(opt) -} - diff --git a/R/1.2_MLE_Var.R b/R/1.2_MLE_Var.R deleted file mode 100644 index d45f7ad..0000000 --- a/R/1.2_MLE_Var.R +++ /dev/null @@ -1,52 +0,0 @@ - - -### variance calculation - -var.mle.rr = function(x, alpha.ml, beta.ml, va, vb, weights) { - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - n = dim(va)[1] - pA = rep(NA, n) # P(Y=1|A,V); here A = X - pA[x == 0] = p0p1[x == 0, 1] - pA[x == 1] = p0p1[x == 1, 2] - - expect.dl.by.dpsi0.squared = (pA)/(1 - pA) - dpsi0.by.dphi = (1 - p0p1[, 1]) * (1 - p0p1[, 2])/((1 - p0p1[, 1]) + (1 - - p0p1[, 2])) - dpsi0.by.dtheta = -(1 - p0p1[, 1])/((1 - p0p1[, 1]) + (1 - p0p1[, 2])) - tmp = cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) - ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb - fisher.info = (t(expect.dl.by.dpsi0.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) -} - - - - -### variance calculation - -var.mle.rd = function(x, alpha.ml, beta.ml, va, vb, weights) { - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 # P(Y=1|A,V); here A = X - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences - - expect.dl.by.dpA.squared = 1/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = (1 - rho^2) * va - dphi.by.dbeta = vb - - tmp = cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) - fisher.info = (t(expect.dl.by.dpA.squared * weights * tmp) %*% tmp) - return(solve(fisher.info)) -} diff --git a/R/1_CallMLE.R b/R/1_CallMLE.R deleted file mode 100644 index aa82343..0000000 --- a/R/1_CallMLE.R +++ /dev/null @@ -1,47 +0,0 @@ - -MLEst = function(param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb) { - - ## starting values for parameter optimization - if (is.null(alpha.start)) - alpha.start = rep(0, pa) - if (is.null(beta.start)) - beta.start = rep(0, pb) - - if (param == "OR") { - fit = stats::glm(y ~ vb - 1 + x * va - va - x, family = "binomial", - weights = weights, start = c(beta.start, alpha.start)) - - point.temp = summary(fit)$coefficients[, 1] - index = c((pb + 1):(pa + pb), 1:pb) - point.est = point.temp[index] - - cov = stats::vcov(fit)[index, index] - - converged = fit$converged - - } else { - - ### point estimate - mle = max.likelihood(param, y, x, va, vb, alpha.start, beta.start, - weights, max.step, thres, pa, pb) - point.est = mle$par - converged = mle$convergence - # print(point.est) - alpha.ml = point.est[1:pa] - beta.ml = point.est[(pa + 1):(pa + pb)] - - ### Computing Fisher Information: - if (param == "RR") - cov = var.mle.rr(x, alpha.ml, beta.ml, va, vb, weights) - if (param == "RD") - cov = var.mle.rd(x, alpha.ml, beta.ml, va, vb, weights) - sd.est = sqrt(diag(cov)) - - } - - name = paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) - sol = WrapResults(point.est, cov, param, name, va, vb, converged) - return(sol) - -} diff --git a/R/2.1_DR_Point.R b/R/2.1_DR_Point.R deleted file mode 100644 index bee2cce..0000000 --- a/R/2.1_DR_Point.R +++ /dev/null @@ -1,65 +0,0 @@ - - -dr.estimate.onestep = function(param, y, x, va, vb, alpha.start, beta, pscore, - wt, weights, max.step, thres, message) { - - startpars = 
c(alpha.start) # pars only contain alpha - ## DR estimation equation^2 - if (param == "RR") { - dr.objective = function(pars) { - p0 = getProbRR(mat_vec_mul(va,startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha = as.vector(y * exp(-x * (va %*% pars))) - tmp = mat_vec_mul(t(va), (wt * (x - pscore) * (H.alpha - p0) * weights)) - return(sum(tmp^2)) - } - } - if (param == "RD") { - dr.objective = function(pars) { - p0 = getProbRD(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] - H.alpha = y - x * tanh(mat_vec_mul(va, pars)) - tmp = mat_vec_mul(t((H.alpha - p0) * (x - pscore)) , (va * wt * weights)) - return(sum(tmp^2)) - } - } - - opt <- stats::optim(startpars, dr.objective, control=list(reltol=thres)) - opt$convergence = (opt$convergence == 0) # change cf. optim() - - return(opt) -} - - - -dr.estimate.noiterate = function(param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message) { - - pscore = as.vector(expit(mat_vec_mul(vc, gamma))) - - if (optimal == TRUE) { - if (param == "RR") { - p0 = getProbRR(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - wt = as.vector(1/(1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha.ml)) - - 1))) - } - if (param == "RD") { - p0 = getProbRD(mat_vec_mul(va,alpha.ml), mat_vec_mul(vb, beta.ml))[, 1] - rho = as.vector(tanh(mat_vec_mul(va, alpha.ml))) - wt = (1 - rho) * (1 + rho)/(p0 * (1 - p0) + rho * (1 - pscore) * - (1 - 2 * p0 - rho)) - } - } else { - wt = rep(1, length(pscore)) - } - - if (is.null(alpha.start)) - alpha.start = alpha.ml - - alpha.dr.opt = dr.estimate.onestep(param, y, x, va, vb, alpha.start, beta.ml, - pscore, wt, weights, max.step, thres, message) - - # if(MESSAGE){ print(paste('DR One Step: ',' Alpha: - # ',paste(round(alpha.dr,5),collapse=', '),' Beta: - # ',paste(round(beta.ml,5),collapse=', '))) } - - return(alpha.dr.opt) -} diff --git a/R/2.2.1_Hessian_RD.R b/R/2.2.1_Hessian_RD.R deleted file mode 100644 index 63a3dfb..0000000 --- a/R/2.2.1_Hessian_RD.R +++ /dev/null @@ -1,66 +0,0 @@ -Hessian2RD = function(y, x, va, vb, alpha.ml, beta.ml, cnt) { - # calculating the Hessian using the second derivative have to do so - # because under mis-specification of models Hessian no longer equals the - # square of the first order derivatives - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences - - ### First order derivatives ### - - dl.by.dpA = (y - pA)/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = va * (1 - rho^2) - dphi.by.dbeta = vb - - dpA.by.drho = dp0.by.drho + x - dpA.by.dalpha = drho.by.dalpha * dpA.by.drho - dpA.by.dphi = dp0.by.dphi - dpA.by.dbeta = dphi.by.dbeta * dpA.by.dphi - - ### Second order derivatives ### - - d2l.by.dpA.2 = -(y - pA)^2/sA^2 - d2pA.by.drho.2 = s0 * s1 * (2 - 2 * p0 - 2 * p1)/(s0 + s1)^3 - d2pA.by.dphi.drho = (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1/(s0 + - s1)^3 - d2pA.by.dphi.2 = (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1/(s0 + - s1)^3 - - d2rho.by.dalpha.2 = -2 * t(va * rho) %*% drho.by.dalpha - - ### Compute elements of the Hessian matrix ### - - d2l.by.dalpha.2 = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dalpha + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.drho.2 * cnt) %*% drho.by.dalpha - - 2 * t(va * rho * dl.by.dpA * 
dpA.by.drho * cnt) %*% drho.by.dalpha - - d2l.by.dalpha.dbeta = t(dpA.by.dalpha * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(drho.by.dalpha * dl.by.dpA * d2pA.by.dphi.drho * cnt) %*% dphi.by.dbeta - d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta) - - d2l.by.dbeta.2 = t(dpA.by.dbeta * d2l.by.dpA.2 * cnt) %*% dpA.by.dbeta + - t(dphi.by.dbeta * dl.by.dpA * d2pA.by.dphi.2 * cnt) %*% dphi.by.dbeta - - hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, - d2l.by.dbeta.2)) - ### NB Note the extra minus sign here - - return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, - sA = sA, rho = rho, dl.by.dpA = dl.by.dpA, dp0.by.dphi = dp0.by.dphi, - dp0.by.drho = dp0.by.drho, drho.by.dalpha = drho.by.dalpha, dphi.by.dbeta = dphi.by.dbeta, - dpA.by.drho = dpA.by.drho, dpA.by.dalpha = dpA.by.dalpha, dpA.by.dphi = dpA.by.dphi, - dpA.by.dbeta = dpA.by.dbeta)) - -} diff --git a/R/2.2.1_Hessian_RR.R b/R/2.2.1_Hessian_RR.R deleted file mode 100644 index 3d4dbdc..0000000 --- a/R/2.2.1_Hessian_RR.R +++ /dev/null @@ -1,73 +0,0 @@ -Hessian2RR = function(y, x, va, vb, alpha.ml, beta.ml, weights) { - # calculating the Hessian using the second derivative have to do so - # because under mis-specification of models Hessian no longer equals the - # square of the first order derivatives - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(va) - pA = p0 - pA[x == 1] = p1[x == 1] - - - ### Building blocks - - dpsi0.by.dtheta = -(1 - p0)/(1 - p0 + 1 - p1) - dpsi0.by.dphi = (1 - p0) * (1 - p1)/(1 - p0 + 1 - p1) - - dtheta.by.dalpha = va - dphi.by.dbeta = vb - - dl.by.dpsi0 = (y - pA)/(1 - pA) - d2l.by.dpsi0.2 = (y - 1) * pA/((1 - pA)^2) - - - - ###### d2l.by.dalpha.2 - - d2psi0.by.dtheta.2 = ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1)/((1 - - p0 + 1 - p1)^2) - - d2l.by.dtheta.2 = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + dl.by.dpsi0 * - d2psi0.by.dtheta.2 - - d2l.by.dalpha.2 = t(dtheta.by.dalpha * d2l.by.dtheta.2 * weights) %*% - dtheta.by.dalpha - - - ###### d2l.by.dalpha.dbeta - - d2psi0.by.dtheta.dphi = (1 - p0) * (1 - p1) * (p0 - p1)/(1 - p0 + 1 - - p1)^3 - - d2l.by.dtheta.dphi = d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + - dl.by.dpsi0 * d2psi0.by.dtheta.dphi - - d2l.by.dalpha.dbeta = t(dtheta.by.dalpha * d2l.by.dtheta.dphi * weights) %*% - dphi.by.dbeta - d2l.by.dbeta.dalpha = t(d2l.by.dalpha.dbeta) - # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb - - - #### d2l.by.dbeta2 - - d2psi0.by.dphi.2 = (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2)/(1 - p0 + 1 - - p1)^2) * dpsi0.by.dphi - - d2l.by.dphi.2 = d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + dl.by.dpsi0 * d2psi0.by.dphi.2 - - d2l.by.dbeta.2 = t(dphi.by.dbeta * d2l.by.dphi.2 * weights) %*% dphi.by.dbeta - - - - hessian = -rbind(cbind(d2l.by.dalpha.2, d2l.by.dalpha.dbeta), cbind(d2l.by.dbeta.dalpha, - d2l.by.dbeta.2)) - ### NB Note the extra minus sign here - - return(list(hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0.by.dtheta = dpsi0.by.dtheta, - dpsi0.by.dphi = dpsi0.by.dphi, dtheta.by.dalpha = dtheta.by.dalpha, - dphi.by.dbeta = dphi.by.dbeta, dl.by.dpsi0 = dl.by.dpsi0)) - -} diff --git a/R/2.2_DR_Var_RD.R b/R/2.2_DR_Var_RD.R deleted file mode 100644 index 97730f0..0000000 --- a/R/2.2_DR_Var_RD.R +++ /dev/null @@ -1,120 +0,0 @@ - -## Sandwich estimator for variance of RD - -var.rd.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, - optimal, weights) { - 
######################################## - - pscore = as.vector(expit(vc %*% gamma)) - n = length(pscore) - - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing - ### the Hessian: - - Hrd = Hessian2RD(y, x, va, vb, alpha.ml, beta.ml, weights) - hessian = Hrd$hessian - p0 = Hrd$p0; p1 = Hrd$p1; pA = Hrd$pA - s0 = Hrd$s0; s1 = Hrd$s1; sA = Hrd$sA - rho = Hrd$rho - dl.by.dpA = Hrd$dl.by.dpA - dp0.by.dphi = Hrd$dp0.by.dphi - dp0.by.drho = Hrd$dp0.by.drho - drho.by.dalpha = Hrd$drho.by.dalpha - dphi.by.dbeta = Hrd$dphi.by.dbeta - dpA.by.drho = Hrd$dpA.by.drho - dpA.by.dalpha = Hrd$dpA.by.dalpha - dpA.by.dphi = Hrd$dpA.by.dphi - dpA.by.dbeta = Hrd$dpA.by.dbeta - - - ############# extra building blocks ########################## - - H.alpha = y - x * as.vector(tanh(va %*% alpha.dr)) - - ############# Calculation of optimal vector (used in several places below) ## - - if (optimal == TRUE) { - wt = (1 - rho^2)/(pscore * s0 + (1 - pscore) * s1) - } else { - wt = rep(1, n) - } - - - ### 2. -E[dU.by.dalphaml.betaml] #################################### - - dU.by.dp0 = -va * wt * (x - pscore) # n by 2 - dp0.by.dalpha.beta = cbind(drho.by.dalpha * dp0.by.drho, dphi.by.dbeta * - dp0.by.dphi) # n by 4 - - dU.by.dwt = va * (x - pscore) * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - esA = pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]... - dwt.by.drho = (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - - 2 * p1))/esA^2 - dwt.by.dp0 = -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1)/esA^2 - - dwt.by.dalpha = drho.by.dalpha * (dwt.by.drho + dwt.by.dp0 * dp0.by.drho) - dwt.by.dbeta = dphi.by.dbeta * dwt.by.dp0 * dp0.by.dphi - - dwt.by.dalpha.beta = cbind(dwt.by.dalpha, dwt.by.dbeta) # n by 4 - } else { - dwt.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb)) - } - - dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% (dp0.by.dalpha.beta) + - t(dU.by.dwt * weights) %*% dwt.by.dalpha.beta - - - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This - ### is the bread of the sandwich estimate) - - dU.by.dH = va * wt * (x - pscore) # n by 2 - rho.dr = as.vector(tanh(va %*% alpha.dr)) - dH.by.dalpha.dr = -va * x * (1 - rho.dr^2) - - tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights) # 2 by 2 - - - ### 4. E[d(prop score score equation)/dgamma] - - dpscore.by.dgamma = vc * pscore * (1 - pscore) # n by 2 - part4 = -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 - - - ### 5. 
E[dU/dgamma] - - dU.by.dpscore = -va * wt * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - dwt.by.dpscore = -(1 - rho^2) * (s0 - s1)/esA^2 - dwt.by.dgamma = dpscore.by.dgamma * dwt.by.dpscore # n by 2 - } else { - dwt.by.dgamma = matrix(0, n, ncol(vc)) - } - - dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwt * - weights) %*% dwt.by.dgamma # 2 by 2 - - - - ############################################################################# Assembling semi-parametric variance matrix - - U = va * wt * (x - pscore) * (H.alpha - p0) # n by 2 - - S = cbind(dpA.by.dalpha, dpA.by.dbeta) * dl.by.dpA - - pscore.score = vc * (x - pscore) - - Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 - USigma = t(Utilde * weights) %*% Utilde/sum(weights) - - - - ################################### Asymptotic var matrix for alpha.dr - - alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights) - return(alpha.dr.variance) - -} diff --git a/R/2.2_DR_Var_RR.R b/R/2.2_DR_Var_RR.R deleted file mode 100644 index 66277f6..0000000 --- a/R/2.2_DR_Var_RR.R +++ /dev/null @@ -1,113 +0,0 @@ - -## Sandwich estimator for variance of RR - -var.rr.dr = function(y, x, va, vb, vc, alpha.dr, alpha.ml, beta.ml, gamma, - optimal, weights) { - ######################################## - - pscore = as.vector(expit(vc %*% gamma)) - n = length(pscore) - - ### 1. - E[dS/d(alpha.ml,beta.ml)] ############################## Computing - ### the Hessian: - - Hrr = Hessian2RR(y, x, va, vb, alpha.ml, beta.ml, weights) - hessian = Hrr$hessian - p0 = Hrr$p0; p1 = Hrr$p1; pA = Hrr$pA - dpsi0.by.dtheta = Hrr$dpsi0.by.dtheta - dpsi0.by.dphi = Hrr$dpsi0.by.dphi - dtheta.by.dalpha = Hrr$dtheta.by.dalpha - dphi.by.dbeta = Hrr$dphi.by.dbeta - dl.by.dpsi0 = Hrr$dl.by.dpsi0 - - ############# extra building blocks ########################## - - H.alpha = y * exp(-x * (as.vector(va %*% alpha.dr))) - - ############# Calculation of optimal vector (used in several places below) ## - - if (optimal == TRUE) { - theta = as.vector(va %*% alpha.ml) # avoid n by 1 matrix - dtheta.by.dalpha.beta = cbind(va, matrix(0, n, length(beta.ml))) - wt = 1/(1 - p0 + (1 - pscore) * (exp(-theta) - 1)) - } else { - wt = rep(1, n) - } - - - ### 2. -E[dU.by.dalphaml.betaml] #################################### - - dU.by.dp0 = -va * wt * (x - pscore) # n by 2 - dp0.by.dpsi0 = p0 - dpsi0.by.dalpha.beta = cbind(dpsi0.by.dtheta * dtheta.by.dalpha, dpsi0.by.dphi * - dphi.by.dbeta) # n by 4 - # 4 = 2 (alpha) + 2 (beta) - dp0.by.dalpha.beta = dpsi0.by.dalpha.beta * dp0.by.dpsi0 # n by 4 - - dU.by.dwt = va * (x - pscore) * (H.alpha - p0) # n by 2 - dwt.by.dwti = -wt^2 # n - # wti is short for wt_inv - dU.by.dwti = dU.by.dwt * dwt.by.dwti # n by 2 - if (optimal == TRUE) { - dwti.by.dalpha.beta = -dp0.by.dalpha.beta - (1 - pscore) * exp(-theta) * - dtheta.by.dalpha.beta # n by 4 - } else { - dwti.by.dalpha.beta = matrix(0, n, ncol(va) + ncol(vb)) - } - - dU.by.dalpha.ml.beta.ml = t(dU.by.dp0 * weights) %*% dp0.by.dalpha.beta + - t(dU.by.dwti * weights) %*% dwti.by.dalpha.beta - - - ### 3. tau = -E[dU/dalpha.dr] ######################################## (This - ### is the bread of the sandwich estimate) - - dU.by.dH = va * wt * (x - pscore) # n by 2 - dH.by.dalpha.dr = -va * x * H.alpha # n by 2 - - tau = -t(dU.by.dH * weights) %*% dH.by.dalpha.dr/sum(weights) # 2 by 2 - - - ### 4. 
E[d(prop score score equation)/dgamma] - - dpscore.by.dgamma = vc * pscore * (1 - pscore) # n by 2 - part4 = -t(vc * weights) %*% dpscore.by.dgamma # 2 by 2 - - - ### 5. E[dU/dgamma] - - dU.by.dpscore = -va * wt * (H.alpha - p0) # n by 2 - - if (optimal == TRUE) { - dwti.by.dpscore = 1 - exp(-theta) # n - dwti.by.dgamma = dpscore.by.dgamma * dwti.by.dpscore # n by 2 - } else { - dwti.by.dgamma = matrix(0, n, ncol(vc)) - } - - dU.by.dgamma = t(dU.by.dpscore * weights) %*% dpscore.by.dgamma + t(dU.by.dwti * - weights) %*% dwti.by.dgamma # 2 by 2 - - - - ############################################################################# Assembling semi-parametric variance matrix - - U = va * wt * (x - pscore) * (H.alpha - p0) # n by 2 - - S = cbind(dl.by.dpsi0 * (dpsi0.by.dtheta + x) * dtheta.by.dalpha, dl.by.dpsi0 * - dpsi0.by.dphi * dphi.by.dbeta) - pscore.score = vc * (x - pscore) - - Utilde = U - t(dU.by.dalpha.ml.beta.ml %*% (-solve(hessian)) %*% t(S)) - - t(dU.by.dgamma %*% (solve(part4)) %*% t(pscore.score)) # n by 2 - USigma = t(Utilde * weights) %*% Utilde/sum(weights) - - - - ################################### Asymptotic var matrix for alpha.dr - - alpha.dr.variance = solve(tau) %*% USigma %*% solve(tau)/sum(weights) - - return(alpha.dr.variance) - -} diff --git a/R/2_CallDR.R b/R/2_CallDR.R deleted file mode 100644 index 6ca8816..0000000 --- a/R/2_CallDR.R +++ /dev/null @@ -1,31 +0,0 @@ - -DREst = function(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) { - - dr.est = dr.estimate.noiterate(param, y, x, va, vb, vc, alpha.ml, beta.ml, - gamma, optimal, weights, max.step, thres, alpha.start, message) - point.est = dr.est$par - converged = dr.est$convergence - - if (param == "RR") - alpha.cov = var.rr.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml, - gamma, optimal, weights) - if (param == "RD") - alpha.cov = var.rd.dr(y, x, va, vb, vc, point.est, alpha.ml, beta.ml, - gamma, optimal, weights) - - pa = dim(va)[2] - pb = dim(vb)[2] - pc = dim(vc)[2] - name = paste(c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)), - c(1:pa, 1:pb, 1:pc)) - point.est = c(point.est, beta.ml, gamma) - cov = matrix(NA,pa+pb+pc, pa+pb+pc) - cov[1:pa,1:pa] = alpha.cov - cov[(pa+1):(pa+pb),(pa+1):(pa+pb)] = beta.cov - cov[(pa+pb+1):(pa+pb+pc),(pa+pb+1):(pa+pb+pc)] = gamma.cov - - sol = WrapResults(point.est, cov, param, name, va, vb, converged) - return(sol) - -} diff --git a/R/MyFunc.R b/R/MyFunc.R deleted file mode 100644 index 8f10652..0000000 --- a/R/MyFunc.R +++ /dev/null @@ -1,71 +0,0 @@ -logit = function(prob) { - log(prob) - log(1 - prob) -} - -expit = function(logodds) { - 1/(1 + exp(-logodds)) -} - - -getlogop = function(p0, p1) { - log(p0) + log(p1) - log(1 - p0) - log(1 - p1) -} - -getlogrr = function(p0, p1) { - log(p1) - log(p0) -} - -getatanhrd = function(p0, p1) { - atanh(p1 - p0) -} - - -## Function for checking if two things are equal within numerical precision -same = function(x, y, tolerance = .Machine$double.eps^0.5) { - abs(x - y) < tolerance -} - - -## Functions for wrapping estimation results into a nice format -WrapResults = function(point.est, cov, param, name, va, vb, converged) { - - se.est = sqrt(diag(cov)) - - conf.lower = point.est + stats::qnorm(0.025) * se.est - conf.upper = point.est + stats::qnorm(0.975) * se.est - p.temp = stats::pnorm(point.est/se.est, 0, 1) - p.value = 2 * pmin(p.temp, 1 - p.temp) - - names(point.est) = names(se.est) = rownames(cov) = colnames(cov) = names(conf.lower) = 
names(conf.upper) = names(p.value) = name - - coefficients = cbind(point.est, se.est, conf.lower, conf.upper, p.value) - - linear.predictors = va %*% point.est[1:ncol(va)] - if(param=="RR") param.est = exp(linear.predictors) - if(param=="RD") param.est = linear.predictors - if(param=="OR") param.est = expit(linear.predictors) - - sol = list(param = param, point.est = point.est, se.est = se.est, cov = cov, - conf.lower = conf.lower, conf.upper = conf.upper, p.value = p.value, - coefficients = coefficients, param.est = param.est, va = va, vb = vb, - converged = converged) - class(sol) = c("brm", "list") - attr(sol, "hidden") = c("param", "se.est", "cov", "conf.lower", "conf.upper", - "p.value","coefficients", "param.est", "va", "vb", "converged") - - return(sol) - -} - - -## This function is useful for finding limits on the boundary -## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x))) -## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio) -## Limits are 0 and 1 as x goes to -infty and +infty respectively -## The function will never return NaN given a numerical input - -getPrbAux = function(x) { - ifelse((x < 17) & (x > (-500)), - 0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5), - ifelse(x<0, 0, 1)) -} diff --git a/R/Print.R b/R/Print.R deleted file mode 100644 index 4b5fe02..0000000 --- a/R/Print.R +++ /dev/null @@ -1,41 +0,0 @@ - -#' Ancillary function for printing -#' -#' @param x a list obtained with the function 'brm' -#' -#' @param ... additional arguments affecting the output -#' -#' @export - -print.brm = function(x, ...) { - hid = attr(x, "hidden") - nhid = which(!names(x) %in% hid) - - if (x$param == "RR") { - cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", - "\n\n", sep = "") - cat("Target model: log(RR) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "RD") { - cat("Parameter of interest: (conditional) risk difference;", "\n", - "nuisance parameter: odds product.", "\n\n", sep = "") - cat("Target model: log(RD) = alpha * va", "\n") - cat("Nuisance model: log(OP) = beta * vb", "\n\n") - } - if (x$param == "OR") { - cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", - "\n\n", sep = "") - cat("Target model: log(OR) = alpha * va", "\n") - cat("Nuisance model: log(p0) = beta * vb", "\n\n") - } - - - for (i in nhid) { - x[[i]] = round(x[[i]], 3) - } - - print(x[nhid], 3) - - cat("See the element '$coefficients' for more information.\n") -} diff --git a/R/RcppExports.R b/R/RcppExports.R index 5cdfb5a..0bf8692 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -5,13 +5,12 @@ #' @useDynLib brm #' @exportPattern ˆ[[:alpha:]]+ compute_augmentation_cpp <- function(va, vb, fisher, k_rs, k_stu, k_s_tu) { - .Call('_brm_compute_augmentation_cpp', PACKAGE = 'brm', va, vb, fisher, k_rs, k_stu, k_s_tu) + .Call("_brm_compute_augmentation_cpp", PACKAGE = "brm", va, vb, fisher, k_rs, k_stu, k_s_tu) } #' @importFrom Rcpp evalCpp #' @useDynLib brm #' @exportPattern ˆ[[:alpha:]]+ mat_vec_mul <- function(m, v) { - .Call('_brm_mat_vec_mul', PACKAGE = 'brm', m, v) + .Call("_brm_mat_vec_mul", PACKAGE = "brm", m, v) } - diff --git a/R/ValidCheck.R b/R/ValidCheck.R deleted file mode 100644 index 3b74d4c..0000000 --- a/R/ValidCheck.R +++ /dev/null @@ -1,36 +0,0 @@ - -ValidCheck = function(param, y, x, va, vb, vc, weights, subset, est.method, - optimal, max.step, thres, alpha.start, beta.start) { - if (!is.character(param)) 
- stop("Parameter must be a character") - if (!(param %in% c("RD", "RR", "OR"))) - stop("Parameter can only take RR, RD or OR") - - if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + - sum(is.na(vc)) + sum(is.na(weights)) > 0) - warning("Observations with missing values will be removed.") - if (!(all(y %in% c(0, 1)))) - stop("y values must be either 0 or 1.") - if (!(all(x %in% c(0, 1)))) - stop("x values must be either 0 or 1.") - if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) - stop("y, x and v must have the same length (dimension)") - - if(!is.numeric(weights)) - stop("weights must either be NULL or take numerical values") - if(!is.numeric(subset)) - stop("subset must either be NULL or take numerical values") - if (!(est.method %in% c("MLE", "DR"))) - stop("Must use MLE or DR for estimation") - if (!is.logical(optimal)) - stop("optimal must be a logical variable") - if(!is.numeric(max.step) & !is.null(max.step)) - stop("max.step must be a number") - if(!is.numeric(thres)) - stop("thres must be a number") - if(!is.null(alpha.start) & length(alpha.start) != dim(va)[2]) - stop("length of alpha.start must match the dimension of va") - if(!is.null(beta.start) & length(beta.start) != dim(vb)[2]) - stop("length of beta.start must match the dimension of vb") - -} diff --git a/R/brm.R b/R/brm.R index 3373435..9505ba9 100644 --- a/R/brm.R +++ b/R/brm.R @@ -1,20 +1,20 @@ -#' @title Fitting Binary Regression Models -#' -#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}. -#' -#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. -#' -#' More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). 
Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). -#' -#' \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. -#' -#' \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. -#' -#' See Richardson et al. (2017) for more details. -#' -#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). -#' -#' -#' @docType package -#' @name brm-package -NULL \ No newline at end of file +#' @title Fitting Binary Regression Models +#' +#' @description The function \code{brm} in this package provides an alternative to generalized linear models for fitting binary regression models, in which both the response \eqn{y} and the primary exposure of interest \eqn{x} are binary. This is especially useful if the interest lies in estimating the association between \eqn{x} and \eqn{y}, and how that association varies as a function of (other) covariates \eqn{v}. +#' +#' Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. +#' +#' More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). +#' +#' \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. 
\code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. +#' +#' \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. +#' +#' See Richardson et al. (2017) for more details. +#' +#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). +#' +#' +#' @docType package +#' @name brm-package +NULL diff --git a/R/dr_est.R b/R/dr_est.R new file mode 100644 index 0000000..3cb5b74 --- /dev/null +++ b/R/dr_est.R @@ -0,0 +1,40 @@ +dr_est <- function( + param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, + weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message +) { + dr_est <- dr_estimate_noiterate( + param, y, x, va, vb, vc, alpha_ml, beta_ml, + gamma, optimal, weights, max_step, thres, alpha_start, message + ) + point_est <- dr_est$par + converged <- dr_est$convergence + + if (param == "RR") { + alpha_cov <- var_rr_dr( + y, x, va, vb, vc, point_est, alpha_ml, beta_ml, + gamma, optimal, weights + ) + } + if (param == "RD") { + alpha_cov <- var_rd_dr( + y, x, va, vb, vc, point_est, alpha_ml, beta_ml, + gamma, optimal, weights + ) + } + + pa <- dim(va)[2] + pb <- dim(vb)[2] + pc <- dim(vc)[2] + name <- paste( + c(rep("alpha", pa), rep("beta", pb), rep("gamma", pc)), + c(1:pa, 1:pb, 1:pc) + ) + point_est <- c(point_est, beta_ml, gamma) + cov <- matrix(NA, pa + pb + pc, pa + pb + pc) + cov[1:pa, 1:pa] <- alpha_cov + cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] <- beta_cov + cov[(pa + pb + 1):(pa + pb + pc), (pa + pb + 1):(pa + pb + pc)] <- gamma_cov + + sol <- wrap_results(point_est, cov, param, name, va, vb, converged) + return(sol) +} diff --git a/R/dr_point.R b/R/dr_point.R new file mode 100644 index 0000000..a00d971 --- /dev/null +++ b/R/dr_point.R @@ -0,0 +1,67 @@ +dr_estimate_onestep <- function( + param, y, x, va, vb, alpha_start, beta, pscore, + wt, weights, max_step, thres, message +) { + startpars <- c(alpha_start) # pars only contain alpha + ## DR estimation equation^2 + if (param == "RR") { + dr_objective <- function(pars) { + p0 <- get_prob_rr(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + H_alpha <- as.vector(y * exp(-x * (va %*% pars))) + tmp <- mat_vec_mul(t(va), (wt * (x - pscore) * (H_alpha - p0) * weights)) + return(sum(tmp^2)) + } + } + if (param == "RD") { + dr_objective <- function(pars) { + p0 <- get_prob_rd(mat_vec_mul(va, startpars), mat_vec_mul(vb, beta))[, 1] + H_alpha <- y - x * tanh(mat_vec_mul(va, pars)) + tmp <- mat_vec_mul(t((H_alpha - p0) * (x - pscore)), (va * wt * weights)) + return(sum(tmp^2)) + } + } + + opt <- stats::optim(startpars, dr_objective, control = list(reltol = thres)) + opt$convergence <- (opt$convergence == 0) # change cf. 
optim() + + return(opt) +} + + +dr_estimate_noiterate <- function( + param, y, x, va, vb, vc, alpha_ml, beta_ml, + gamma, optimal, weights, max_step, thres, alpha_start, message +) { + pscore <- as.vector(expit(mat_vec_mul(vc, gamma))) + + if (optimal == TRUE) { + if (param == "RR") { + p0 <- get_prob_rr(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1] + wt <- as.vector(1 / (1 - p0 + (1 - pscore) * (exp(mat_vec_mul(-va, alpha_ml)) - + 1))) + } + if (param == "RD") { + p0 <- get_prob_rd(mat_vec_mul(va, alpha_ml), mat_vec_mul(vb, beta_ml))[, 1] + rho <- as.vector(tanh(mat_vec_mul(va, alpha_ml))) + wt <- (1 - rho) * (1 + rho) / (p0 * (1 - p0) + rho * (1 - pscore) * + (1 - 2 * p0 - rho)) + } + } else { + wt <- rep(1, length(pscore)) + } + + if (is.null(alpha_start)) { + alpha_start <- alpha_ml + } + + alpha_dr_opt <- dr_estimate_onestep( + param, y, x, va, vb, alpha_start, beta_ml, + pscore, wt, weights, max_step, thres, message + ) + + # if(MESSAGE){ print(paste('DR One Step: ',' Alpha: + # ',paste(round(alpha_dr,5),collapse=', '),' Beta: + # ',paste(round(beta_ml,5),collapse=', '))) } + + return(alpha_dr_opt) +} diff --git a/R/getProbScalarRD.R b/R/getProbScalarRD.R deleted file mode 100644 index 2dfcada..0000000 --- a/R/getProbScalarRD.R +++ /dev/null @@ -1,99 +0,0 @@ - -#' Calculate risks from arctanh RD and log OP -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = mapply(getProbScalarRD, logrr, logop) -#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export - - -getProbScalarRD = function(atanhrd, logop) { - - if(length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - - if (logop > 350) { - if (atanhrd < 0) { - p0 = 1 - p1 = p0 + tanh(atanhrd) - } else { - p1 = 1 - p0 = p1 - tanh(atanhrd) - } - } else { - ## not on boundary logop = 0; solving linear equations - if (same(logop, 0)) { - p0 = 0.5 * (1 - tanh(atanhrd)) - } else { - p0 = (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * - (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * - (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - - 1)) - } - p1 = p0 + tanh(atanhrd) - } - return(c(p0, p1)) -} - -#' Calculate risks from arctanh RD and log OP (vectorised) -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
-#' -#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -#' -#' @examples getProbRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = getProbRD(logrr, logop) -#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export -getProbRD = function(atanhrd, logop) { - if(is.matrix(atanhrd) && ncol(atanhrd) == 2){ - logop = atanhrd[,2] - atanhrd = atanhrd[,1] - } else if(length(logop)==1 && is.na(logop) && length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - p0 <- ifelse (logop > 350, - ifelse(atanhrd < 0, - 1, - 1 - tanh(atanhrd)), - ## not on boundary logop = 0; solving linear equations - ifelse(same(logop, 0), - 0.5 * (1 - tanh(atanhrd)), - (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - 1)))) - p1 <- ifelse (logop > 350, - ifelse(atanhrd < 0, - 1 + tanh(atanhrd), - 1), - ## not on boundary logop = 0 - p0 + tanh(atanhrd)) - cbind(p0,p1) -} diff --git a/R/getProbScalarRR.R b/R/getProbScalarRR.R deleted file mode 100644 index 6949c62..0000000 --- a/R/getProbScalarRR.R +++ /dev/null @@ -1,117 +0,0 @@ - - -#' Calculate risks from log RR and log OP -#' -#' @param logrr log of relative risk -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRR(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = mapply(getProbScalarRR, logrr, logop) -#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export - -getProbScalarRR = function(logrr, logop = NA) { - - if(length(logrr) == 2){ - logop = logrr[2] - logrr = logrr[1] - } - - if ((logop < (-12)) || (logop > 12) || (logrr < (-12)) || (logrr > 12)) { - # on the boundary South edge: large -ve logrr or (large -ve logop and -ve - # logrr) - if ((logrr < (-12)) || ((logop < (-12)) && (logrr < 0))) { - p0 = getPrbAux(logop - logrr) - p1 = 0 - } else { - if ((logrr > 12) || ((logop < (-12)) && (logrr > 0))) { - # West edge: large +ve logrr or (large -ve logop and +ve logrr) - p0 = 0 - p1 = getPrbAux(logrr + logop) - } else { - # North or East edges (=logop is large +ve) - p0 = min(exp(-logrr), 1) - p1 = min(exp(logrr), 1) - } - } - } else { - # not on the boundary logop = 0; solving linear equations logop not 0; - # solving a quadratic equation - if (same(logop, 0)) { - p0 = 1/(1 + exp(logrr)) - } else { - p0 = (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + - 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * - (1 - exp(logop))) - } - p1 = exp(logrr) * p0 - } - return(c(p0, p1)) -} - -#' Calculate risks from log RR and log OP (vectorised) -#' -#' @param logrr log of relative risk -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
-#' -#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -#' -#' @examples getProbRR(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = getProbRR(logrr, logop) -#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export -getProbRR = function(logrr, logop = NA) { - if(is.matrix(logrr) && ncol(logrr) == 2){ - logop = logrr[,2] - logrr = logrr[,1] - } else if(length(logop)==1 && is.na(logop) && length(logrr) == 2){ - logop = logrr[2] - logrr = logrr[1] - } - p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), - ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve - ## logrr) - ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), - getPrbAux(logop-logrr), - ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), - ## West edge: large +ve logrr or (large -ve logop and +ve logrr) - 0, - pmin(exp(-logrr), 1))), - ## not on the boundary logop = 0; solving linear equations logop not 0; - ## solving a quadratic equation - ifelse(same(logop, 0), - 1/(1 + exp(logrr)), - (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop))))/(2 * exp(logrr) * (1 - exp(logop))))) - p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), - ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve - ## logrr - ifelse ((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), - 0, - ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), - ## West edge: large +ve logrr or (large -ve logop and +ve logrr) - getPrbAux(logop + logrr), - pmin(exp(logrr), 1))), - ## not on the boundary logop = 0 - exp(logrr) * p0) - cbind(p0,p1) -} diff --git a/R/get_prob_rd.R b/R/get_prob_rd.R new file mode 100644 index 0000000..4bc99c0 --- /dev/null +++ b/R/get_prob_rd.R @@ -0,0 +1,50 @@ +#' Calculate risks from arctanh RD and log OP (vectorised) +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
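A quick numeric check of the relation stated in the @details above may help when reviewing the formulas (a sketch, not part of the patch or of the package tests; it assumes the installed package still provides the internal helper same() that get_prob_rd() calls, and the risks 0.2 and 0.55 are made up for illustration): form atanh RD and log OP by hand from a pair of risks, then let the new get_prob_rd() recover them.

p0 <- 0.2
p1 <- 0.55
atanhrd <- atanh(p1 - p0)                         # arctanh of the risk difference
logop <- log(p0 / (1 - p0)) + log(p1 / (1 - p1))  # log odds product as defined above
get_prob_rd(atanhrd, logop)                       # 1 x 2 matrix, approximately (0.20, 0.55)

The same round trip works elementwise when atanhrd and logop are vectors, which is what the vectorised implementation below provides.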
+#' +#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns +#' +#' @examples get_prob_rd(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- get_prob_rd(logrr, logop) +#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export +get_prob_rd <- function(atanhrd, logop) { + if (is.matrix(atanhrd) && ncol(atanhrd) == 2) { + logop <- atanhrd[, 2] + atanhrd <- atanhrd[, 1] + } else if (length(logop) == 1 && is.na(logop) && length(atanhrd) == 2) { + logop <- atanhrd[2] + atanhrd <- atanhrd[1] + } + p0 <- ifelse(logop > 350, + ifelse(atanhrd < 0, + 1, + 1 - tanh(atanhrd) + ), + ## not on boundary logop = 0; solving linear equations + ifelse(same(logop, 0), + 0.5 * (1 - tanh(atanhrd)), + (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1)) + ) + ) + p1 <- ifelse(logop > 350, + ifelse(atanhrd < 0, + 1 + tanh(atanhrd), + 1 + ), + ## not on boundary logop = 0 + p0 + tanh(atanhrd) + ) + cbind(p0, p1) +} diff --git a/R/get_prob_rr.R b/R/get_prob_rr.R new file mode 100644 index 0000000..40097e2 --- /dev/null +++ b/R/get_prob_rr.R @@ -0,0 +1,62 @@ +#' Calculate risks from log RR and log OP +#' +#' @param logrr log of relative risk +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' +#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns +#' +#' @examples get_prob_rr(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- get_prob_rr(logrr, logop) +#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export +get_prob_rr <- function(logrr, logop = NA) { + if (is.matrix(logrr) && ncol(logrr) == 2) { + logop <- logrr[, 2] + logrr <- logrr[, 1] + } else if (length(logop) == 1 && is.na(logop) && length(logrr) == 2) { + logop <- logrr[2] + logrr <- logrr[1] + } + p0 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), + ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve + ## logrr) + ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), + get_prb_aux(logop - logrr), + ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), + ## West edge: large +ve logrr or (large -ve logop and +ve logrr) + 0, + pmin(exp(-logrr), 1) + ) + ), + ## not on the boundary logop = 0; solving linear equations logop not 0; + ## solving a quadratic equation + ifelse(same(logop, 0), + 1 / (1 + exp(logrr)), + (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop))) + ) + ) + p1 <- ifelse((logop < (-12)) | (logop > 12) | (logrr < (-12)) | (logrr > 12), + ## on the boundary South edge: large -ve logrr or (large -ve logop and -ve + ## logrr + ifelse((logrr < (-12)) | ((logop < (-12)) & (logrr < 0)), + 0, + ifelse((logrr > 12) | ((logop < (-12)) & (logrr > 0)), + ## West edge: large +ve logrr or (large -ve logop and +ve logrr) + get_prb_aux(logop + logrr), + pmin(exp(logrr), 1) + ) + ), + ## not on the boundary logop = 0 + exp(logrr) * p0 + ) + cbind(p0, p1) +} diff --git a/R/hessian_2_rd.R b/R/hessian_2_rd.R new file mode 100644 index 0000000..ba4cab3 --- /dev/null +++ b/R/hessian_2_rd.R @@ -0,0 +1,69 @@ +hessian_2_rd <- function(y, x, va, vb, alpha_ml, beta_ml, cnt) { + # calculating the Hessian 
using the second derivative have to do so + # because under mis-specification of models Hessian no longer equals the + # square of the first order derivatives + + p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as.vector(tanh(va %*% alpha_ml)) # estimated risk differences + + ### First order derivatives ### + + dl_by_dpA <- (y - pA) / sA + dp0_by_dphi <- s0 * s1 / (s0 + s1) + dp0_by_drho <- -s0 / (s0 + s1) + drho_by_dalpha <- va * (1 - rho^2) + dphi_by_dbeta <- vb + + dpA_by_drho <- dp0_by_drho + x + dpA_by_dalpha <- drho_by_dalpha * dpA_by_drho + dpA_by_dphi <- dp0_by_dphi + dpA_by_dbeta <- dphi_by_dbeta * dpA_by_dphi + + ### Second order derivatives ### + + d2l_by_dpA_2 <- -(y - pA)^2 / sA^2 + d2pA_by_drho_2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3 + d2pA_by_dphi_drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 + + s1)^3 + d2pA_by_dphi_2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 + + s1)^3 + + d2rho_by_dalpha_2 <- -2 * t(va * rho) %*% drho_by_dalpha + + ### Compute elements of the Hessian matrix ### + + d2l_by_dalpha_2 <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dalpha + + t(drho_by_dalpha * dl_by_dpA * d2pA_by_drho_2 * cnt) %*% drho_by_dalpha - + 2 * t(va * rho * dl_by_dpA * dpA_by_drho * cnt) %*% drho_by_dalpha + + d2l_by_dalpha_dbeta <- t(dpA_by_dalpha * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta + + t(drho_by_dalpha * dl_by_dpA * d2pA_by_dphi_drho * cnt) %*% dphi_by_dbeta + d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta) + + d2l_by_dbeta_2 <- t(dpA_by_dbeta * d2l_by_dpA_2 * cnt) %*% dpA_by_dbeta + + t(dphi_by_dbeta * dl_by_dpA * d2pA_by_dphi_2 * cnt) %*% dphi_by_dbeta + + hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind( + d2l_by_dbeta_dalpha, + d2l_by_dbeta_2 + )) + ### NB Note the extra minus sign here + + return(list( + hessian = hessian, p0 = p0, p1 = p1, pA = pA, s0 = s0, s1 = s1, + sA = sA, rho = rho, dl_by_dpA = dl_by_dpA, dp0_by_dphi = dp0_by_dphi, + dp0_by_drho = dp0_by_drho, drho_by_dalpha = drho_by_dalpha, dphi_by_dbeta = dphi_by_dbeta, + dpA_by_drho = dpA_by_drho, dpA_by_dalpha = dpA_by_dalpha, dpA_by_dphi = dpA_by_dphi, + dpA_by_dbeta = dpA_by_dbeta + )) +} diff --git a/R/hessian_2_rr.R b/R/hessian_2_rr.R new file mode 100644 index 0000000..bfb4d14 --- /dev/null +++ b/R/hessian_2_rr.R @@ -0,0 +1,74 @@ +hessian_2_rr <- function(y, x, va, vb, alpha_ml, beta_ml, weights) { + # calculating the Hessian using the second derivative have to do so + # because under mis-specification of models Hessian no longer equals the + # square of the first order derivatives + + p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 + pA[x == 1] <- p1[x == 1] + + + ### Building blocks + + dpsi0_by_dtheta <- -(1 - p0) / (1 - p0 + 1 - p1) + dpsi0_by_dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1) + + dtheta_by_dalpha <- va + dphi_by_dbeta <- vb + + dl_by_dpsi0 <- (y - pA) / (1 - pA) + d2l_by_dpsi0_2 <- (y - 1) * pA / ((1 - pA)^2) + + + ###### d2l_by_dalpha_2 + + d2psi0_by_dtheta_2 <- ((p0 - p1) * dpsi0_by_dtheta - (1 - p0) * p1) / ((1 - + p0 + 1 - p1)^2) + + d2l_by_dtheta_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x)^2 + dl_by_dpsi0 * + d2psi0_by_dtheta_2 + + d2l_by_dalpha_2 <- t(dtheta_by_dalpha * d2l_by_dtheta_2 * weights) %*% 
+ dtheta_by_dalpha + + + ###### d2l_by_dalpha_dbeta + + d2psi0_by_dtheta_dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 - + p1)^3 + + d2l_by_dtheta_dphi <- d2l_by_dpsi0_2 * (dpsi0_by_dtheta + x) * dpsi0_by_dphi + + dl_by_dpsi0 * d2psi0_by_dtheta_dphi + + d2l_by_dalpha_dbeta <- t(dtheta_by_dalpha * d2l_by_dtheta_dphi * weights) %*% + dphi_by_dbeta + d2l_by_dbeta_dalpha <- t(d2l_by_dalpha_dbeta) + # d2l_by_dalpha_dbeta is symmetric itself if (because) va=vb + + + #### d2l_by_dbeta2 + + d2psi0_by_dphi_2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 - + p1)^2) * dpsi0_by_dphi + + d2l_by_dphi_2 <- d2l_by_dpsi0_2 * (dpsi0_by_dphi)^2 + dl_by_dpsi0 * d2psi0_by_dphi_2 + + d2l_by_dbeta_2 <- t(dphi_by_dbeta * d2l_by_dphi_2 * weights) %*% dphi_by_dbeta + + + hessian <- -rbind(cbind(d2l_by_dalpha_2, d2l_by_dalpha_dbeta), cbind( + d2l_by_dbeta_dalpha, + d2l_by_dbeta_2 + )) + ### NB Note the extra minus sign here + + return(list( + hessian = hessian, p0 = p0, p1 = p1, pA = pA, dpsi0_by_dtheta = dpsi0_by_dtheta, + dpsi0_by_dphi = dpsi0_by_dphi, dtheta_by_dalpha = dtheta_by_dalpha, + dphi_by_dbeta = dphi_by_dbeta, dl_by_dpsi0 = dl_by_dpsi0 + )) +} diff --git a/R/main.R b/R/main.R index 7bdfa58..2635363 100644 --- a/R/main.R +++ b/R/main.R @@ -1,181 +1,200 @@ -#' Fitting Binary Regression Models -#' -#' @description \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. -#' -#' @param param The measure of association. Can take value 'RD' (risk difference), -#' 'RR' (relative risk) or 'OR' (odds ratio) -#' -#' @param y The response vector. Should only take values 0 or 1. -#' -#' @param x The exposure vector. Should only take values 0 or 1. -#' -#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. -#' -#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.) -#' -#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.) -#' -#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector. -#' -#' @param subset An optional vector specifying a subset of observations to be used in the fitting process. -#' -#' @param est.method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). -#' -#' @param optimal Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE. -#' -#' @param max.step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. -#' -#' @param thres Threshold for judging convergence. The default is 1e-6. -#' -#' @param alpha.start Starting values for the parameters in the target model. -#' -#' @param beta.start Starting values for the parameters in the nuisance model. 
-#' -#' @param message Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE. -#' -#' @details \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. -#'If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. -#'If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. -#'If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. -#'For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. -#'For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} -#' In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. -#' See Richardson et al. (2016+) for more details. -#' -#' If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. -#' -#' When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. -#' -#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) -#' -#' @return A list consisting of -#' \item{param}{the measure of association.} -#' -#' \item{point.est}{ the point estimates.} -#' -#' \item{se.est}{the standard error estimates.} -#' -#' \item{cov}{estimate of the covariance matrix for the estimates.} -#' -#' \item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} -#' -#' \item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} -#' -#' \item{p.value}{the two sided p-value for testing zero coefficients.} -#' -#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} -#' -#' \item{param.est}{the fitted RR/RD/OR.} -#' -#' \item{va}{ the matrix of covariates for the target model.} -#' -#' \item{vb}{ the matrix of covariates for the nuisance model.} -#' -#' \item{converged}{ Did the maximization process converge? } -#' -#' @author Linbo Wang , Mark Clements , Thomas Richardson -#' -#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). -#' -#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. -#' -#' @seealso \code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); -#' -#' \code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. 
-#' -#' @examples -#' set.seed(0) -#' n = 100 -#' alpha.true = c(0,-1) -#' beta.true = c(-0.5,1) -#' gamma.true = c(0.1,-0.5) -#' params.true = list(alpha.true=alpha.true, beta.true=beta.true, -#' gamma.true=gamma.true) -#' v.1 = rep(1,n) # intercept term -#' v.2 = runif(n,-2,2) -#' v = cbind(v.1,v.2) -#' pscore.true = exp(v %*% gamma.true) / (1+exp(v %*% gamma.true)) -#' p0p1.true = getProbRR(v %*% alpha.true,v %*% beta.true) -#' x = rbinom(n, 1, pscore.true) -#' pA.true = p0p1.true[,1] -#' pA.true[x==1] = p0p1.true[x==1,2] -#' y = rbinom(n, 1, pA.true) -#' -#' fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE) -#' fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE) -#' fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE) -#' -#' fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle -#' -#' @export - - -brm = function(y, x, va, vb = NULL, param, est.method = "MLE", vc = NULL, - optimal = TRUE, weights = NULL, subset = NULL, max.step = NULL, thres = 1e-8, - alpha.start = NULL, beta.start = NULL, message = FALSE) { - - # default param = 'RR'; est.method = 'MLE'; va = v; vb = v; vc = v; - # weights = NULL; subset = NULL; optimal = TRUE; max.step = NULL; - # thres = 1e-06; alpha.start = NULL; beta.start = NULL - - if (is.null(vb)) - vb = va - if (is.null(vc)) - vc = va - - if(class(va)[1] == "formula") va = stats::model.matrix(va) - if(class(vb)[1] == "formula") vb = stats::model.matrix(vb) - if(class(vc)[1] == "formula") vc = stats::model.matrix(vc) - - if(is.vector(va)) va = as.matrix(va, ncol = 1) - if(is.vector(vb)) vb = as.matrix(vb, ncol = 1) - if(is.vector(vc)) vc = as.matrix(vc, ncol = 1) - - if (is.null(weights)) - weights = rep(1, length(y)) - if (is.null(subset)) - subset = 1:length(y) - - ValidCheck(param, y, x, va, vb, vc, weights, subset, est.method, optimal, - max.step, thres, alpha.start, beta.start) - - data = cbind(y,x,va,vb,vc,weights)[subset,] - subset = subset[rowSums(is.na(data)) == 0] - y = y[subset]; x = x[subset]; va = va[subset,,drop=FALSE]; vb = vb[subset,,drop=FALSE] - vc = vc[subset,,drop=FALSE]; weights = weights[subset] - - pa = dim(va)[2] - pb = dim(vb)[2] - if (is.null(max.step)) max.step = min(pa * 20, 1000) - - if (est.method == "MLE"){ - sol = MLEst(param, y, x, va, vb, weights, max.step, thres, alpha.start, - beta.start, pa, pb) - } - if (est.method == "DR") { - if (param == "OR") { - cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. \n") - return() - } - if (is.null(alpha.start) | is.null(beta.start)){ - sol = MLEst(param, y, x, va, vb, weights, max.step, thres, - alpha.start, beta.start, pa, pb) - alpha.ml = sol$point.est[1:pa] - beta.ml = sol$point.est[(pa+1):(pa+pb)] - beta.cov = sol$cov[(pa+1):(pa+pb),(pa+1):(pa+pb)] - alpha.start = alpha.ml - }else{ - alpha.ml = alpha.start; beta.ml = beta.start - beta.cov = matrix(NA,pb,pb) - } - - gamma.fit = stats::glm(x~vc-1, weight = weights, family="binomial") - gamma = gamma.fit$coefficients - gamma.cov = summary(gamma.fit)$cov.unscaled - sol = DREst(param, y, x, va, vb, vc, alpha.ml, beta.ml, gamma, optimal, - weights, max.step, thres, alpha.start, beta.cov, gamma.cov, message) - } - - return(sol) - -} +#' Fitting Binary Regression Models +#' +#' @description \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. +#' +#' @param param The measure of association. 
Can take value 'RD' (risk difference), +#' 'RR' (relative risk) or 'OR' (odds ratio) +#' +#' @param y The response vector. Should only take values 0 or 1. +#' +#' @param x The exposure vector. Should only take values 0 or 1. +#' +#' @param va The covariates matrix for the target model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. +#' +#' @param vb The covariates matrix for the nuisance model. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. (If not specified, defaults to va.) +#' +#' @param vc The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.) +#' +#' @param weights An optional vector of 'prior weights' to be used in the fitting process. Should be NULL or a numeric vector. +#' +#' @param subset An optional vector specifying a subset of observations to be used in the fitting process. +#' +#' @param est_method The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation). +#' +#' @param optimal Use the optimal weighting function for the doubly robust estimator? Ignored if the estimation method is 'MLE'. The default is TRUE. +#' +#' @param max_step The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000. +#' +#' @param thres Threshold for judging convergence. The default is 1e-6. +#' +#' @param alpha_start Starting values for the parameters in the target model. +#' +#' @param beta_start Starting values for the parameters in the nuisance model. +#' +#' @param message Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE. +#' +#' @details \code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. +#' If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. +#' If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. +#' If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. +#' For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. +#' For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} +#' In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. +#' See Richardson et al. (2016+) for more details. +#' +#' If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. +#' +#' When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. 
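A minimal numerical sketch of the variation-independence claim above (editorial, not part of the patch), assuming only the exported get_prob_rr called with vectors of log RR and log OP, as in the package examples further down: every combination of linear predictors maps back to probabilities strictly inside (0, 1), and the implied relative risk reproduces exp(log RR).

grid <- expand.grid(
  theta = seq(-2, 2, length.out = 5), # hypothetical values of the log RR linear predictor
  phi   = seq(-3, 3, length.out = 5)  # hypothetical values of the log OP linear predictor
)
p0p1 <- get_prob_rr(grid$theta, grid$phi)      # columns: P(y=1|x=0), P(y=1|x=1)
stopifnot(all(p0p1 > 0), all(p0p1 < 1))        # both risks stay in (0, 1)
range(p0p1[, 2] / p0p1[, 1] - exp(grid$theta)) # implied RR minus exp(theta): ~0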
+#' +#' For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) +#' +#' @return A list consisting of +#' \item{param}{the measure of association.} +#' +#' \item{point_est}{ the point estimates.} +#' +#' \item{se_est}{the standard error estimates.} +#' +#' \item{cov}{estimate of the covariance matrix for the estimates.} +#' +#' \item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.} +#' +#' \item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.} +#' +#' \item{p_value}{the two sided p-value for testing zero coefficients.} +#' +#' \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} +#' +#' \item{param_est}{the fitted RR/RD/OR.} +#' +#' \item{va}{ the matrix of covariates for the target model.} +#' +#' \item{vb}{ the matrix of covariates for the nuisance model.} +#' +#' \item{converged}{ Did the maximization process converge? } +#' +#' @author Linbo Wang , Mark Clements , Thomas Richardson +#' +#' @references Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimation for the Relative Risk and Risk Difference." Journal of the American Statistical Association: Theory and Methods (2017). +#' +#' Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. +#' +#' @seealso \code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +#' +#' \code{predict_brm} for obtaining fitted probabilities from \code{brm} fits. 
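A hypothetical post-fit sketch of how the return components listed above are typically read off, assuming a fitted object such as fit_mle from the examples that follow:

fit_mle$coefficients        # per-coefficient estimate, SE, 95% CI and p-value
head(fit_mle$param_est)     # fitted RR (here), or RD/OR, at each row of va
fit_mle$converged           # did the optimization converge?
head(predict_brm(fit_mle))  # fitted probabilities (p0, p1) for the original covariates
print_brm(fit_mle)          # formatted summary of the target and nuisance models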
+#' +#' @examples +#' set.seed(0) +#' n <- 100 +#' alpha_true <- c(0, -1) +#' beta_true <- c(-0.5, 1) +#' gamma_true <- c(0.1, -0.5) +#' params_true <- list( +#' alpha_true = alpha_true, beta_true = beta_true, +#' gamma_true = gamma_true +#' ) +#' v.1 <- rep(1, n) # intercept term +#' v.2 <- runif(n, -2, 2) +#' v <- cbind(v.1, v.2) +#' pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true)) +#' p0p1.true <- get_prob_rr(v %*% alpha_true, v %*% beta_true) +#' x <- rbinom(n, 1, pscore_true) +#' pA_true <- p0p1.true[, 1] +#' pA_true[x == 1] <- p0p1.true[x == 1, 2] +#' y <- rbinom(n, 1, pA_true) +#' +#' fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +#' fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +#' fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) +#' +#' fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle +#' +#' @export + + +brm <- function( + y, x, va, vb = NULL, param, est_method = "MLE", vc = NULL, + optimal = TRUE, weights = NULL, subset = NULL, max_step = NULL, thres = 1e-8, + alpha_start = NULL, beta_start = NULL, message = FALSE +) { + # default param = 'RR'; est_method = 'MLE'; va = v; vb = v; vc = v; + # weights = NULL; subset = NULL; optimal = TRUE; max_step = NULL; + # thres = 1e-06; alpha_start = NULL; beta_start = NULL + + if (is.null(vb)) { + vb <- va + } + if (is.null(vc)) { + vc <- va + } + + if (class(va)[1] == "formula") va <- stats::model.matrix(va) + if (class(vb)[1] == "formula") vb <- stats::model.matrix(vb) + if (class(vc)[1] == "formula") vc <- stats::model.matrix(vc) + + if (is.vector(va)) va <- as.matrix(va, ncol = 1) + if (is.vector(vb)) vb <- as.matrix(vb, ncol = 1) + if (is.vector(vc)) vc <- as.matrix(vc, ncol = 1) + + if (is.null(weights)) { + weights <- rep(1, length(y)) + } + if (is.null(subset)) { + subset <- 1:length(y) + } + + valid_check( + param, y, x, va, vb, vc, weights, subset, est_method, optimal, + max_step, thres, alpha_start, beta_start + ) + + data <- cbind(y, x, va, vb, vc, weights)[subset, ] + subset <- subset[rowSums(is.na(data)) == 0] + y <- y[subset] + x <- x[subset] + va <- va[subset, , drop = FALSE] + vb <- vb[subset, , drop = FALSE] + vc <- vc[subset, , drop = FALSE] + weights <- weights[subset] + + pa <- dim(va)[2] + pb <- dim(vb)[2] + if (is.null(max_step)) max_step <- min(pa * 20, 1000) + + if (est_method == "MLE") { + sol <- mle_est( + param, y, x, va, vb, weights, max_step, thres, alpha_start, + beta_start, pa, pb + ) + } + if (est_method == "DR") { + if (param == "OR") { + cat("No doubly robust estimation methods for OR (with propensity score models) are available. Please refer to Tchetgen Tchetgen et al. (2010) for an alternative doubly robust estimation method. 
\n") + return() + } + if (is.null(alpha_start) | is.null(beta_start)) { + sol <- mle_est( + param, y, x, va, vb, weights, max_step, thres, + alpha_start, beta_start, pa, pb + ) + alpha_ml <- sol$point_est[1:pa] + beta_ml <- sol$point_est[(pa + 1):(pa + pb)] + beta_cov <- sol$cov[(pa + 1):(pa + pb), (pa + 1):(pa + pb)] + alpha_start <- alpha_ml + } else { + alpha_ml <- alpha_start + beta_ml <- beta_start + beta_cov <- matrix(NA, pb, pb) + } + + gamma_fit <- stats::glm(x ~ vc - 1, weight = weights, family = "binomial") + gamma <- gamma_fit$coefficients + gamma_cov <- summary(gamma_fit)$cov.unscaled + sol <- dr_est( + param, y, x, va, vb, vc, alpha_ml, beta_ml, gamma, optimal, + weights, max_step, thres, alpha_start, beta_cov, gamma_cov, message + ) + } + + return(sol) +} diff --git a/R/mle_est.R b/R/mle_est.R new file mode 100644 index 0000000..8b538fe --- /dev/null +++ b/R/mle_est.R @@ -0,0 +1,50 @@ +mle_est <- function( + param, y, x, va, vb, weights, max_step, thres, alpha_start, + beta_start, pa, pb +) { + ## starting values for parameter optimization + if (is.null(alpha_start)) { + alpha_start <- rep(0, pa) + } + if (is.null(beta_start)) { + beta_start <- rep(0, pb) + } + + if (param == "OR") { + fit <- stats::glm(y ~ vb - 1 + x * va - va - x, + family = "binomial", + weights = weights, start = c(beta_start, alpha_start) + ) + + point_temp <- summary(fit)$coefficients[, 1] + index <- c((pb + 1):(pa + pb), 1:pb) + point_est <- point_temp[index] + + cov <- stats::vcov(fit)[index, index] + + converged <- fit$converged + } else { + ### point estimate + mle <- mle_point( + param, y, x, va, vb, alpha_start, beta_start, + weights, max_step, thres, pa, pb + ) + point_est <- mle$par + converged <- mle$convergence + # print(point_est) + alpha_ml <- point_est[1:pa] + beta_ml <- point_est[(pa + 1):(pa + pb)] + + ### Computing Fisher Information: + if (param == "RR") { + cov <- mle_var_rr(x, alpha_ml, beta_ml, va, vb, weights) + } + if (param == "RD") { + cov <- mle_var_rd(x, alpha_ml, beta_ml, va, vb, weights) + } + } + + name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) + sol <- wrap_results(point_est, cov, param, name, va, vb, converged) + return(sol) +} diff --git a/R/mle_point.R b/R/mle_point.R new file mode 100644 index 0000000..cc28893 --- /dev/null +++ b/R/mle_point.R @@ -0,0 +1,67 @@ +mle_point <- function(param, y, x, va, vb, alpha_start, beta_start, weights, + max_step, thres, pa, pb) { + startpars <- c(alpha_start, beta_start) + + getProb <- if (param == "RR") get_prob_rr else get_prob_rd + + ## negative log likelihood function + neg_log_likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weights[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weights[x == 1])) + } + + neg_log_likelihood_alpha <- function(alpha) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weights[x == 1])) + } + + neg_log_likelihood_beta <- function(beta) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 
<- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weights[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weights[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weights[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weights[x == 1])) + } + + + ## Optimization + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha_start + beta <- beta_start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max_step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg_log_likelihood_alpha, control = list(maxit = max(100, max_step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + opt2 <- stats::optim(beta, neg_log_likelihood_beta, control = list(maxit = max(100, max_step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + + opt <- list( + par = c(alpha, beta), convergence = (step < max_step), + value = neg_log_likelihood(c(alpha, beta)), step = step + ) + + return(opt) +} diff --git a/R/mle_var.R b/R/mle_var.R new file mode 100644 index 0000000..f51e300 --- /dev/null +++ b/R/mle_var.R @@ -0,0 +1,46 @@ +### variance calculation + +mle_var_rr <- function(x, alpha_ml, beta_ml, va, vb, weights) { + p0p1 <- get_prob_rr(va %*% alpha_ml, vb %*% beta_ml) + n <- dim(va)[1] + pA <- rep(NA, n) # P(Y=1|A,V); here A = X + pA[x == 0] <- p0p1[x == 0, 1] + pA[x == 1] <- p0p1[x == 1, 2] + + expect_dl_by_dpsi0_squared <- (pA) / (1 - pA) + dpsi0_by_dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - + p0p1[, 2])) + dpsi0_by_dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) + tmp <- cbind((dpsi0_by_dtheta + x) * va, dpsi0_by_dphi * vb) + ## since dtheta_by_dalpha = va, and dphi_by_dbeta = vb + fisher_info <- (t(expect_dl_by_dpsi0_squared * weights * tmp) %*% tmp) + return(solve(fisher_info)) +} + + +### variance calculation + +mle_var_rd <- function(x, alpha_ml, beta_ml, va, vb, weights) { + p0p1 <- get_prob_rd(va %*% alpha_ml, vb %*% beta_ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(va) + pA <- p0 # P(Y=1|A,V); here A = X + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as_vector(tanh(va %*% alpha_ml)) # estimated risk differences + + expect_dl_by_dpA_squared <- 1 / sA + dp0_by_dphi <- s0 * s1 / (s0 + s1) + dp0_by_drho <- -s0 / (s0 + s1) + drho_by_dalpha <- (1 - rho^2) * va + dphi_by_dbeta <- vb + + tmp <- cbind((dp0_by_drho + x) * drho_by_dalpha, dp0_by_dphi * dphi_by_dbeta) + fisher_info <- (t(expect_dl_by_dpA_squared * weights * tmp) %*% tmp) + return(solve(fisher_info)) +} diff --git a/R/predict.brm.R b/R/predict.brm.R deleted file mode 100644 index e98bc0a..0000000 --- a/R/predict.brm.R +++ /dev/null @@ -1,71 +0,0 @@ -#' Fitted probabilities from \code{brm} fits -#' -#' @description Calculate fitted probabilities from a fitted binary regression model object. -#' -#' @param object A fitted object from function \code{brm}. -#' -#' @param va.new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. -#' -#' @param vb.new An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used. -#' -#' @param x.new An optional vector of x. -#' -#' @param ... affecting the predictions produced. 
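As a cross-check on the analytic covariance produced by mle_var_rr above (which assembles the expected information from closed-form derivatives), one can compare it with the inverse of a numerical Hessian of the negative log-likelihood at the MLE. This is an editorial sketch only: it assumes the objects y, x, v and fit_mle from the examples in R/main.R, unit weights, and it restates the likelihood directly rather than calling any internal helper; observed and expected information agree only approximately in finite samples.

neg_loglik <- function(pars, pa = ncol(v)) {
  alpha <- pars[1:pa]
  beta  <- pars[-(1:pa)]
  p0p1  <- get_prob_rr(v %*% alpha, v %*% beta)
  p     <- ifelse(x == 1, p0p1[, 2], p0p1[, 1]) # P(y = 1 | x, v) per observation
  -sum(y * log(p) + (1 - y) * log(1 - p))
}
H <- stats::optimHess(fit_mle$point_est, neg_loglik) # numerical observed information
solve(H)      # should be broadly comparable to ...
fit_mle$cov   # ... the analytic expected-information covariance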
-#' -#' @return If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). -#' -#' If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). -#' -#' @export - - -predict.brm = function(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) { - - va = object$va - vb = object$vb - - if(is.null(vb.new)){ - if(is.null(va.new)){ - vb.new = vb - }else{ - vb.new = va.new - } - } - if(is.null(va.new)) va.new = va - - n = nrow(va.new) - pa = ncol(va.new) - pb = ncol(vb.new) - alpha.est = object$point.est[1:pa] - beta.est = object$point.est[(pa+1):(pa+pb)] - - linear.predictors = cbind(va.new %*% alpha.est, vb.new %*% beta.est) - if(object$param=="RR") - p0p1 = getProbRR(linear.predictors) - if(object$param=="RD") - p0p1 = getProbRD(linear.predictors) - if(object$param=="OR"){ - p0 = expit(linear.predictors[,2]) - or = exp(linear.predictors[,1]) - odds1 = or * (p0 / (1-p0)) - p1 = odds1 / (1+odds1) - p0p1 = cbind(p0, p1) - } - colnames(p0p1) = c("p0", "p1") - - if(!is.null(x.new)){ - px = rep(NA, n) - px[x.new == 0] = p0p1[x.new == 0, 1] - px[x.new == 1] = p0p1[x.new == 1, 2] - return(px) - }else{ - return(p0p1) - } - -} - - - - - - diff --git a/R/predict_brm.R b/R/predict_brm.R new file mode 100644 index 0000000..2773dd3 --- /dev/null +++ b/R/predict_brm.R @@ -0,0 +1,65 @@ +#' Fitted probabilities from \code{brm} fits +#' +#' @description Calculate fitted probabilities from a fitted binary regression model object. +#' +#' @param object A fitted object from function \code{brm}. +#' +#' @param va_new An optional covariate matrix to make predictions with. If omitted, the original matrix va is used. +#' +#' @param vb_new An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used. +#' +#' @param x_new An optional vector of x. +#' +#' @param ... affecting the predictions produced. +#' +#' @return If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). +#' +#' If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb). +#' +#' @export + + +predict_brm <- function(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...) 
{ + va <- object$va + vb <- object$vb + + if (is.null(vb_new)) { + if (is.null(va_new)) { + vb_new <- vb + } else { + vb_new <- va_new + } + } + if (is.null(va_new)) va_new <- va + + n <- nrow(va_new) + pa <- ncol(va_new) + pb <- ncol(vb_new) + alpha_est <- object$point_est[1:pa] + beta_est <- object$point_est[(pa + 1):(pa + pb)] + + linear_predictors <- cbind(va_new %*% alpha_est, vb_new %*% beta_est) + if (object$param == "RR") { + p0p1 <- get_prob_rr(linear_predictors) + } + if (object$param == "RD") { + p0p1 <- get_prob_rd(linear_predictors) + } + if (object$param == "OR") { + p0 <- expit(linear_predictors[, 2]) + or <- exp(linear_predictors[, 1]) + odds1 <- or * (p0 / (1 - p0)) + p1 <- odds1 / (1 + odds1) + p0p1 <- cbind(p0, p1) + } + colnames(p0p1) <- c("p0", "p1") + + if (!is.null(x_new)) { + px <- rep(NA, n) + px[x_new == 0] <- p0p1[x_new == 0, 1] + px[x_new == 1] <- p0p1[x_new == 1, 2] + return(px) + } else { + return(p0p1) + } +} diff --git a/R/util.R b/R/util.R new file mode 100644 index 0000000..dce7971 --- /dev/null +++ b/R/util.R @@ -0,0 +1,153 @@ +expit <- function(logodds) { + 1 / (1 + exp(-logodds)) +} + +## Function for checking if two things are equal within numerical precision +same <- function(x, y, tolerance = .Machine$double.eps^0.5) { + abs(x - y) < tolerance +} + + +## Functions for wrapping estimation results into a nice format +wrap_results <- function(point_est, cov, param, name, va, vb, converged) { + se_est <- sqrt(diag(cov)) + + conf_lower <- point_est + stats::qnorm(0.025) * se_est + conf_upper <- point_est + stats::qnorm(0.975) * se_est + p_temp <- stats::pnorm(point_est / se_est, 0, 1) + p_value <- 2 * pmin(p_temp, 1 - p_temp) + + names(point_est) <- names(se_est) <- rownames(cov) <- colnames(cov) <- names(conf_lower) <- names(conf_upper) <- names(p_value) <- name + + coefficients <- cbind(point_est, se_est, conf_lower, conf_upper, p_value) + + linear_predictors <- va %*% point_est[1:ncol(va)] + if (param == "RR") param_est <- exp(linear_predictors) + if (param == "RD") param_est <- linear_predictors + if (param == "OR") param_est <- expit(linear_predictors) + + sol <- list( + param = param, point_est = point_est, se_est = se_est, cov = cov, + conf_lower = conf_lower, conf_upper = conf_upper, p_value = p_value, + coefficients = coefficients, param_est = param_est, va = va, vb = vb, + converged = converged + ) + class(sol) <- c("brm", "list") + attr(sol, "hidden") <- c( + "param", "se_est", "cov", "conf_lower", "conf_upper", + "p_value", "coefficients", "param_est", "va", "vb", "converged" + ) + + return(sol) +} + + +## This function is useful for finding limits on the boundary +## It gives 0.5*exp(x)*(-1+sqrt(1+4exp(-x))) +## This is bounded between 0 and 1, and takes value (-1+sqrt(5))/2 at x=0 (Some relation to golden ratio) +## Limits are 0 and 1 as x goes to -infty and +infty respectively +## The function will never return NaN given a numerical input + +get_prb_aux <- function(x) { + ifelse((x < 17) & (x > (-500)), + 0.5 * exp(x) * (-1 + (1 + 4 * exp(-x))^0.5), + ifelse(x < 0, 0, 1) + ) +} + + +valid_check <- function(param, y, x, va, vb, vc, weights, subset, est_method, + optimal, max_step, thres, alpha_start, beta_start) { + if (!is.character(param)) { + stop("Parameter must be a character") + } + if (!(param %in% c("RD", "RR", "OR"))) { + stop("Parameter can only take RR, RD or OR") + } + + if (sum(is.na(y)) + sum(is.na(x)) + sum(is.na(va)) + sum(is.na(vb)) + + sum(is.na(vc)) + sum(is.na(weights)) > 0) { + warning("Observations with missing values 
will be removed.") + } + if (!(all(y %in% c(0, 1)))) { + stop("y values must be either 0 or 1.") + } + if (!(all(x %in% c(0, 1)))) { + stop("x values must be either 0 or 1.") + } + if (!identical(length(y), length(x), dim(va)[1], dim(vb)[1], dim(vc)[1])) { + stop("y, x and v must have the same length (dimension)") + } + + if (!is.numeric(weights)) { + stop("weights must either be NULL or take numerical values") + } + if (!is.numeric(subset)) { + stop("subset must either be NULL or take numerical values") + } + if (!(est_method %in% c("MLE", "DR"))) { + stop("Must use MLE or DR for estimation") + } + if (!is.logical(optimal)) { + stop("optimal must be a logical variable") + } + if (!is.numeric(max_step) & !is.null(max_step)) { + stop("max_step must be a number") + } + if (!is.numeric(thres)) { + stop("thres must be a number") + } + if (!is.null(alpha_start) & length(alpha_start) != dim(va)[2]) { + stop("length of alpha_start must match the dimension of va") + } + if (!is.null(beta_start) & length(beta_start) != dim(vb)[2]) { + stop("length of beta_start must match the dimension of vb") + } +} + +#' Ancillary function for printing +#' +#' @param x a list obtained with the function 'brm' +#' +#' @param ... additional arguments affecting the output +#' +#' @export + +print_brm <- function(x, ...) { + hid <- attr(x, "hidden") + nhid <- which(!names(x) %in% hid) + + if (x$param == "RR") { + cat("Parameter of interest: (conditional) relative risk;", "\n", "nuisance parameter: odds product.", + "\n\n", + sep = "" + ) + cat("Target model: log(RR) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "RD") { + cat("Parameter of interest: (conditional) risk difference;", "\n", + "nuisance parameter: odds product.", "\n\n", + sep = "" + ) + cat("Target model: log(RD) = alpha * va", "\n") + cat("Nuisance model: log(OP) = beta * vb", "\n\n") + } + if (x$param == "OR") { + cat("Parameter of interest: (conditional) odds ratio;", "\n", "nuisance parameter: baseline risk.", + "\n\n", + sep = "" + ) + cat("Target model: log(OR) = alpha * va", "\n") + cat("Nuisance model: log(p0) = beta * vb", "\n\n") + } + + + for (i in nhid) { + x[[i]] <- round(x[[i]], 3) + } + + print(x[nhid], 3) + + cat("See the element '$coefficients' for more information.\n") +} diff --git a/R/var_rd_dr.R b/R/var_rd_dr.R new file mode 100644 index 0000000..6f75d08 --- /dev/null +++ b/R/var_rd_dr.R @@ -0,0 +1,122 @@ +## Sandwich estimator for variance of RD + +var_rd_dr <- function( + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, + optimal, weights +) { + ######################################## + + pscore <- as.vector(expit(vc %*% gamma)) + n <- length(pscore) + + ### 1. 
- E[dS/d(alpha_ml,beta_ml)] ############################## Computing + ### the Hessian: + + Hrd <- hessian_2_rd(y, x, va, vb, alpha_ml, beta_ml, weights) + hessian <- Hrd$hessian + p0 <- Hrd$p0 + p1 <- Hrd$p1 + pA <- Hrd$pA + s0 <- Hrd$s0 + s1 <- Hrd$s1 + sA <- Hrd$sA + rho <- Hrd$rho + dl_by_dpA <- Hrd$dl_by_dpA + dp0_by_dphi <- Hrd$dp0_by_dphi + dp0_by_drho <- Hrd$dp0_by_drho + drho_by_dalpha <- Hrd$drho_by_dalpha + dphi_by_dbeta <- Hrd$dphi_by_dbeta + dpA_by_drho <- Hrd$dpA_by_drho + dpA_by_dalpha <- Hrd$dpA_by_dalpha + dpA_by_dphi <- Hrd$dpA_by_dphi + dpA_by_dbeta <- Hrd$dpA_by_dbeta + + + ############# extra building blocks ########################## + + H_alpha <- y - x * as.vector(tanh(va %*% alpha_dr)) + + ############# Calculation of optimal vector (used in several places below) ## + + if (optimal == TRUE) { + wt <- (1 - rho^2) / (pscore * s0 + (1 - pscore) * s1) + } else { + wt <- rep(1, n) + } + + + ### 2. -E[dU_by_dalphaml_betaml] #################################### + + dU_by_dp0 <- -va * wt * (x - pscore) # n by 2 + dp0_by_dalpha_beta <- cbind(drho_by_dalpha * dp0_by_drho, dphi_by_dbeta * + dp0_by_dphi) # n by 4 + + dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2 + + if (optimal == TRUE) { + esA <- pscore * s0 + (1 - pscore) * s1 # E[s_{1-A}]... + dwt_by_drho <- (-2 * rho * esA - (1 - rho^2) * (1 - pscore) * (1 - + 2 * p1)) / esA^2 + dwt_by_dp0 <- -(1 - rho^2) * (2 * pscore * rho + 1 - 2 * p1) / esA^2 + + dwt_by_dalpha <- drho_by_dalpha * (dwt_by_drho + dwt_by_dp0 * dp0_by_drho) + dwt_by_dbeta <- dphi_by_dbeta * dwt_by_dp0 * dp0_by_dphi + + dwt_by_dalpha_beta <- cbind(dwt_by_dalpha, dwt_by_dbeta) # n by 4 + } else { + dwt_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb)) + } + + dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% (dp0_by_dalpha_beta) + + t(dU_by_dwt * weights) %*% dwt_by_dalpha_beta + + + ### 3. tau = -E[dU/dalpha_dr] ######################################## (This + ### is the bread of the sandwich estimate) + + dU_by_dH <- va * wt * (x - pscore) # n by 2 + rho_dr <- as.vector(tanh(va %*% alpha_dr)) + dH_by_dalpha_dr <- -va * x * (1 - rho_dr^2) + + tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2 + + + ### 4. E[d(prop score score equation)/dgamma] + + dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2 + + + ### 5. 
E[dU/dgamma] + + dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2 + + if (optimal == TRUE) { + dwt_by_dpscore <- -(1 - rho^2) * (s0 - s1) / esA^2 + dwt_by_dgamma <- dpscore_by_dgamma * dwt_by_dpscore # n by 2 + } else { + dwt_by_dgamma <- matrix(0, n, ncol(vc)) + } + + dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwt * + weights) %*% dwt_by_dgamma # 2 by 2 + + + ############################################################################# Assembling semi-parametric variance matrix + + U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 + + S <- cbind(dpA_by_dalpha, dpA_by_dbeta) * dl_by_dpA + + pscore_score <- vc * (x - pscore) + + Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) - + t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2 + USigma <- t(Utilde * weights) %*% Utilde / sum(weights) + + + ################################### Asymptotic var matrix for alpha_dr + + alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + return(alpha_dr_variance) +} diff --git a/R/var_rr_dr.R b/R/var_rr_dr.R new file mode 100644 index 0000000..feac259 --- /dev/null +++ b/R/var_rr_dr.R @@ -0,0 +1,113 @@ +## Sandwich estimator for variance of RR + +var_rr_dr <- function( + y, x, va, vb, vc, alpha_dr, alpha_ml, beta_ml, gamma, + optimal, weights +) { + ######################################## + + pscore <- as.vector(expit(vc %*% gamma)) + n <- length(pscore) + + ### 1. - E[dS/d(alpha_ml,beta_ml)] ############################## Computing + ### the Hessian: + + Hrr <- hessian_2_rr(y, x, va, vb, alpha_ml, beta_ml, weights) + hessian <- Hrr$hessian + p0 <- Hrr$p0 + p1 <- Hrr$p1 + pA <- Hrr$pA + dpsi0_by_dtheta <- Hrr$dpsi0_by_dtheta + dpsi0_by_dphi <- Hrr$dpsi0_by_dphi + dtheta_by_dalpha <- Hrr$dtheta_by_dalpha + dphi_by_dbeta <- Hrr$dphi_by_dbeta + dl_by_dpsi0 <- Hrr$dl_by_dpsi0 + + ############# extra building blocks ########################## + + H_alpha <- y * exp(-x * (as.vector(va %*% alpha_dr))) + + ############# Calculation of optimal vector (used in several places below) ## + + if (optimal == TRUE) { + theta <- as.vector(va %*% alpha_ml) # avoid n by 1 matrix + dtheta_by_dalpha_beta <- cbind(va, matrix(0, n, length(beta_ml))) + wt <- 1 / (1 - p0 + (1 - pscore) * (exp(-theta) - 1)) + } else { + wt <- rep(1, n) + } + + + ### 2. -E[dU_by_dalphaml_betaml] #################################### + + dU_by_dp0 <- -va * wt * (x - pscore) # n by 2 + dp0_by_dpsi0 <- p0 + dpsi0_by_dalpha_beta <- cbind(dpsi0_by_dtheta * dtheta_by_dalpha, dpsi0_by_dphi * + dphi_by_dbeta) # n by 4 + # 4 = 2 (alpha) + 2 (beta) + dp0_by_dalpha_beta <- dpsi0_by_dalpha_beta * dp0_by_dpsi0 # n by 4 + + dU_by_dwt <- va * (x - pscore) * (H_alpha - p0) # n by 2 + dwt_by_dwti <- -wt^2 # n + # wti is short for wt_inv + dU_by_dwti <- dU_by_dwt * dwt_by_dwti # n by 2 + if (optimal == TRUE) { + dwti_by_dalpha_beta <- -dp0_by_dalpha_beta - (1 - pscore) * exp(-theta) * + dtheta_by_dalpha_beta # n by 4 + } else { + dwti_by_dalpha_beta <- matrix(0, n, ncol(va) + ncol(vb)) + } + + dU_by_dalpha_ml_beta_ml <- t(dU_by_dp0 * weights) %*% dp0_by_dalpha_beta + + t(dU_by_dwti * weights) %*% dwti_by_dalpha_beta + + + ### 3. tau = -E[dU/dalpha_dr] ######################################## (This + ### is the bread of the sandwich estimate) + + dU_by_dH <- va * wt * (x - pscore) # n by 2 + dH_by_dalpha_dr <- -va * x * H_alpha # n by 2 + + tau <- -t(dU_by_dH * weights) %*% dH_by_dalpha_dr / sum(weights) # 2 by 2 + + + ### 4. 
E[d(prop score score equation)/dgamma] + + dpscore_by_dgamma <- vc * pscore * (1 - pscore) # n by 2 + part4 <- -t(vc * weights) %*% dpscore_by_dgamma # 2 by 2 + + + ### 5. E[dU/dgamma] + + dU_by_dpscore <- -va * wt * (H_alpha - p0) # n by 2 + + if (optimal == TRUE) { + dwti_by_dpscore <- 1 - exp(-theta) # n + dwti_by_dgamma <- dpscore_by_dgamma * dwti_by_dpscore # n by 2 + } else { + dwti_by_dgamma <- matrix(0, n, ncol(vc)) + } + + dU_by_dgamma <- t(dU_by_dpscore * weights) %*% dpscore_by_dgamma + t(dU_by_dwti * + weights) %*% dwti_by_dgamma # 2 by 2 + + + ############################################################################# Assembling semi-parametric variance matrix + + U <- va * wt * (x - pscore) * (H_alpha - p0) # n by 2 + + S <- cbind(dl_by_dpsi0 * (dpsi0_by_dtheta + x) * dtheta_by_dalpha, dl_by_dpsi0 * + dpsi0_by_dphi * dphi_by_dbeta) + pscore_score <- vc * (x - pscore) + + Utilde <- U - t(dU_by_dalpha_ml_beta_ml %*% (-solve(hessian)) %*% t(S)) - + t(dU_by_dgamma %*% (solve(part4)) %*% t(pscore_score)) # n by 2 + USigma <- t(Utilde * weights) %*% Utilde / sum(weights) + + + ################################### Asymptotic var matrix for alpha_dr + + alpha_dr_variance <- solve(tau) %*% USigma %*% solve(tau) / sum(weights) + + return(alpha_dr_variance) +} diff --git a/compare/1.1_MLE_Point.R b/compare/1.1_MLE_Point.R index b217824..8062384 100644 --- a/compare/1.1_MLE_Point.R +++ b/compare/1.1_MLE_Point.R @@ -1,87 +1,92 @@ -#' Maximum Likelihood Estimation via Alternating Optimization - -#' model by alternately optimizing the negative log-likelihood over \code{alpha} and \code{beta} until convergence or until a maximum number of iterations is reached. -#' -#' @param param Character. Specifies the model type: use \code{"RR"} for relative risk , use \code{"RD"} for risk difference. -#' @param y Numeric vector. Binary response variable (0/1) of length \code{n}. -#' @param x Numeric. Group indicator (0/1) of length \code{n}. -#' @param va Numeric matrix. Design matrix for the \code{alpha} parameters (dimensions \code{n} × \code{pa}). -#' @param vb Numeric matrix. Design matrix for the \code{beta} parameters (dimensions \code{n} × \code{pb}). -#' @param alpha.start Numeric vector of length \code{pa}. Initial values for the \code{alpha} parameters. -#' @param beta.start Numeric vector of length \code{pb}. Initial values for the \code{beta} parameters. -#' @param weight Numeric vector of length \code{n}. Observation weight. -#' @param max.step Integer. Maximum number of alternating optimization iterations. -#' @param thres Numeric. Convergence threshold: the algorithm stops when the relative change in parameters falls below this value. -#' @param pa Integer. Number of \code{alpha} parameters (length of \code{alpha.start}). -#' @param pb Integer. Number of \code{beta} parameters (length of \code{beta.start}). -#' -#' @return A list with elements: -#' \describe{ -#' \item{\code{par}}{Numeric vector of length \code{pa + pb}: the estimated parameters \code{c(alpha, beta)}.} -#' \item{\code{convergence}}{Logical. \code{TRUE} if the algorithm converged within \code{max.step} iterations; otherwise \code{FALSE}.} -#' \item{\code{value}}{Numeric. Negative log-likelihood evaluated at the final parameter estimates.} -#' \item{\code{step}}{Integer. 
Number of iterations actually performed.} -#' } -#' -max.likelihood = function(param, y, x, va, vb, alpha.start, beta.start, weight, - max.step, thres, pa, pb) { - - startpars = c(alpha.start, beta.start) - - getProb = if (param == "RR") getProbRR else getProbRD - - ## negative log likelihood function - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weight[x == 1])) - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) - } - - - ## Optimization - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start; beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - - opt = list(par = c(alpha,beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha,beta)), step = step) - - return(opt) -} - +#' Maximum Likelihood Estimation via Alternating Optimization + +#' model by alternately optimizing the negative log-likelihood over \code{alpha} and \code{beta} until convergence or until a maximum number of iterations is reached. +#' +#' @param param Character. Specifies the model type: use \code{"RR"} for relative risk , use \code{"RD"} for risk difference. +#' @param y Numeric vector. Binary response variable (0/1) of length \code{n}. +#' @param x Numeric. Group indicator (0/1) of length \code{n}. +#' @param va Numeric matrix. Design matrix for the \code{alpha} parameters (dimensions \code{n} × \code{pa}). +#' @param vb Numeric matrix. Design matrix for the \code{beta} parameters (dimensions \code{n} × \code{pb}). +#' @param alpha.start Numeric vector of length \code{pa}. Initial values for the \code{alpha} parameters. +#' @param beta.start Numeric vector of length \code{pb}. Initial values for the \code{beta} parameters. +#' @param weight Numeric vector of length \code{n}. Observation weight. +#' @param max.step Integer. Maximum number of alternating optimization iterations. +#' @param thres Numeric. Convergence threshold: the algorithm stops when the relative change in parameters falls below this value. +#' @param pa Integer. Number of \code{alpha} parameters (length of \code{alpha.start}). +#' @param pb Integer. Number of \code{beta} parameters (length of \code{beta.start}). 
+#' +#' @return A list with elements: +#' \describe{ +#' \item{\code{par}}{Numeric vector of length \code{pa + pb}: the estimated parameters \code{c(alpha, beta)}.} +#' \item{\code{convergence}}{Logical. \code{TRUE} if the algorithm converged within \code{max.step} iterations; otherwise \code{FALSE}.} +#' \item{\code{value}}{Numeric. Negative log-likelihood evaluated at the final parameter estimates.} +#' \item{\code{step}}{Integer. Number of iterations actually performed.} +#' } +#' +max.likelihood <- function(param, y, x, va, vb, alpha.start, beta.start, weight, + max.step, thres, pa, pb) { + startpars <- c(alpha.start, beta.start) + + getProb <- if (param == "RR") getProbRR else getProbRD + + ## negative log likelihood function + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weight[x == 1])) + } + + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) + } + + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) + } + + + ## Optimization + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + + opt <- list( + par = c(alpha, beta), convergence = (step < max.step), + value = neg.log.likelihood(c(alpha, beta)), step = step + ) + + return(opt) +} diff --git a/compare/1.2_MLE_Var.R b/compare/1.2_MLE_Var.R index a0a282a..e16aabb 100644 --- a/compare/1.2_MLE_Var.R +++ b/compare/1.2_MLE_Var.R @@ -1,73 +1,67 @@ - -#' Calculate the variance–covariance matrix of MLEs for the relative risk (RR) model and the riskdifference (RD) model -#' -#' This function computes the inverse of the observed Fisher information matrix - -#' -#' @param x numeric vector of length \code{n}. Binary exposure indicator (0/1). -#' @param alpha.ml Numeric vector of length \code{p_a}. Estimated \code{\alpha} parameters. -#' @param beta.ml Numeric vector of length \code{p_b}. Estimated \code{\beta} parameters. -#' @param va Numeric matrix of dimension \code{n \times p_a}. Design matrix for the \code{\alpha} component. -#' @param vb Numeric matrix of dimension \code{n \times p_b}. Design matrix for the \code{\beta} component. -#' @param weights Numeric vector of length \code{n}. Observation weights. 
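The alternating scheme documented above for compare/1.1_MLE_Point.R (optimize over alpha with beta held fixed, then over beta with alpha held fixed, stopping once the relative parameter change drops below thres) is easiest to see on a toy objective. The sketch below is illustrative only: it uses a made-up two-block quadratic rather than the package likelihood, and its stopping rule mirrors the Diff() criterion used in that file.

f_a <- function(a, b) sum((a - 1)^2) + (sum(a) * sum(b))^2 # objective seen by the a-block
f_b <- function(b, a) sum((b + 2)^2) + (sum(a) * sum(b))^2 # objective seen by the b-block

thres <- 1e-6
rel_change <- function(new, old) sum((new - old)^2) / sum(new^2 + thres) # same form as Diff()

a <- c(0, 0); b <- c(0, 0)
diff <- thres + 1; step <- 0
while (diff > thres && step < 100) {
  step  <- step + 1
  a_new <- stats::optim(a, f_a, b = b)$par     # block update of a, b fixed
  b_new <- stats::optim(b, f_b, a = a_new)$par # block update of b, a fixed
  diff  <- max(rel_change(a_new, a), rel_change(b_new, b))
  a <- a_new; b <- b_new
}
c(step = step, a = a, b = b) # iterations used and the limiting block values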
-#' -#' @return A \code{p}-by-\code{p} matrix (\code{p = length(alpha.ml) + length(beta.ml)}), the variance–covariance matrix of the MLEs). -#' -#' need the package "MASS" - - -### variance calculation - -var.mle.rr = function(x, alpha.ml, beta.ml, va, vb, weight) { - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - n = dim(vb)[1] - pA = rep(NA, n) - p0 = p0p1[,1]; p1 = p0p1[,2] - pA[x == 0] = p0p1[x == 0, 1] - pA[x == 1] = p0p1[x == 1, 2] - - - expect.dl.by.dpsi0.squared = (pA)/(1 - pA) - dpsi0.by.dphi = (1 - p0p1[, 1]) * (1 - p0p1[, 2])/((1 - p0p1[, 1]) + (1 - - p0p1[, 2])) - dpsi0.by.dtheta = -(1 - p0p1[, 1])/((1 - p0p1[, 1]) + (1 - p0p1[, 2])) - tmp = cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) - ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb - fisher.info = (t(expect.dl.by.dpsi0.squared * weight * tmp) %*% tmp) - return(ginv(fisher.info)) -} - - - - -### variance calculation - -var.mle.rd = function(x, alpha.ml, beta.ml, va, vb, weight) { - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - - n = nrow(va) - pA = p0 # P(Y=1|A,V); here A = X - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences - - expect.dl.by.dpA.squared = 1/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = (1 - rho^2) * va - dphi.by.dbeta = vb - - tmp = cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) - fisher.info = (t(expect.dl.by.dpA.squared * weight * tmp) %*% tmp) - return(ginv(fisher.info)) -} - - +#' Calculate the variance–covariance matrix of MLEs for the relative risk (RR) model and the riskdifference (RD) model +#' +#' This function computes the inverse of the observed Fisher information matrix + +#' +#' @param x numeric vector of length \code{n}. Binary exposure indicator (0/1). +#' @param alpha.ml Numeric vector of length \code{p_a}. Estimated \code{\alpha} parameters. +#' @param beta.ml Numeric vector of length \code{p_b}. Estimated \code{\beta} parameters. +#' @param va Numeric matrix of dimension \code{n \times p_a}. Design matrix for the \code{\alpha} component. +#' @param vb Numeric matrix of dimension \code{n \times p_b}. Design matrix for the \code{\beta} component. +#' @param weights Numeric vector of length \code{n}. Observation weights. +#' +#' @return A \code{p}-by-\code{p} matrix (\code{p = length(alpha.ml) + length(beta.ml)}), the variance–covariance matrix of the MLEs). 
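Note that the compare/ versions of var.mle.rr and var.mle.rd below invert the information matrix with MASS::ginv rather than solve, presumably so that a numerically singular Fisher information still yields a covariance matrix instead of an error. A toy contrast, assuming only that MASS is installed:

library(MASS)
info <- matrix(c(2, 2, 2, 2), 2, 2) # rank-deficient toy "information" matrix
try(solve(info))                    # errors: the system is exactly singular
ginv(info)                          # Moore-Penrose pseudo-inverse still returns a matrix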
+#' +#' need the package "MASS" + + +### variance calculation + +var.mle.rr <- function(x, alpha.ml, beta.ml, va, vb, weight) { + p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) + n <- dim(vb)[1] + pA <- rep(NA, n) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + pA[x == 0] <- p0p1[x == 0, 1] + pA[x == 1] <- p0p1[x == 1, 2] + + + expect.dl.by.dpsi0.squared <- (pA) / (1 - pA) + dpsi0.by.dphi <- (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - + p0p1[, 2])) + dpsi0.by.dtheta <- -(1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) + tmp <- cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) + ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb + fisher.info <- (t(expect.dl.by.dpsi0.squared * weight * tmp) %*% tmp) + return(ginv(fisher.info)) +} + + +### variance calculation + +var.mle.rd <- function(x, alpha.ml, beta.ml, va, vb, weight) { + p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + n <- nrow(va) + pA <- p0 # P(Y=1|A,V); here A = X + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences + + expect.dl.by.dpA.squared <- 1 / sA + dp0.by.dphi <- s0 * s1 / (s0 + s1) + dp0.by.drho <- -s0 / (s0 + s1) + drho.by.dalpha <- (1 - rho^2) * va + dphi.by.dbeta <- vb + + tmp <- cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) + fisher.info <- (t(expect.dl.by.dpA.squared * weight * tmp) %*% tmp) + return(ginv(fisher.info)) +} diff --git a/compare/1_CallMLE.R b/compare/1_CallMLE.R index 8231f7d..a09102f 100644 --- a/compare/1_CallMLE.R +++ b/compare/1_CallMLE.R @@ -1,88 +1,106 @@ -MLEst = function(param, y, x, va, vb, weight, max.step, thres, alpha.start, - beta.start, pa, pb, method = "brm", CI = "wald") { +MLEst <- function( + param, y, x, va, vb, weight, max.step, thres, alpha.start, + beta.start, pa, pb, method = "brm", CI = "wald" +) { + ## starting values for parameter optimization + if (is.null(alpha.start)) { + alpha.start <- rep(0, pa) + } + if (is.null(beta.start)) { + beta.start <- rep(0, pb) + } - ## starting values for parameter optimization - if (is.null(alpha.start)) - alpha.start = rep(0, pa) - if (is.null(beta.start)) - beta.start = rep(0, pb) + if (param == "OR") { + fit <- stats::glm(y ~ vb - 1 + x * va - va - x, + family = "binomial", + weight = weight, start = c(beta.start, alpha.start) + ) - if (param == "OR") { - fit = stats::glm(y ~ vb - 1 + x * va - va - x, family = "binomial", - weight = weight, start = c(beta.start, alpha.start)) + point.temp <- summary(fit)$coefficients[, 1] + index <- c((pb + 1):(pa + pb), 1:pb) + point.est <- point.temp[index] - point.temp = summary(fit)$coefficients[, 1] - index = c((pb + 1):(pa + pb), 1:pb) - point.est = point.temp[index] + cov <- stats::vcov(fit)[index, index] - cov = stats::vcov(fit)[index, index] - - converged = fit$converged - - } else { - - ### point estimate - if(method == "brm"){ - mle = max.likelihood(param, y, x, va, vb, alpha.start, beta.start, - weight, max.step, thres, pa, pb) - } else if(method == "firth"){ - if(param == "RR"){ - mle = max.likelihood.firth.rr(param, y, x, va, vb, alpha.start, beta.start, - weight, max.step, thres, pa, pb) - } else { - mle = max.likelihood.firth.rd(param, y, x, va, vb, alpha.start, beta.start, - weight, max.step, thres, pa, pb) - } - } else if(method == "jeffrey-p"){ - mle = max.likelihood.jeffrey.direct(param, y, x, va, vb, alpha.start, beta.start, - weight, max.step, thres, 
pa, pb) - } else if(method == "jeffrey-est"){ - mle = max.likelihood.jeffrey(param, y, x, va, vb, alpha.start, beta.start, - weight, max.step, thres, pa, pb) + converged <- fit$converged + } else { + ### point estimate + if (method == "brm") { + mle <- max.likelihood( + param, y, x, va, vb, alpha.start, beta.start, + weight, max.step, thres, pa, pb + ) + } else if (method == "firth") { + if (param == "RR") { + mle <- max.likelihood.firth.rr( + param, y, x, va, vb, alpha.start, beta.start, + weight, max.step, thres, pa, pb + ) } else { - stop(paste0(method, " is not a recognized method!")) + mle <- max.likelihood.firth.rd( + param, y, x, va, vb, alpha.start, beta.start, + weight, max.step, thres, pa, pb + ) } + } else if (method == "jeffrey-p") { + mle <- max.likelihood.jeffrey.direct( + param, y, x, va, vb, alpha.start, beta.start, + weight, max.step, thres, pa, pb + ) + } else if (method == "jeffrey-est") { + mle <- max.likelihood.jeffrey( + param, y, x, va, vb, alpha.start, beta.start, + weight, max.step, thres, pa, pb + ) + } else { + stop(paste0(method, " is not a recognized method!")) + } - point.est = mle$par - converged = mle$convergence - alpha.ml = point.est[1:pa] - beta.ml = point.est[(pa + 1):(pa + pb)] + point.est <- mle$par + converged <- mle$convergence + alpha.ml <- point.est[1:pa] + beta.ml <- point.est[(pa + 1):(pa + pb)] - ### Computing Fisher Information: - if (param == "RR") { - cov = var.mle.rr(x, alpha.ml, beta.ml, va, vb, weight) - } - if (param == "RD") { - cov = var.mle.rd(x, alpha.ml, beta.ml, va, vb, weight) - } - sd.est = sqrt(diag(cov)) + ### Computing Fisher Information: + if (param == "RR") { + cov <- var.mle.rr(x, alpha.ml, beta.ml, va, vb, weight) } + if (param == "RD") { + cov <- var.mle.rd(x, alpha.ml, beta.ml, va, vb, weight) + } + sd.est <- sqrt(diag(cov)) + } - conf.lower = point.est + stats::qnorm(0.025) * sd.est - conf.upper = point.est + stats::qnorm(0.975) * sd.est - p.temp = stats::pnorm(point.est/sd.est, 0, 1) - p.value = 2 * pmin(p.temp, 1 - p.temp) + conf.lower <- point.est + stats::qnorm(0.025) * sd.est + conf.upper <- point.est + stats::qnorm(0.975) * sd.est + p.temp <- stats::pnorm(point.est / sd.est, 0, 1) + p.value <- 2 * pmin(p.temp, 1 - p.temp) - if(CI == "wald"){ - ci.est = list(low = conf.lower, - up = conf.upper, - p = p.value) - }else if(CI == "exact"){ - ci.est.alpha = exact(param,y, x, va, vb, weight, max.step, thres, thres.dicho=1e-3, point.est, sd.est, pa, pb) - ci.est = list(low = c(ci.est.alpha$low,conf.lower[(pa+1):(pa+pb)]), - up = c(ci.est.alpha$up,conf.upper[(pa+1):(pa+pb)]), - p = c(ci.est.alpha$p,p.value[(pa+1):(pa+pb)])) - }else if(CI == "LRT"){ - ci.est.alpha = profile(param,y, x, va, vb, weight, max.step, thres, point.est, sd.est, pa, pb) - ci.est = list(low = c(ci.est.alpha$low,conf.lower[(pa+1):(pa+pb)]), - up = c(ci.est.alpha$up,conf.upper[(pa+1):(pa+pb)]), - p = c(ci.est.alpha$p,p.value[(pa+1):(pa+pb)])) - }else { - stop(paste0(CI, " is not a recognized method!")) - } + if (CI == "wald") { + ci.est <- list( + low = conf.lower, + up = conf.upper, + p = p.value + ) + } else if (CI == "exact") { + ci.est.alpha <- exact(param, y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, point.est, sd.est, pa, pb) + ci.est <- list( + low = c(ci.est.alpha$low, conf.lower[(pa + 1):(pa + pb)]), + up = c(ci.est.alpha$up, conf.upper[(pa + 1):(pa + pb)]), + p = c(ci.est.alpha$p, p.value[(pa + 1):(pa + pb)]) + ) + } else if (CI == "LRT") { + ci.est.alpha <- profile(param, y, x, va, vb, weight, max.step, thres, point.est, sd.est, 
pa, pb) + ci.est <- list( + low = c(ci.est.alpha$low, conf.lower[(pa + 1):(pa + pb)]), + up = c(ci.est.alpha$up, conf.upper[(pa + 1):(pa + pb)]), + p = c(ci.est.alpha$p, p.value[(pa + 1):(pa + pb)]) + ) + } else { + stop(paste0(CI, " is not a recognized method!")) + } - name = paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) - sol = WrapResults(point.est, cov, param, name, va, vb, converged, ci.est) - return(sol) + name <- paste(c(rep("alpha", pa), rep("beta", pb)), c(1:pa, 1:pb)) + sol <- WrapResults(point.est, cov, param, name, va, vb, converged, ci.est) + return(sol) } - diff --git a/compare/CI_LRT.R b/compare/CI_LRT.R index 60709e8..1cdbdb5 100644 --- a/compare/CI_LRT.R +++ b/compare/CI_LRT.R @@ -1,157 +1,166 @@ -#' Construct Likelihood-Ratio Confidence Intervals via Profiling (for \eqn{\alpha}) -#' -#' Computes **profile-likelihood** confidence intervals and LRT p-values for each -#' component of \eqn{\alpha} in a binary-outcome model with treatment indicator -#' \eqn{x \in \{0,1\}}. For each \eqn{\alpha_j}, the function fixes \eqn{\alpha_j} -#' at a grid of values, maximizes the log-likelihood over the remaining -#' \eqn{\alpha_{-j}} and all \eqn{\beta}, evaluates the likelihood-ratio statistic, -#' and finds where it falls below the \eqn{\chi^2_1} cutoff to form a -#' \eqn{95\%} profile CI. -#' -#' @param param Character. Model family switch: use \code{"RR"} for a relative-risk -#' parametrization (via \code{getProbRR}), otherwise \code{"RD"} for a -#' risk-difference parametrization (via \code{getProbRD}). -#' @param y Numeric vector of length \eqn{n}. Binary outcomes (0/1). -#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). -#' @param va Numeric matrix \eqn{n \times p_a}. Design for \eqn{\alpha}. -#' @param vb Numeric matrix \eqn{n \times p_b}. Design for \eqn{\beta}. -#' @param weight Numeric vector of length \eqn{n}. Observation weights. -#' @param max.step Integer. Maximum number of alternating (block) optimization -#' iterations used when profiling each \eqn{\alpha_j}. -#' @param thres Numeric. Convergence tolerance; the inner alternation stops when -#' the relative parameter change is below this value. -#' @param pars Numeric vector of length \eqn{p_a + p_b}. Concatenated MLEs -#' \code{c(alpha.ml, beta.ml)} used to center search ranges and for the null -#' (unconstrained) fit in the LRT. -#' @param se Numeric vector (typically of length \eqn{p_a}) giving marginal -#' standard errors for \eqn{\alpha} at the MLE; used to build the profiling -#' grid \eqn{\alpha_j \in [\alpha_{j,ml} \pm 3\,se_j]} (truncated to \code{[-12, 12]}). -#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{p_a}). -#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{p_b}). -#' -#' @return A list with components: -#' \describe{ -#' \item{\code{low}}{Numeric vector of length \eqn{p_a}. Lower 95\% profile CI -#' for each \eqn{\alpha_j}.} -#' \item{\code{up}}{Numeric vector of length \eqn{p_a}. Upper 95\% profile CI -#' for each \eqn{\alpha_j}.} -#' \item{\code{p}}{Numeric vector of length \eqn{p_a}. 
LRT p-values for testing -#' \eqn{H_0:\ \alpha_j=0} vs \eqn{H_1:\ \alpha_j \neq 0}.} -#' } -#' - -profile <- function(param,y, x, va, vb, weight, max.step, thres, pars, se, pa, pb){ - ## real data - getProb = if (param == "RR") getProbRR else getProbRD - alpha.ml = pars[1:pa] - beta.ml = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(va %*% alpha.ml, vb %*% beta.ml) - p0.ml = p0p1[, 1]; p1.ml = p0p1[, 2] - ## profile - - alpha.start <- rep(0,pa) - beta.start <- rep(0,pb) - - optm.beta <- function(alphaj,j){ - - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - eps <- 1e-12 - p0 <- pmin(pmax(p0, eps), 1 - eps) - p1 <- pmin(pmax(p1, eps), 1 - eps) - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weight[x == 1])) - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - eps <- 1e-12 - p0 <- pmin(pmax(p0, eps), 1 - eps) - p1 <- pmin(pmax(p1, eps), 1 - eps) - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - eps <- 1e-12 - p0 <- pmin(pmax(p0, eps), 1 - eps) - p1 <- pmin(pmax(p1, eps), 1 - eps) - - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) - } - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start - alpha[j] = alphaj - beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - alpha[j] = alphaj - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - return(neg.log.likelihood(c(alpha,beta))) - } - - LRT.alpha <- function(alpha,j){ - return(2*optm.beta(alpha.ml[j],j)-2*optm.beta(alpha[j],j)) - } - - get.lrt <- function(alpha){ - lrt <- rep(0,length(alpha)) - for (j in 1:length(alpha)) { - lrt[j] <- LRT.alpha(alpha,j) - } - return(lrt) - } - - chi.th <- qchisq(0.95, df = 1) - - alpha.seq <- lapply(1:pa, function(j){ - seq(max(alpha.ml[j] - 3*se[j], -12), - min(alpha.ml[j] + 3*se[j], 12), - length.out = 40) - }) - alpha.mat <- do.call(cbind, alpha.seq) - - result.lrt <- apply(alpha.mat, 1, get.lrt) - - lrt.mat <- if(pa>1) {t(result.lrt)} else{as.matrix(result.lrt,ncol = 1)} - - alpha.up = rep(0,pa) - for (j in 1:pa) { - alpha.up[j] <- max(alpha.mat[which(lrt.mat[,j] <= chi.th),j]) - } - alpha.low = rep(0,pa) - for (j in 1:pa) { - alpha.low[j] <- min(alpha.mat[which(lrt.mat[,j] <= chi.th),j]) - } - - p.values <- pchisq(get.lrt(alpha.start), df = 1, lower.tail = FALSE) - return(list(low = alpha.low, - up = alpha.up, - p = p.values)) -} - - +#' Construct Likelihood-Ratio Confidence Intervals via Profiling (for \eqn{\alpha}) +#' +#' Computes **profile-likelihood** confidence intervals and LRT p-values for each +#' component of 
\eqn{\alpha} in a binary-outcome model with treatment indicator +#' \eqn{x \in \{0,1\}}. For each \eqn{\alpha_j}, the function fixes \eqn{\alpha_j} +#' at a grid of values, maximizes the log-likelihood over the remaining +#' \eqn{\alpha_{-j}} and all \eqn{\beta}, evaluates the likelihood-ratio statistic, +#' and finds where it falls below the \eqn{\chi^2_1} cutoff to form a +#' \eqn{95\%} profile CI. +#' +#' @param param Character. Model family switch: use \code{"RR"} for a relative-risk +#' parametrization (via \code{getProbRR}), otherwise \code{"RD"} for a +#' risk-difference parametrization (via \code{getProbRD}). +#' @param y Numeric vector of length \eqn{n}. Binary outcomes (0/1). +#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). +#' @param va Numeric matrix \eqn{n \times p_a}. Design for \eqn{\alpha}. +#' @param vb Numeric matrix \eqn{n \times p_b}. Design for \eqn{\beta}. +#' @param weight Numeric vector of length \eqn{n}. Observation weights. +#' @param max.step Integer. Maximum number of alternating (block) optimization +#' iterations used when profiling each \eqn{\alpha_j}. +#' @param thres Numeric. Convergence tolerance; the inner alternation stops when +#' the relative parameter change is below this value. +#' @param pars Numeric vector of length \eqn{p_a + p_b}. Concatenated MLEs +#' \code{c(alpha.ml, beta.ml)} used to center search ranges and for the null +#' (unconstrained) fit in the LRT. +#' @param se Numeric vector (typically of length \eqn{p_a}) giving marginal +#' standard errors for \eqn{\alpha} at the MLE; used to build the profiling +#' grid \eqn{\alpha_j \in [\alpha_{j,ml} \pm 3\,se_j]} (truncated to \code{[-12, 12]}). +#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{p_a}). +#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{p_b}). +#' +#' @return A list with components: +#' \describe{ +#' \item{\code{low}}{Numeric vector of length \eqn{p_a}. Lower 95\% profile CI +#' for each \eqn{\alpha_j}.} +#' \item{\code{up}}{Numeric vector of length \eqn{p_a}. Upper 95\% profile CI +#' for each \eqn{\alpha_j}.} +#' \item{\code{p}}{Numeric vector of length \eqn{p_a}. 
LRT p-values for testing +#' \eqn{H_0:\ \alpha_j=0} vs \eqn{H_1:\ \alpha_j \neq 0}.} +#' } +#' + +profile <- function(param, y, x, va, vb, weight, max.step, thres, pars, se, pa, pb) { + ## real data + getProb <- if (param == "RR") getProbRR else getProbRD + alpha.ml <- pars[1:pa] + beta.ml <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(va %*% alpha.ml, vb %*% beta.ml) + p0.ml <- p0p1[, 1] + p1.ml <- p0p1[, 2] + ## profile + + alpha.start <- rep(0, pa) + beta.start <- rep(0, pb) + + optm.beta <- function(alphaj, j) { + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + eps <- 1e-12 + p0 <- pmin(pmax(p0, eps), 1 - eps) + p1 <- pmin(pmax(p1, eps), 1 - eps) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weight[x == 1])) + } + + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + eps <- 1e-12 + p0 <- pmin(pmax(p0, eps), 1 - eps) + p1 <- pmin(pmax(p1, eps), 1 - eps) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) + } + + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + eps <- 1e-12 + p0 <- pmin(pmax(p0, eps), 1 - eps) + p1 <- pmin(pmax(p1, eps), 1 - eps) + + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) + } + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + alpha[j] <- alphaj + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + alpha[j] <- alphaj + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + return(neg.log.likelihood(c(alpha, beta))) + } + + LRT.alpha <- function(alpha, j) { + return(2 * optm.beta(alpha.ml[j], j) - 2 * optm.beta(alpha[j], j)) + } + + get.lrt <- function(alpha) { + lrt <- rep(0, length(alpha)) + for (j in 1:length(alpha)) { + lrt[j] <- LRT.alpha(alpha, j) + } + return(lrt) + } + + chi.th <- qchisq(0.95, df = 1) + + alpha.seq <- lapply(1:pa, function(j) { + seq(max(alpha.ml[j] - 3 * se[j], -12), + min(alpha.ml[j] + 3 * se[j], 12), + length.out = 40 + ) + }) + alpha.mat <- do.call(cbind, alpha.seq) + + result.lrt <- apply(alpha.mat, 1, get.lrt) + + lrt.mat <- if (pa > 1) { + t(result.lrt) + } else { + as.matrix(result.lrt, ncol = 1) + } + + alpha.up <- rep(0, pa) + for (j in 1:pa) { + alpha.up[j] <- max(alpha.mat[which(lrt.mat[, j] <= chi.th), j]) + } + alpha.low <- rep(0, pa) + for (j in 1:pa) { + alpha.low[j] <- min(alpha.mat[which(lrt.mat[, j] <= chi.th), j]) + } + + p.values <- pchisq(get.lrt(alpha.start), df = 1, lower.tail = FALSE) + return(list( + low = alpha.low, + up = 
alpha.up, + p = p.values + )) +} diff --git a/compare/CI_exact.R b/compare/CI_exact.R index 92f6d71..098728c 100644 --- a/compare/CI_exact.R +++ b/compare/CI_exact.R @@ -13,7 +13,7 @@ #' on \eqn{\alpha_j} until acceptability crosses the \code{0.05} threshold. #' #' @param param Character. Model family switch: \code{"RR"} (relative-risk scale) -#'otherwise \code{"RD"} (risk-difference scale) +#' otherwise \code{"RD"} (risk-difference scale) #' @param y Numeric vector of length \eqn{n}. Binary outcomes \code{0/1}. #' @param x Numeric vector of length \eqn{n}. Binary exposure \code{0/1}. #' @param va Numeric matrix \eqn{n \times p_a}. Design for \eqn{\alpha}. @@ -58,140 +58,146 @@ #' Blaker, H. (2000). Confidence curves and improved exact confidence intervals for discrete distributions. Canadian Journal of Statistics, 28(4), 783-798. #' https://doi.org/10.2307/3315916 (Theorem 1) #' -exact <- function(param,y, x, va, vb, weight, max.step, thres, thres.dicho, pars, se, pa, pb){ +exact <- function(param, y, x, va, vb, weight, max.step, thres, thres.dicho, pars, se, pa, pb) { ## real data - getProb =if (param == "RR") getProbRR else getProbRD - alpha.ml = pars[1:pa] - beta.ml = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml)) - p0.ml = p0p1[, 1]; p1.ml = p0p1[, 2] + getProb <- if (param == "RR") getProbRR else getProbRD + alpha.ml <- pars[1:pa] + beta.ml <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(mat_vec_mul(va, alpha.ml), mat_vec_mul(vb, beta.ml)) + p0.ml <- p0p1[, 1] + p1.ml <- p0p1[, 2] ## profile - alpha.start <- rep(0,pa) - beta.start <- rep(0,pb) + alpha.start <- rep(0, pa) + beta.start <- rep(0, pb) - optm.beta <- function(alphaj,j,y){ - - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[, 1]; p1 = p0p1[, 2] + optm.beta <- function(alphaj, j, y) { + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] eps <- 1e-12 p0 <- pmin(pmax(p0, eps), 1 - eps) p1 <- pmin(pmax(p1, eps), 1 - eps) return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weight[x == 1])) + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weight[x == 1])) } - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = p0p1[,1]; p1 = p0p1[,2] + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] eps <- 1e-12 p0 <- pmin(pmax(p0, eps), 1 - eps) p1 <- pmin(pmax(p1, eps), 1 - eps) - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) } - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) - p0 = 
p0p1[,1]; p1 = p0p1[,2] + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(mat_vec_mul(va, alpha), mat_vec_mul(vb, beta)) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] eps <- 1e-12 p0 <- pmin(pmax(p0, eps), 1 - eps) p1 <- pmin(pmax(p1, eps), 1 - eps) - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) } - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start - alpha[j] = alphaj - beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - alpha[j] = alphaj - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + alpha[j] <- alphaj + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + alpha[j] <- alphaj + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par } - return(neg.log.likelihood(c(alpha,beta))) + return(neg.log.likelihood(c(alpha, beta))) } - LRT.alpha <- function(alpha,j,y){ - return(2*optm.beta(alpha.ml[j],j,y)-2*optm.beta(alpha,j,y)) + LRT.alpha <- function(alpha, j, y) { + return(2 * optm.beta(alpha.ml[j], j, y) - 2 * optm.beta(alpha, j, y)) } - # Simulate distribution of observed profile‐LRT statistic - ptail <- function(alphaj, j, nsim = 500){ - + ptail <- function(alphaj, j, nsim = 500) { LRT.sim <- numeric(nsim) - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(va %*% alpha, vb %*% beta.sim) - p0 = p0p1[,1]; p1 = p0p1[,2] + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(va %*% alpha, vb %*% beta.sim) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] eps <- 1e-12 p0 <- pmin(pmax(p0, eps), 1 - eps) p1 <- pmin(pmax(p1, eps), 1 - eps) - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) } - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(va %*% alpha.sim, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(va %*% alpha.sim, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] eps <- 1e-12 p0 <- pmin(pmax(p0, eps), 1 - eps) p1 <- pmin(pmax(p1, eps), 1 - eps) - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])) + return(-sum((1 - 
y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1])) } - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha.sim = alpha.start - alpha.sim[j] = alphaj - beta.sim = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha.sim,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha.sim) - alpha.sim = opt1$par - alpha.sim[j] = alphaj - opt2 = stats::optim(beta.sim,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta.sim)) - beta.sim = opt2$par + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha.sim <- alpha.start + alpha.sim[j] <- alphaj + beta.sim <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha.sim, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha.sim) + alpha.sim <- opt1$par + alpha.sim[j] <- alphaj + opt2 <- stats::optim(beta.sim, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta.sim)) + beta.sim <- opt2$par } # Fitted probabilities under (alpha0, beta.sim) - prob = getProb(mat_vec_mul(va, alpha.sim), mat_vec_mul(vb, beta.sim)) - p0 = prob[,1]; p1 = prob[,2] + prob <- getProb(mat_vec_mul(va, alpha.sim), mat_vec_mul(vb, beta.sim)) + p0 <- prob[, 1] + p1 <- prob[, 2] - for(i in 1:nsim){ - y.sim = numeric(length(y)) + for (i in 1:nsim) { + y.sim <- numeric(length(y)) y.sim[x == 0] <- rbinom(sum(x == 0), 1, p0[x == 0]) y.sim[x == 1] <- rbinom(sum(x == 1), 1, p1[x == 1]) LRT.sim[i] <- LRT.alpha(alphaj, j, y.sim) @@ -215,12 +221,12 @@ exact <- function(param,y, x, va, vb, weight, max.step, thres, thres.dicho, pars } # function of dichotomy - dichotomy <- function(j, alpha.low, alpha.up, direction = 'low', thres.dicho = 1e-3,max.step = 20){ - alpha.iteration = alpha.up - step = 1 - while(alpha.up-alpha.low > thres.dicho & step < max.step){ - LRT.obs <- LRT.alpha(alpha.iteration,j,y) - LRT.sim <- ptail(alpha.iteration,j, nsim = (21-step)*150) + dichotomy <- function(j, alpha.low, alpha.up, direction = "low", thres.dicho = 1e-3, max.step = 20) { + alpha.iteration <- alpha.up + step <- 1 + while (alpha.up - alpha.low > thres.dicho & step < max.step) { + LRT.obs <- LRT.alpha(alpha.iteration, j, y) + LRT.sim <- ptail(alpha.iteration, j, nsim = (21 - step) * 150) a.val <- acceptability(alpha.iteration, LRT.obs, LRT.sim) cond <- if (direction == "low") { a.val > 0.05 @@ -228,42 +234,44 @@ exact <- function(param,y, x, va, vb, weight, max.step, thres, thres.dicho, pars a.val < 0.05 } cond <- isTRUE(as.logical(cond)) - if(cond){ - alpha.up = alpha.iteration - alpha.iteration = (alpha.up + alpha.low)/2 - }else{ - alpha.low = alpha.iteration - alpha.iteration = (alpha.up + alpha.low)/2 + if (cond) { + alpha.up <- alpha.iteration + alpha.iteration <- (alpha.up + alpha.low) / 2 + } else { + alpha.low <- alpha.iteration + alpha.iteration <- (alpha.up + alpha.low) / 2 } - step = step+1 + step <- step + 1 } return(list(alpha.dicho = alpha.iteration, convergence = (step < max.step))) } # get candidate of alpha - alpha.up.start = pmin(alpha.ml + 4*se[1:pa],8) - alpha.low.start = pmax(alpha.ml - 4*se[1:pa],-8) + alpha.up.start <- pmin(alpha.ml + 4 * se[1:pa], 8) + 
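# The dichotomy() helper shown above bisects the bracket between the MLE and a
# candidate outer bound (set just below) until the Monte-Carlo acceptability of
# the fixed alpha_j crosses the 0.05 threshold. The acceptability() function
# itself is defined elsewhere in this file and is not part of this hunk; a
# minimal sketch, assuming it is simply the Monte-Carlo tail probability of the
# observed profile-LRT statistic (the Blaker-type two-sided construction cited
# in the file's documentation may refine this), would be:
acceptability_mc <- function(LRT.obs, LRT.sim) {
  # share of simulated LRT statistics at least as large as the observed one
  mean(LRT.sim >= LRT.obs)
}
# e.g. acceptability_mc(qchisq(0.95, df = 1), rchisq(1e4, df = 1)) is close to 0.05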
alpha.low.start <- pmax(alpha.ml - 4 * se[1:pa], -8) - alpha.up1 <- rep(0,pa) - for(j in 1:pa){ - alpha.up1[j] = dichotomy(j,alpha.ml[j], alpha.up.start[j],'up',thres.dicho = 1e-3)$alpha.dicho + alpha.up1 <- rep(0, pa) + for (j in 1:pa) { + alpha.up1[j] <- dichotomy(j, alpha.ml[j], alpha.up.start[j], "up", thres.dicho = 1e-3)$alpha.dicho } - alpha.low1 <- rep(0,pa) - for(j in 1:pa){ - alpha.low1[j] = dichotomy(j,alpha.low.start[j], alpha.ml[j],'low',thres.dicho = 1e-3)$alpha.dicho + alpha.low1 <- rep(0, pa) + for (j in 1:pa) { + alpha.low1[j] <- dichotomy(j, alpha.low.start[j], alpha.ml[j], "low", thres.dicho = 1e-3)$alpha.dicho } # Build the 95% CI - p.value <- rep(0,pa) + p.value <- rep(0, pa) for (j in 1:pa) { - LRT.obs.p = LRT.alpha(0,j,y) - LRT.sim.p <- ptail(0,j, nsim = 2000) + LRT.obs.p <- LRT.alpha(0, j, y) + LRT.sim.p <- ptail(0, j, nsim = 2000) p.value[j] <- acceptability(0, LRT.obs.p, LRT.sim.p) } - return(list(low = alpha.low1, - up = alpha.up1, - p = p.value)) + return(list( + low = alpha.low1, + up = alpha.up1, + p = p.value + )) } diff --git a/compare/MLE_Point_Firth_for_RD.R b/compare/MLE_Point_Firth_for_RD.R index 63ae7ed..69459bf 100644 --- a/compare/MLE_Point_Firth_for_RD.R +++ b/compare/MLE_Point_Firth_for_RD.R @@ -1,204 +1,200 @@ -#' Maximum Likelihood Estimation for Risk‐Difference models with Firth's Augmentation -#' -#' Firth's method: Firth, D. (1993). Bias reduction of maximum likelihood estimates. Biometrika, 80(1), 27-38. -#' -#' Optimize the log‐likelihood for a binary‐outcome regression model on the -#' risk‐difference (RD) scale using an iterative augmentation scheme. -#' -#' @param param Character string, takes\code{"RD"} -#' -#' @param y Numeric vector of length \eqn{n}. Binary outcome values (0/1). -#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). -#' @param va Numeric matrix of dimension \eqn{n \times pa}. -#' @param vb Numeric matrix of dimension \eqn{n \times pb}. -#' @param alpha.start Numeric vector of length \eqn{pa}, or \code{NULL}. Initial -#' values for the \eqn{\alpha} parameters; if \code{NULL}, defaults to a zero -#' vector of length \eqn{pa}. -#' @param beta.start Numeric vector of length \eqn{pb}, or \code{NULL}. Initial -#' values for the \eqn{\beta} parameters; if \code{NULL}, defaults to a zero -#' vector of length \eqn{pb}. -#' @param weight Numeric vector of length \eqn{n}. -#' @param max.step Integer. Maximum number of alternating iterations to perform. -#' @param thres Numeric. Convergence threshold on relative change in parameters. -#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{pa}). -#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{pb}). -#' -#' this model is for pa = 1 and pb = 2, if pa and pb change, or param change, the function -#' 'compute.components', 'compute.augmentation' and 'compute.score' need to be changed. -#' If model change, 'compute.components' and 'compute.score' need to be changed. -#' - -max.likelihood.firth.rd = function(param, y, x, va, vb, alpha.start, beta.start, weight, max.step, thres, pa, pb) { - ### augmentation calculation, compute: - #' - the observed Fisher information matrix, - #' - its inverse, - #' - third‐order cumulants \(k_{s t u}\), κ_{s,t,u} = n^{-1} * E{ U_s, U_t, U_u } - #' - mixed cumulants \(k_{s, t u}\), κ_{s,tu} = n^{-1} * E{ U_s, U_{tu}}. 
- #' -compute.components = function(x, alpha.ml, beta.ml, va, vb, weight) { - - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) #n by 2 - # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(vb) - pA = p0 - pA[x == 1] = p1[x == 1] - # s0 = p0 * (1 - p0) # n by 1 - # s1 = p1 * (1 - p1) - # sA = pA * (1 - pA) - s0 <- pmax(p0 * (1 - p0), 1e-8) - s1 <- pmax(p1 * (1 - p1), 1e-8) - sA <- pmax(pA * (1 - pA), 1e-8) - - - rho = as.vector(tanh(va %*% alpha.ml)) #estimated risk differences n by 1 - - ### First order derivatives ### - va.1 = 1 - vb.1 = 1 - expect.dl.by.dpA = 1/sA # n by 1 - dp0.by.dphi = s0 * s1/(s0 + s1) # n by 1 - dp0.by.drho = -s0/(s0 + s1) # n by 1 - drho.by.dalpha = va.1*(1 - rho^2) # n by 1 - dphi.by.dbeta =vb.1 - - dpA.by.drho = dp0.by.drho + x # n by 1 - dpA.by.dalpha = drho.by.dalpha * dpA.by.drho # n by 1 - dpA.by.dphi = dp0.by.dphi # n by 1 - dpA.by.dbeta = dphi.by.dbeta * dpA.by.dphi # n by 1 - - - - - ### Second order derivatives ### - - expect.d2l.by.dpA.2 = -(1 - 2*pA)/sA^2 # n by 1 - d2pA.by.drho.2 = s0 * s1 * (2 - 2 * p0 - 2 * p1)/(s0 + s1)^3 # n by 1 - d2pA.by.dphi.drho = (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1/(s0 + s1)^3 # n by 1 - d2pA.by.dphi.2 = (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1/(s0 +s1)^3 # n by 1 - - d2rho.by.dalpha.2 = -2 * va.1 * rho * drho.by.dalpha # n by 1 - - ### Compute elements of the Hessian matrix ### - - d2l.by.dalpha.2 = dpA.by.dalpha * expect.d2l.by.dpA.2 * weight * dpA.by.dalpha + - drho.by.dalpha * expect.dl.by.dpA * d2pA.by.drho.2 * weight * drho.by.dalpha - - 2 * va.1 * rho * expect.dl.by.dpA * dpA.by.drho * weight * drho.by.dalpha # n by 1 - - d2l.by.dalpha.dbeta = dpA.by.dalpha * expect.d2l.by.dpA.2 * weight * dpA.by.dbeta + - drho.by.dalpha * expect.dl.by.dpA * d2pA.by.dphi.drho * weight * dphi.by.dbeta - d2l.by.dbeta.dalpha = d2l.by.dalpha.dbeta - - d2l.by.dbeta.2 = dpA.by.dbeta * expect.d2l.by.dpA.2 * weight * dpA.by.dbeta + - dphi.by.dbeta * expect.dl.by.dpA * d2pA.by.dphi.2 * weight * dphi.by.dbeta - - ### - - - ## fisher info - - ## k_{s,t,u} - c.stu.A = (1-2*pA)/(sA^2) - c.stu.alpha = (dp0.by.drho + x) * drho.by.dalpha - c.stu.beta = dp0.by.dphi - - k.aaa = c.stu.A*c.stu.alpha^3 - k.aab = c.stu.A*c.stu.alpha^2*c.stu.beta - k.abb = c.stu.A*c.stu.alpha*c.stu.beta^2 - k.bbb = c.stu.A*c.stu.beta^3 - - - ## k_{s,tu} - - - k.a.aa = c.stu.alpha*d2l.by.dalpha.2 - k.a.ab = c.stu.alpha*d2l.by.dalpha.dbeta - k.a.bb = c.stu.alpha*d2l.by.dbeta.2 - k.b.aa = c.stu.beta*d2l.by.dalpha.2 - k.b.ab = c.stu.beta*d2l.by.dalpha.dbeta - k.b.bb = c.stu.beta*d2l.by.dbeta.2 - - expect.dl.by.dpA.squared = 1/sA - dp0.by.dphi = s0 * s1/(s0 + s1) - dp0.by.drho = -s0/(s0 + s1) - drho.by.dalpha = va*(1 - rho^2) - dphi.by.dbeta = vb - - tmp = cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) - fisher.info = (t(expect.dl.by.dpA.squared * weight * tmp) %*% tmp) - - - return(list(fisher = fisher.info,fisher.invers = ginv(fisher.info),k.stu = cbind(k.aaa, k.aab, k.abb, k.bbb),k.s.tu = cbind(k.a.aa, k.a.ab, k.a.bb, k.b.aa, k.b.ab, k.b.bb))) -} - -compute.augmentation <- function(components,va,vb){ - pa = ncol(va) - pb = ncol(vb) - n = dim(vb)[1] - fisher = components$fisher - k.rs = components$fisher.invers - k.stu = components$k.stu - k.s.tu = components$k.s.tu - - return(compute_augmentation_cpp(va, vb, fisher, k.rs, k.stu, k.s.tu)) - -} - -compute.score <- function(x, alpha.ml, beta.ml, va, vb){ - - p0p1 = getProbRD(va %*% alpha.ml, vb %*% beta.ml) - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = 
nrow(vb) - pA = p0 - pA[x == 1] = p1[x == 1] - s0 = p0 * (1 - p0) # n by 1 - s1 = p1 * (1 - p1) - sA = pA * (1 - pA) - - rho = as.vector(tanh(va * alpha.ml)) - - score.alpha <- colSums(((y-pA)/sA)*(x-s0/(s0 + s1))*(1 - rho^2) * va) - score.beta <- colSums(((y-pA)/sA)*(s0 * s1/(s0 + s1))*vb) - return(c(score.alpha,score.beta)) -} - - -optim.alpha <- function(alpha,beta){ - score.intial = compute.score(x,alpha,beta,va,vb) - components = compute.components(x,alpha,beta,va,vb,weight) - augment.intial = compute.augmentation(components,va,vb) - return(max(abs(score.intial[1:pa] + t(augment.intial)[1:pa]))) -} -optim.beta <- function(alpha,beta){ - score.intial = compute.score(x,alpha,beta,va,vb) - components = compute.components(x,alpha,beta,va,vb,weight) - augment.intial = compute.augmentation(components,va,vb) - return(max(abs(score.intial[(pa+1):(pa+pb)] + t(augment.intial)[(pa+1):(pa+pb)]))) -} - - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start - beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step+1 - target.alpha <- function(alpha) { optim.alpha(alpha,beta) } - result.a <- optim(alpha, target.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(result.a$par,alpha) - alpha = result.a$par - target.beta <- function(beta) { optim.beta(alpha,beta) } - result.b <- optim(beta, target.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(result.b$par,beta)) - beta = result.b$par - } - opt = list(par = c(alpha,beta), convergence = (step < max.step), step = step) - - return(opt) -} - - - +#' Maximum Likelihood Estimation for Risk‐Difference models with Firth's Augmentation +#' +#' Firth's method: Firth, D. (1993). Bias reduction of maximum likelihood estimates. Biometrika, 80(1), 27-38. +#' +#' Optimize the log‐likelihood for a binary‐outcome regression model on the +#' risk‐difference (RD) scale using an iterative augmentation scheme. +#' +#' @param param Character string, takes\code{"RD"} +#' +#' @param y Numeric vector of length \eqn{n}. Binary outcome values (0/1). +#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). +#' @param va Numeric matrix of dimension \eqn{n \times pa}. +#' @param vb Numeric matrix of dimension \eqn{n \times pb}. +#' @param alpha.start Numeric vector of length \eqn{pa}, or \code{NULL}. Initial +#' values for the \eqn{\alpha} parameters; if \code{NULL}, defaults to a zero +#' vector of length \eqn{pa}. +#' @param beta.start Numeric vector of length \eqn{pb}, or \code{NULL}. Initial +#' values for the \eqn{\beta} parameters; if \code{NULL}, defaults to a zero +#' vector of length \eqn{pb}. +#' @param weight Numeric vector of length \eqn{n}. +#' @param max.step Integer. Maximum number of alternating iterations to perform. +#' @param thres Numeric. Convergence threshold on relative change in parameters. +#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{pa}). +#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{pb}). +#' +#' this model is for pa = 1 and pb = 2, if pa and pb change, or param change, the function +#' 'compute.components', 'compute.augmentation' and 'compute.score' need to be changed. +#' If model change, 'compute.components' and 'compute.score' need to be changed. 
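# A minimal usage sketch for the pa = 1, pb = 2 case described above, using
# simulated data and illustrative settings only; it assumes the package
# internals used by the function defined just below (getProbRD,
# compute_augmentation_cpp, MASS::ginv) are already available, e.g. via
# devtools::load_all():
set.seed(1)
n <- 200
x <- rbinom(n, 1, 0.5)            # binary exposure
v <- rbinom(n, 1, 0.5)            # one binary covariate
va <- matrix(1, n, 1)             # design for alpha: intercept only (pa = 1)
vb <- cbind(1, v)                 # design for beta: intercept + covariate (pb = 2)
y <- rbinom(n, 1, 0.3 + 0.1 * x)  # outcomes with a true risk difference of about 0.1
fit <- max.likelihood.firth.rd("RD", y, x, va, vb,
  alpha.start = rep(0, 1), beta.start = rep(0, 2),
  weight = rep(1, n), max.step = 100, thres = 1e-6, pa = 1, pb = 2
)
fit$par  # c(alpha, beta); with an intercept-only va, tanh(alpha) is the fitted risk difference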
+#' + +max.likelihood.firth.rd <- function(param, y, x, va, vb, alpha.start, beta.start, weight, max.step, thres, pa, pb) { + ### augmentation calculation, compute: + #' - the observed Fisher information matrix, + #' - its inverse, + #' - third‐order cumulants \(k_{s t u}\), κ_{s,t,u} = n^{-1} * E{ U_s, U_t, U_u } + #' - mixed cumulants \(k_{s, t u}\), κ_{s,tu} = n^{-1} * E{ U_s, U_{tu}}. + #' + compute.components <- function(x, alpha.ml, beta.ml, va, vb, weight) { + p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) # n by 2 + # p0p1 = cbind(p0, p1): n * 2 matrix + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(vb) + pA <- p0 + pA[x == 1] <- p1[x == 1] + # s0 = p0 * (1 - p0) # n by 1 + # s1 = p1 * (1 - p1) + # sA = pA * (1 - pA) + s0 <- pmax(p0 * (1 - p0), 1e-8) + s1 <- pmax(p1 * (1 - p1), 1e-8) + sA <- pmax(pA * (1 - pA), 1e-8) + + + rho <- as.vector(tanh(va %*% alpha.ml)) # estimated risk differences n by 1 + + ### First order derivatives ### + va.1 <- 1 + vb.1 <- 1 + expect.dl.by.dpA <- 1 / sA # n by 1 + dp0.by.dphi <- s0 * s1 / (s0 + s1) # n by 1 + dp0.by.drho <- -s0 / (s0 + s1) # n by 1 + drho.by.dalpha <- va.1 * (1 - rho^2) # n by 1 + dphi.by.dbeta <- vb.1 + + dpA.by.drho <- dp0.by.drho + x # n by 1 + dpA.by.dalpha <- drho.by.dalpha * dpA.by.drho # n by 1 + dpA.by.dphi <- dp0.by.dphi # n by 1 + dpA.by.dbeta <- dphi.by.dbeta * dpA.by.dphi # n by 1 + + + ### Second order derivatives ### + + expect.d2l.by.dpA.2 <- -(1 - 2 * pA) / sA^2 # n by 1 + d2pA.by.drho.2 <- s0 * s1 * (2 - 2 * p0 - 2 * p1) / (s0 + s1)^3 # n by 1 + d2pA.by.dphi.drho <- (s0 * (1 - 2 * p1) - s1 * (1 - 2 * p0)) * s0 * s1 / (s0 + s1)^3 # n by 1 + d2pA.by.dphi.2 <- (s0^2 * (1 - 2 * p1) + s1^2 * (1 - 2 * p0)) * s0 * s1 / (s0 + s1)^3 # n by 1 + + d2rho.by.dalpha.2 <- -2 * va.1 * rho * drho.by.dalpha # n by 1 + + ### Compute elements of the Hessian matrix ### + + d2l.by.dalpha.2 <- dpA.by.dalpha * expect.d2l.by.dpA.2 * weight * dpA.by.dalpha + + drho.by.dalpha * expect.dl.by.dpA * d2pA.by.drho.2 * weight * drho.by.dalpha - + 2 * va.1 * rho * expect.dl.by.dpA * dpA.by.drho * weight * drho.by.dalpha # n by 1 + + d2l.by.dalpha.dbeta <- dpA.by.dalpha * expect.d2l.by.dpA.2 * weight * dpA.by.dbeta + + drho.by.dalpha * expect.dl.by.dpA * d2pA.by.dphi.drho * weight * dphi.by.dbeta + d2l.by.dbeta.dalpha <- d2l.by.dalpha.dbeta + + d2l.by.dbeta.2 <- dpA.by.dbeta * expect.d2l.by.dpA.2 * weight * dpA.by.dbeta + + dphi.by.dbeta * expect.dl.by.dpA * d2pA.by.dphi.2 * weight * dphi.by.dbeta + + ### + + + ## fisher info + + ## k_{s,t,u} + c.stu.A <- (1 - 2 * pA) / (sA^2) + c.stu.alpha <- (dp0.by.drho + x) * drho.by.dalpha + c.stu.beta <- dp0.by.dphi + + k.aaa <- c.stu.A * c.stu.alpha^3 + k.aab <- c.stu.A * c.stu.alpha^2 * c.stu.beta + k.abb <- c.stu.A * c.stu.alpha * c.stu.beta^2 + k.bbb <- c.stu.A * c.stu.beta^3 + + + ## k_{s,tu} + + + k.a.aa <- c.stu.alpha * d2l.by.dalpha.2 + k.a.ab <- c.stu.alpha * d2l.by.dalpha.dbeta + k.a.bb <- c.stu.alpha * d2l.by.dbeta.2 + k.b.aa <- c.stu.beta * d2l.by.dalpha.2 + k.b.ab <- c.stu.beta * d2l.by.dalpha.dbeta + k.b.bb <- c.stu.beta * d2l.by.dbeta.2 + + expect.dl.by.dpA.squared <- 1 / sA + dp0.by.dphi <- s0 * s1 / (s0 + s1) + dp0.by.drho <- -s0 / (s0 + s1) + drho.by.dalpha <- va * (1 - rho^2) + dphi.by.dbeta <- vb + + tmp <- cbind((dp0.by.drho + x) * drho.by.dalpha, dp0.by.dphi * dphi.by.dbeta) + fisher.info <- (t(expect.dl.by.dpA.squared * weight * tmp) %*% tmp) + + + return(list(fisher = fisher.info, fisher.invers = ginv(fisher.info), k.stu = cbind(k.aaa, k.aab, k.abb, k.bbb), k.s.tu = cbind(k.a.aa, 
k.a.ab, k.a.bb, k.b.aa, k.b.ab, k.b.bb))) + } + + compute.augmentation <- function(components, va, vb) { + pa <- ncol(va) + pb <- ncol(vb) + n <- dim(vb)[1] + fisher <- components$fisher + k.rs <- components$fisher.invers + k.stu <- components$k.stu + k.s.tu <- components$k.s.tu + + return(compute_augmentation_cpp(va, vb, fisher, k.rs, k.stu, k.s.tu)) + } + + compute.score <- function(x, alpha.ml, beta.ml, va, vb) { + p0p1 <- getProbRD(va %*% alpha.ml, vb %*% beta.ml) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(vb) + pA <- p0 + pA[x == 1] <- p1[x == 1] + s0 <- p0 * (1 - p0) # n by 1 + s1 <- p1 * (1 - p1) + sA <- pA * (1 - pA) + + rho <- as.vector(tanh(va * alpha.ml)) + + score.alpha <- colSums(((y - pA) / sA) * (x - s0 / (s0 + s1)) * (1 - rho^2) * va) + score.beta <- colSums(((y - pA) / sA) * (s0 * s1 / (s0 + s1)) * vb) + return(c(score.alpha, score.beta)) + } + + + optim.alpha <- function(alpha, beta) { + score.intial <- compute.score(x, alpha, beta, va, vb) + components <- compute.components(x, alpha, beta, va, vb, weight) + augment.intial <- compute.augmentation(components, va, vb) + return(max(abs(score.intial[1:pa] + t(augment.intial)[1:pa]))) + } + optim.beta <- function(alpha, beta) { + score.intial <- compute.score(x, alpha, beta, va, vb) + components <- compute.components(x, alpha, beta, va, vb, weight) + augment.intial <- compute.augmentation(components, va, vb) + return(max(abs(score.intial[(pa + 1):(pa + pb)] + t(augment.intial)[(pa + 1):(pa + pb)]))) + } + + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + target.alpha <- function(alpha) { + optim.alpha(alpha, beta) + } + result.a <- optim(alpha, target.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(result.a$par, alpha) + alpha <- result.a$par + target.beta <- function(beta) { + optim.beta(alpha, beta) + } + result.b <- optim(beta, target.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(result.b$par, beta)) + beta <- result.b$par + } + opt <- list(par = c(alpha, beta), convergence = (step < max.step), step = step) + + return(opt) +} diff --git a/compare/MLE_Point_Firth_for_RR.R b/compare/MLE_Point_Firth_for_RR.R index bdafd97..afc4391 100644 --- a/compare/MLE_Point_Firth_for_RR.R +++ b/compare/MLE_Point_Firth_for_RR.R @@ -1,6 +1,6 @@ #' Maximum Likelihood Estimation for Relative‐Risk models with Firth's Augmentation #' -#'Firth's method: Firth, D. (1993). Bias reduction of maximum likelihood estimates. Biometrika, 80(1), 27-38. +#' Firth's method: Firth, D. (1993). Bias reduction of maximum likelihood estimates. Biometrika, 80(1), 27-38. #' In the middle of page 29, the first-order bias of \hat{\theta}. #' Optimize the log‐likelihood for a binary‐outcome regression model on the #' relative‐risk (RR) scale using an iterative @@ -25,166 +25,163 @@ #' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{pb}). 
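# Both Firth variants assemble per-observation third-order cumulants k_{s,t,u}
# and mixed cumulants k_{s,tu} and pass them, together with the Fisher
# information and its inverse, to compute_augmentation_cpp(). Purely as a
# bookkeeping illustration of the quantity described in the surrounding
# comments,
#   A_r = sum_{s,t,u} k^{r,s} k^{t,u} (k_{s,t,u} + k_{s,tu}) / 2,
# here is a plain-R version written for generic p x p x p cumulant arrays; the
# C++ routine is the implementation actually used and works from the
# n-by-combination matrices built in compute.components(), so its layout and
# index conventions differ.
augmentation_sketch <- function(k.rs, K.stu, K.s.tu) {
  # k.rs: inverse Fisher information (p x p); K.stu, K.s.tu: p x p x p arrays
  p <- nrow(k.rs)
  A <- numeric(p)
  for (r in 1:p) {
    for (s in 1:p) {
      for (t in 1:p) {
        for (u in 1:p) {
          A[r] <- A[r] + k.rs[r, s] * k.rs[t, u] * (K.stu[s, t, u] + K.s.tu[s, t, u]) / 2
        }
      }
    }
  }
  A
}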
#' -max.likelihood.firth.rr = function(param, y, x, va, vb, alpha.start, beta.start, weight, max.step, thres, pa, pb) { - +max.likelihood.firth.rr <- function(param, y, x, va, vb, alpha.start, beta.start, weight, max.step, thres, pa, pb) { ### augmentation calculation, calculate the observed values of # κ_{r,s} = n^{-1} * E{ U_r, U_s }, κ_{s,t,u} = n^{-1} * E{ U_s, U_t, U_u }, and κ_{s,tu} = n^{-1} * E{ U_s, U_{tu}} # with va and vb all equal to 1 - compute.components = function(x, alpha.ml, beta.ml, va, vb, weight) { - - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) + compute.components <- function(x, alpha.ml, beta.ml, va, vb, weight) { + p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) # p0p1 = cbind(p0, p1): n * 2 matrix - p0 = p0p1[, 1] - p1 = p0p1[, 2] - n = nrow(vb) - pA = p0 - pA[x == 1] = p1[x == 1] + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + n <- nrow(vb) + pA <- p0 + pA[x == 1] <- p1[x == 1] ### Building blocks - dpsi0.by.dtheta = -(1 - p0)/(1 - p0 + 1 - p1) - dpsi0.by.dphi = (1 - p0) * (1 - p1)/(1 - p0 + 1 - p1) + dpsi0.by.dtheta <- -(1 - p0) / (1 - p0 + 1 - p1) + dpsi0.by.dphi <- (1 - p0) * (1 - p1) / (1 - p0 + 1 - p1) - dtheta.by.dalpha = 1 - dphi.by.dbeta = 1 - - expect.dl.by.dpsi0 = pA/((1 - pA)) - expect.d2l.by.dpsi0.2 = pA^2/((1 - pA)^2) + dtheta.by.dalpha <- 1 + dphi.by.dbeta <- 1 + expect.dl.by.dpsi0 <- pA / ((1 - pA)) + expect.d2l.by.dpsi0.2 <- pA^2 / ((1 - pA)^2) ###### d2l.by.dalpha.2 - d2psi0.by.dtheta.2 = ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1)/((1 - p0 + 1 - p1)^2) + d2psi0.by.dtheta.2 <- ((p0 - p1) * dpsi0.by.dtheta - (1 - p0) * p1) / ((1 - p0 + 1 - p1)^2) - d2l.by.dtheta.2 = expect.d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + expect.dl.by.dpsi0 * + d2l.by.dtheta.2 <- expect.d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x)^2 + expect.dl.by.dpsi0 * d2psi0.by.dtheta.2 - d2l.by.dalpha.2 = dtheta.by.dalpha * d2l.by.dtheta.2 * weight * + d2l.by.dalpha.2 <- dtheta.by.dalpha * d2l.by.dtheta.2 * weight * dtheta.by.dalpha ###### d2l.by.dalpha.dbeta - d2psi0.by.dtheta.dphi = (1 - p0) * (1 - p1) * (p0 - p1)/(1 - p0 + 1 - p1)^3 + d2psi0.by.dtheta.dphi <- (1 - p0) * (1 - p1) * (p0 - p1) / (1 - p0 + 1 - p1)^3 - d2l.by.dtheta.dphi = expect.d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + + d2l.by.dtheta.dphi <- expect.d2l.by.dpsi0.2 * (dpsi0.by.dtheta + x) * dpsi0.by.dphi + expect.dl.by.dpsi0 * d2psi0.by.dtheta.dphi - d2l.by.dalpha.dbeta = dtheta.by.dalpha * d2l.by.dtheta.dphi * weight * + d2l.by.dalpha.dbeta <- dtheta.by.dalpha * d2l.by.dtheta.dphi * weight * dphi.by.dbeta - d2l.by.dbeta.dalpha = d2l.by.dalpha.dbeta + d2l.by.dbeta.dalpha <- d2l.by.dalpha.dbeta # d2l.by.dalpha.dbeta is symmetric itself if (because) va=vb #### d2l.by.dbeta2 - d2psi0.by.dphi.2 = (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2)/(1 - p0 + 1 - - p1)^2) * dpsi0.by.dphi + d2psi0.by.dphi.2 <- (-(p0 * (1 - p1)^2 + p1 * (1 - p0)^2) / (1 - p0 + 1 - + p1)^2) * dpsi0.by.dphi - d2l.by.dphi.2 = expect.d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + expect.dl.by.dpsi0 * d2psi0.by.dphi.2 + d2l.by.dphi.2 <- expect.d2l.by.dpsi0.2 * (dpsi0.by.dphi)^2 + expect.dl.by.dpsi0 * d2psi0.by.dphi.2 - d2l.by.dbeta.2 = dphi.by.dbeta * d2l.by.dphi.2 * weight * dphi.by.dbeta + d2l.by.dbeta.2 <- dphi.by.dbeta * d2l.by.dphi.2 * weight * dphi.by.dbeta ### ## fisher info κ_{r,s} - expect.dl.by.dpsi0.squared = (pA)/(1 - pA) - dpsi0.by.dphi = (1 - p0) * (1 - p1)/((1 - p0) + (1 - p1)) - dpsi0.by.dtheta = -(1 - p0)/((1 - p0) + (1 - p1)) - tmp = cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) + expect.dl.by.dpsi0.squared <- (pA) / (1 - pA) + 
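# The lines that follow assemble the expected information as a weighted
# cross-product: with d_i = c((dpsi0/dtheta + x_i) * va_i, (dpsi0/dphi) * vb_i)
# and w_i = weight_i * E[(dl_i/dpsi0)^2] = weight_i * pA_i / (1 - pA_i), the
# information is
#   I(alpha, beta) = sum_i w_i * d_i %*% t(d_i),
# which is what t(expect.dl.by.dpsi0.squared * weight * tmp) %*% tmp computes,
# the rows of tmp playing the role of d_i.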
dpsi0.by.dphi <- (1 - p0) * (1 - p1) / ((1 - p0) + (1 - p1)) + dpsi0.by.dtheta <- -(1 - p0) / ((1 - p0) + (1 - p1)) + tmp <- cbind((dpsi0.by.dtheta + x) * va, dpsi0.by.dphi * vb) ## since dtheta.by.dalpha = va, and dphi.by.dbeta = vb - fisher.info = (t(expect.dl.by.dpsi0.squared * weight * tmp) %*% tmp) + fisher.info <- (t(expect.dl.by.dpsi0.squared * weight * tmp) %*% tmp) ## k_{s,t,u} - c.stu.A = pA*(1-2*pA)/(1-pA)^2 - c.stu.alpha = (x - (1 - p0)/((1 - p0) + (1 - p1))) - c.stu.beta = (1 - p0) * (1 - p1)/((1 - p0) + (1 - p1)) + c.stu.A <- pA * (1 - 2 * pA) / (1 - pA)^2 + c.stu.alpha <- (x - (1 - p0) / ((1 - p0) + (1 - p1))) + c.stu.beta <- (1 - p0) * (1 - p1) / ((1 - p0) + (1 - p1)) - k.aaa = c.stu.A*c.stu.alpha^3 - k.aab = c.stu.A*c.stu.alpha^2*c.stu.beta - k.abb = c.stu.A*c.stu.alpha*c.stu.beta^2 - k.bbb = c.stu.A*c.stu.beta^3 + k.aaa <- c.stu.A * c.stu.alpha^3 + k.aab <- c.stu.A * c.stu.alpha^2 * c.stu.beta + k.abb <- c.stu.A * c.stu.alpha * c.stu.beta^2 + k.bbb <- c.stu.A * c.stu.beta^3 ## k_{s,tu} - k.a.aa = as.vector(c.stu.alpha*d2l.by.dalpha.2) - k.a.ab = as.vector(c.stu.alpha*d2l.by.dalpha.dbeta) - k.a.bb = as.vector(c.stu.alpha*d2l.by.dbeta.2) - k.b.aa = as.vector(c.stu.beta*d2l.by.dalpha.2) - k.b.ab = as.vector(c.stu.beta*d2l.by.dalpha.dbeta) - k.b.bb = as.vector(c.stu.beta*d2l.by.dbeta.2) + k.a.aa <- as.vector(c.stu.alpha * d2l.by.dalpha.2) + k.a.ab <- as.vector(c.stu.alpha * d2l.by.dalpha.dbeta) + k.a.bb <- as.vector(c.stu.alpha * d2l.by.dbeta.2) + k.b.aa <- as.vector(c.stu.beta * d2l.by.dalpha.2) + k.b.ab <- as.vector(c.stu.beta * d2l.by.dalpha.dbeta) + k.b.bb <- as.vector(c.stu.beta * d2l.by.dbeta.2) - return(list(fisher = fisher.info,fisher.invers = solve(fisher.info),k.stu = cbind(k.aaa, k.aab, k.abb, k.bbb),k.s.tu = cbind(k.a.aa, k.a.ab, k.a.bb, k.b.aa, k.b.ab, k.b.bb))) + return(list(fisher = fisher.info, fisher.invers = solve(fisher.info), k.stu = cbind(k.aaa, k.aab, k.abb, k.bbb), k.s.tu = cbind(k.a.aa, k.a.ab, k.a.bb, k.b.aa, k.b.ab, k.b.bb))) } #' @param components A list as returned by \code{\link{compute.components}}. ### calculate κ^{r,s} κ^{t,u} (κ_{s,t,u} + κ_{s,tu}) / 2 with real va and vb. 
Since it is all the possible combinations of va and vb,I use "for" - compute.augmentation <- function(components,va,vb){ - pa = ifelse(is.null(dim(va)),1,dim(va)[2]) - pb = ncol(vb) - n = dim(vb)[1] - fisher = components$fisher - k.rs = components$fisher.invers - k.stu = components$k.stu - k.s.tu = components$k.s.tu + compute.augmentation <- function(components, va, vb) { + pa <- ifelse(is.null(dim(va)), 1, dim(va)[2]) + pb <- ncol(vb) + n <- dim(vb)[1] + fisher <- components$fisher + k.rs <- components$fisher.invers + k.stu <- components$k.stu + k.s.tu <- components$k.s.tu return(compute_augmentation_cpp(va, vb, fisher, k.rs, k.stu, k.s.tu)) - } ### the score function for alpha and beta - compute.score <- function(x, alpha.ml, beta.ml, va, vb){ - p0p1 = getProbRR(va %*% alpha.ml, vb %*% beta.ml) - n = dim(vb)[1] - pA = rep(NA, n) - pA[x == 0] = p0p1[x == 0, 1] - pA[x == 1] = p0p1[x == 1, 2] - score.alpha <- colSums(((y-pA)/(1-pA))*(x-(1-p0p1[, 1])/((1-p0p1[, 1])+(1-p0p1[, 2])))*va) - score.beta <- colSums(((y-pA)/(1-pA))*(1 - p0p1[, 1]) * (1 - p0p1[, 2])/((1 - p0p1[, 1]) + (1 - p0p1[, 2]))*vb) - return(c(score.alpha,score.beta)) + compute.score <- function(x, alpha.ml, beta.ml, va, vb) { + p0p1 <- getProbRR(va %*% alpha.ml, vb %*% beta.ml) + n <- dim(vb)[1] + pA <- rep(NA, n) + pA[x == 0] <- p0p1[x == 0, 1] + pA[x == 1] <- p0p1[x == 1, 2] + score.alpha <- colSums(((y - pA) / (1 - pA)) * (x - (1 - p0p1[, 1]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2]))) * va) + score.beta <- colSums(((y - pA) / (1 - pA)) * (1 - p0p1[, 1]) * (1 - p0p1[, 2]) / ((1 - p0p1[, 1]) + (1 - p0p1[, 2])) * vb) + return(c(score.alpha, score.beta)) } - optim.alpha <- function(alpha,beta){ - score.intial = compute.score(x,alpha,beta,va,vb) - components = compute.components(x,alpha,beta,va,vb,weight) - augment.intial = compute.augmentation(components,va,vb) + optim.alpha <- function(alpha, beta) { + score.intial <- compute.score(x, alpha, beta, va, vb) + components <- compute.components(x, alpha, beta, va, vb, weight) + augment.intial <- compute.augmentation(components, va, vb) return(max((abs(score.intial[1:pa] + t(augment.intial)[1:pa])))) } - optim.beta <- function(alpha,beta){ - score.intial = compute.score(x,alpha,beta,va,vb) - components = compute.components(x,alpha,beta,va,vb,weight) - augment.intial = compute.augmentation(components,va,vb) - return(max((abs(score.intial[(pa+1):(pa+pb)] + t(augment.intial)[(pa+1):(pa+pb)])))) + optim.beta <- function(alpha, beta) { + score.intial <- compute.score(x, alpha, beta, va, vb) + components <- compute.components(x, alpha, beta, va, vb, weight) + augment.intial <- compute.augmentation(components, va, vb) + return(max((abs(score.intial[(pa + 1):(pa + pb)] + t(augment.intial)[(pa + 1):(pa + pb)])))) } - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start - beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step+1 - target.alpha <- function(a) { optim.alpha(a,beta) } - result.a <- optim(alpha, target.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(result.a$par,alpha) - alpha = result.a$par - target.beta <- function(b) { optim.beta(alpha,b) } - result.b <- optim(beta, target.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(result.b$par,beta)) - beta = result.b$par + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + target.alpha <- 
function(a) { + optim.alpha(a, beta) + } + result.a <- optim(alpha, target.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(result.a$par, alpha) + alpha <- result.a$par + target.beta <- function(b) { + optim.beta(alpha, b) + } + result.b <- optim(beta, target.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(result.b$par, beta)) + beta <- result.b$par } - opt = list(par = c(alpha,beta), convergence = (step < max.step), step = step) + opt <- list(par = c(alpha, beta), convergence = (step < max.step), step = step) return(opt) } - - - - diff --git a/compare/MLE_Point_for_jeffrey.R b/compare/MLE_Point_for_jeffrey.R index eb6f8ad..1552854 100644 --- a/compare/MLE_Point_for_jeffrey.R +++ b/compare/MLE_Point_for_jeffrey.R @@ -1,123 +1,125 @@ -#' Penalized Maximum‐Likelihood Estimation -#' -#' Penalizing the log-likelihood function with the Jeffry's prior, but with the prior directly applied to p0,p1 -#' -#' Compute Determinant of Fisher Information for RR/RD Model -#' -#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. -#' @param x Binary exposure indicator (0/1). -#' @param alpha.ml Numeric vector of length \(p_a\). Fitted \(\alpha\) parameters. -#' @param beta.ml Numeric vector of length \(p_b\). Fitted \(\beta\) parameters. -#' @param va Numeric matrix \(n\times p_a\). Design matrix for the \(\alpha\) component. -#' @param vb Numeric matrix \(n\times p_b\). Design matrix for the \(\beta\) component. -#' @param weight Numeric vector of length \(n\). Observation weight (not used in this simple approximation). -#' -### augmentation calculation -fisher.detf = function(param, x, alpha.ml, beta.ml, va, vb, weight) { - - getProb = if (param == "RR") getProbRR else getProbRD - - p0p1 = getProb(va %*% alpha.ml, vb %*% beta.ml) - p0 = p0p1[x == 0, 1] - p1 = p0p1[x == 1, 2] - - fisher.det = sum(1/(p0*(1-p0)))*sum(1/(p1*(1-p1))) - return(fisher.det) -} - - -#' Penalized Maximum‐Likelihood Estimation -#' -#' Penalizing the log-likelihood function with the Jeffry's prior, but with the prior directly applied to p0,p1 -#' -#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. -#' @param y Numeric vector of length \(n\). Binary outcomes (0/1). -#' @param x Numeric vector of length \(n\). Binary exposure indicator (0/1). -#' @param va Numeric matrix \(n\times p_a\). -#' @param vb Numeric matrix \(n\times p_b\). -#' @param alpha.start Numeric vector of length \(p_a\). Initial values for -#' the \eqn{\alpha} parameters. -#' @param beta.start Numeric vector of length \eqn{p_b}. Initial values for -#' the \eqn{\beta} parameters. -#' @param weight Numeric vector of length \(n\). Observation weights. -#' @param max.step Integer. Maximum number of alternating updates. -#' @param thres Numeric. Convergence threshold on relative parameter change. -#' @param pa Integer. Number of \(\alpha\) parameters (\(p_a\)). -#' @param pb Integer. Number of \(\beta\) parameters (\(p_b\)). -#' - -# The difference between this file and "MLE_Point_of_estimator_for_jeffrey.R" lies in the function used to compute the Fisher information. -# We can merge the two files by adding a conditional statement based on the value of argument "method". 
-max.likelihood.jeffrey.direct = function(param, y, x, va, vb, alpha.start, beta.start, weight, - max.step, thres, pa, pb) { - - startpars = c(alpha.start, beta.start) - - getProb = if (param == "RR") getProbRR else getProbRD - - ## negative log likelihood function - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - - fisher.det = fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weight[x == 1])- - log(fisher.det)/2) - - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - fisher.det = fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])- - log(fisher.det)/2) - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - fisher.det = fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])- - log(fisher.det)/2) - } - - - ## Optimization - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start; beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - - opt = list(par = c(alpha,beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha,beta)), step = step) - - return(opt) -} - - +#' Penalized Maximum‐Likelihood Estimation +#' +#' Penalizing the log-likelihood function with the Jeffry's prior, but with the prior directly applied to p0,p1 +#' +#' Compute Determinant of Fisher Information for RR/RD Model +#' +#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. +#' @param x Binary exposure indicator (0/1). +#' @param alpha.ml Numeric vector of length \(p_a\). Fitted \(\alpha\) parameters. +#' @param beta.ml Numeric vector of length \(p_b\). Fitted \(\beta\) parameters. +#' @param va Numeric matrix \(n\times p_a\). Design matrix for the \(\alpha\) component. +#' @param vb Numeric matrix \(n\times p_b\). Design matrix for the \(\beta\) component. +#' @param weight Numeric vector of length \(n\). Observation weight (not used in this simple approximation). 
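# In both Jeffreys-type variants the working objective is the penalized
# log-likelihood
#   l*(alpha, beta) = l(alpha, beta) + (1/2) * log det I(alpha, beta),
# maximized by minimizing the matching penalized negative log-likelihood.
# fisher.detf() below replaces det I(alpha, beta) with the cruder surrogate
#   sum(1 / (p0 * (1 - p0))) * sum(1 / (p1 * (1 - p1))),
# i.e. a Jeffreys-style penalty applied directly to p0 and p1 rather than to
# (alpha, beta). In canonical exponential-family models this kind of penalty
# reproduces Firth's bias-reducing adjustment (Firth, 1993); for the RR/RD
# parametrizations used here the two need not coincide exactly.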
+#' +### augmentation calculation +fisher.detf <- function(param, x, alpha.ml, beta.ml, va, vb, weight) { + getProb <- if (param == "RR") getProbRR else getProbRD + + p0p1 <- getProb(va %*% alpha.ml, vb %*% beta.ml) + p0 <- p0p1[x == 0, 1] + p1 <- p0p1[x == 1, 2] + + fisher.det <- sum(1 / (p0 * (1 - p0))) * sum(1 / (p1 * (1 - p1))) + return(fisher.det) +} + + +#' Penalized Maximum‐Likelihood Estimation +#' +#' Penalizing the log-likelihood function with the Jeffry's prior, but with the prior directly applied to p0,p1 +#' +#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. +#' @param y Numeric vector of length \(n\). Binary outcomes (0/1). +#' @param x Numeric vector of length \(n\). Binary exposure indicator (0/1). +#' @param va Numeric matrix \(n\times p_a\). +#' @param vb Numeric matrix \(n\times p_b\). +#' @param alpha.start Numeric vector of length \(p_a\). Initial values for +#' the \eqn{\alpha} parameters. +#' @param beta.start Numeric vector of length \eqn{p_b}. Initial values for +#' the \eqn{\beta} parameters. +#' @param weight Numeric vector of length \(n\). Observation weights. +#' @param max.step Integer. Maximum number of alternating updates. +#' @param thres Numeric. Convergence threshold on relative parameter change. +#' @param pa Integer. Number of \(\alpha\) parameters (\(p_a\)). +#' @param pb Integer. Number of \(\beta\) parameters (\(p_b\)). +#' + +# The difference between this file and "MLE_Point_of_estimator_for_jeffrey.R" lies in the function used to compute the Fisher information. +# We can merge the two files by adding a conditional statement based on the value of argument "method". +max.likelihood.jeffrey.direct <- function(param, y, x, va, vb, alpha.start, beta.start, weight, + max.step, thres, pa, pb) { + startpars <- c(alpha.start, beta.start) + + getProb <- if (param == "RR") getProbRR else getProbRD + + ## negative log likelihood function + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + fisher.det <- fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weight[x == 1]) - + log(fisher.det) / 2) + } + + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + fisher.det <- fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1]) - + log(fisher.det) / 2) + } + + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + fisher.det <- fisher.detf(param, x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1]) - + log(fisher.det) / 2) + } + + + ## Optimization + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + 
step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + + opt <- list( + par = c(alpha, beta), convergence = (step < max.step), + value = neg.log.likelihood(c(alpha, beta)), step = step + ) + + return(opt) +} diff --git a/compare/MLE_Point_of_estimator_for_jeffrey.R b/compare/MLE_Point_of_estimator_for_jeffrey.R index 78766f8..8b59570 100644 --- a/compare/MLE_Point_of_estimator_for_jeffrey.R +++ b/compare/MLE_Point_of_estimator_for_jeffrey.R @@ -1,94 +1,99 @@ -#' Penalized Maximum‐Likelihood Estimation -#' -#' Penalizing the log-likelihood function with the Jeffry's prior -#' -#' Alternating coordinate‐descent updates are used to optimize over -#' \eqn{\alpha} and \eqn{\beta} in turn. -#' -#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. -#' @param y Numeric vector of length \eqn{n}. Binary outcomes (0/1). -#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). -#' @param va Numeric matrix \eqn{n\times p_a}. -#' @param vb Numeric matrix \eqn{n\times p_b}. -#' @param alpha.start Numeric vector of length \eqn{p_a}. Initial values for -#' the \eqn{\alpha} parameters. -#' @param beta.start Numeric vector of length \eqn{p_b}. Initial values for -#' the \eqn{\beta} parameters. -#' @param weight Numeric vector of length \eqn{n}. Observation weight. -#' @param max.step Integer. Maximum number of alternating coordinate‐descent -#' iterations. -#' @param thres Numeric. Convergence threshold on relative parameter change. -#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{p_a}). -#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{p_b}). 
-#' -max.likelihood.jeffrey = function(param, y, x, va, vb, alpha.start, beta.start, weight, - max.step, thres, pa, pb) { - - startpars = c(alpha.start, beta.start) - - getProb = if (param == "RR") getProbRR else getProbRD - - ## negative log likelihood function - neg.log.likelihood = function(pars) { - alpha = pars[1:pa] - beta = pars[(pa + 1):(pa + pb)] - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[, 1]; p1 = p0p1[, 2] - - if (param == "RR") fisher = var.mle.rr (x, alpha.start, beta.start, va, vb, weight) else fisher = var.mle.rd (x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + - (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == - 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == - 1]) * weight[x == 1])+ - log(det(fisher))/2) ### add the Jeffrey's prior - - } - - neg.log.likelihood.alpha = function(alpha){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - if (param == "RR") fisher = var.mle.rr (x, alpha.start, beta.start, va, vb, weight) else fisher = var.mle.rd (x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])+ - log(det(fisher))/2) ### add the Jeffrey's prior - } - - neg.log.likelihood.beta = function(beta){ - p0p1 = getProb(va %*% alpha, vb %*% beta) - p0 = p0p1[,1]; p1 = p0p1[,2] - - if (param == "RR") fisher = var.mle.rr (x, alpha.start, beta.start, va, vb, weight) else fisher = var.mle.rd (x, alpha.start, beta.start, va, vb, weight) - - return(-sum((1-y[x==0])*log(1-p0[x==0])*weight[x==0] + - (y[x==0])*log(p0[x==0])*weight[x==0]) - - sum((1-y[x==1])*log(1-p1[x==1])*weight[x==1] + - (y[x==1])*log(p1[x==1])*weight[x==1])+ - log(det(fisher))/2) ### add the Jeefrey's prior - } - - - ## Optimization - - Diff = function(x,y) sum((x-y)^2)/sum(x^2+thres) - alpha = alpha.start; beta = beta.start - diff = thres + 1; step = 0 - while(diff > thres & step < max.step){ - step = step + 1 - opt1 = stats::optim(alpha,neg.log.likelihood.alpha,control=list(maxit=max(100,max.step/10))) - diff1 = Diff(opt1$par,alpha) - alpha = opt1$par - opt2 = stats::optim(beta,neg.log.likelihood.beta,control=list(maxit=max(100,max.step/10))) - diff = max(diff1,Diff(opt2$par,beta)) - beta = opt2$par - } - - opt = list(par = c(alpha,beta), convergence = (step < max.step), - value = neg.log.likelihood(c(alpha,beta)), step = step) - - return(opt) -} +#' Penalized Maximum‐Likelihood Estimation +#' +#' Penalizing the log-likelihood function with the Jeffry's prior +#' +#' Alternating coordinate‐descent updates are used to optimize over +#' \eqn{\alpha} and \eqn{\beta} in turn. +#' +#' @param param Character scalar, either \code{"RR"} or \code{"RD"}. +#' @param y Numeric vector of length \eqn{n}. Binary outcomes (0/1). +#' @param x Numeric vector of length \eqn{n}. Binary exposure indicator (0/1). +#' @param va Numeric matrix \eqn{n\times p_a}. +#' @param vb Numeric matrix \eqn{n\times p_b}. +#' @param alpha.start Numeric vector of length \eqn{p_a}. Initial values for +#' the \eqn{\alpha} parameters. +#' @param beta.start Numeric vector of length \eqn{p_b}. Initial values for +#' the \eqn{\beta} parameters. +#' @param weight Numeric vector of length \eqn{n}. Observation weight. +#' @param max.step Integer. Maximum number of alternating coordinate‐descent +#' iterations. +#' @param thres Numeric. 
Convergence threshold on relative parameter change. +#' @param pa Integer. Number of \eqn{\alpha} parameters (\eqn{p_a}). +#' @param pb Integer. Number of \eqn{\beta} parameters (\eqn{p_b}). +#' +max.likelihood.jeffrey <- function(param, y, x, va, vb, alpha.start, beta.start, weight, + max.step, thres, pa, pb) { + startpars <- c(alpha.start, beta.start) + + getProb <- if (param == "RR") getProbRR else getProbRD + + ## negative log likelihood function + neg.log.likelihood <- function(pars) { + alpha <- pars[1:pa] + beta <- pars[(pa + 1):(pa + pb)] + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + if (param == "RR") fisher <- var.mle.rr(x, alpha.start, beta.start, va, vb, weight) else fisher <- var.mle.rd(x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - sum((1 - y[x == + 1]) * log(1 - p1[x == 1]) * weight[x == 1] + (y[x == 1]) * log(p1[x == + 1]) * weight[x == 1]) + + log(det(fisher)) / 2) ### add the Jeffrey's prior + } + + neg.log.likelihood.alpha <- function(alpha) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + if (param == "RR") fisher <- var.mle.rr(x, alpha.start, beta.start, va, vb, weight) else fisher <- var.mle.rd(x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1]) + + log(det(fisher)) / 2) ### add the Jeffrey's prior + } + + neg.log.likelihood.beta <- function(beta) { + p0p1 <- getProb(va %*% alpha, vb %*% beta) + p0 <- p0p1[, 1] + p1 <- p0p1[, 2] + + if (param == "RR") fisher <- var.mle.rr(x, alpha.start, beta.start, va, vb, weight) else fisher <- var.mle.rd(x, alpha.start, beta.start, va, vb, weight) + + return(-sum((1 - y[x == 0]) * log(1 - p0[x == 0]) * weight[x == 0] + + (y[x == 0]) * log(p0[x == 0]) * weight[x == 0]) - + sum((1 - y[x == 1]) * log(1 - p1[x == 1]) * weight[x == 1] + + (y[x == 1]) * log(p1[x == 1]) * weight[x == 1]) + + log(det(fisher)) / 2) ### add the Jeefrey's prior + } + + + ## Optimization + + Diff <- function(x, y) sum((x - y)^2) / sum(x^2 + thres) + alpha <- alpha.start + beta <- beta.start + diff <- thres + 1 + step <- 0 + while (diff > thres & step < max.step) { + step <- step + 1 + opt1 <- stats::optim(alpha, neg.log.likelihood.alpha, control = list(maxit = max(100, max.step / 10))) + diff1 <- Diff(opt1$par, alpha) + alpha <- opt1$par + opt2 <- stats::optim(beta, neg.log.likelihood.beta, control = list(maxit = max(100, max.step / 10))) + diff <- max(diff1, Diff(opt2$par, beta)) + beta <- opt2$par + } + + opt <- list( + par = c(alpha, beta), convergence = (step < max.step), + value = neg.log.likelihood(c(alpha, beta)), step = step + ) + + return(opt) +} diff --git a/compare/analyze_simulation.R b/compare/analyze_simulation.R index a211860..ec50681 100644 --- a/compare/analyze_simulation.R +++ b/compare/analyze_simulation.R @@ -1,333 +1,365 @@ - -### input param, event, hypothesis, n, and R used in 'run_simulation' - -library(reshape2) -library(ggplot2) -library(ggsci) -library(dplyr) -library(tidyr) -library(matrixStats) - -est.result <- function(df,para.true){ - est <- df$estimate - bias <- mean(est)-para.true - - se <- df$se - se.est<- mean(se)/sqrt(length(se)) - - sd.est<- mean(se) - sd.mc <- sd(est) - acc <- sd.est/sd.mc - - low <- 
df$low - up <- df$up - cov <- mean((low<para.true)*(up>para.true)) - - p <- mean(df$p<=0.05) - - estm.ml <- c(bias,se.est,acc,cov,p) - return(estm.ml) -} - -plot_metric <- function(df, metric, title, value, ymax = 1, file = NULL) { - # - df <- df %>% - mutate(y_plot = pmin(!!sym(metric), ymax)) - - p <- ggplot(df, aes(x = reorder(method, !!sym(metric)), y = y_plot)) + - geom_col(fill = "steelblue") + - geom_hline(yintercept = value, linetype = "dashed", color = "grey60") + - - - geom_text( - data = subset(df, !!sym(metric) > ymax), - aes(label = sprintf("%.2f", !!sym(metric)), y = ymax), - vjust = -0.3, size = 3.5, color = "red" - ) + - - coord_cartesian(ylim = c(0, ymax * 1.05)) + - theme_minimal(base_size = 12) + - theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + - labs(x = "Method", y = metric, title = title) - - if (!is.null(file)) - ggsave(file, p, width = 8, height = 6, dpi = 300) - return(p) -} - -## read data -data <- read.csv(paste0("simulation_results_",param,"_",event,"_",hypothesis,"_n_", n, "_R_", R,".csv")) - - -if(param == "RR"){ - data.brm <- data[(seq_len(nrow(data)) %% 14) == 1,] - data.brm.ad <- data[(seq_len(nrow(data)) %% 14) == 2,] - data.CMH <- data[(seq_len(nrow(data)) %% 14) == 3,] - data.lb <- data[(seq_len(nrow(data)) %% 14) == 4,] - data.lp <- data[(seq_len(nrow(data)) %% 14) == 5,] - data.rlp <- data[(seq_len(nrow(data)) %% 14) == 6,] - data.firth <- data[(seq_len(nrow(data)) %% 14) == 7,] - data.exact <- data[(seq_len(nrow(data)) %% 14) == 8,] - data.exact.ad <- data[(seq_len(nrow(data)) %% 14) == 9,] - data.GC <- data[(seq_len(nrow(data)) %% 14) == 10,] - data.GC.BR <- data[(seq_len(nrow(data)) %% 14) == 11,] - data.GC.FC <- data[(seq_len(nrow(data)) %% 14) == 12,] - data.GC.FC.BR1 <- data[(seq_len(nrow(data)) %% 14) == 13,] - data.GC.FC.BR2 <- data[(seq_len(nrow(data)) %% 14) == 0,] - num_cols <- sapply(data.CMH, is.numeric) - df.CMH <- data.CMH[ apply(data.CMH[ , num_cols], 1, function(x) all(is.finite(x))), ] - num_cols <- sapply(data.GC, is.numeric) - df.GC <- data.GC[ apply(data.GC[ , num_cols], 1, function(x) all(is.finite(x))), ] - num_cols <- sapply(data.GC.BR, is.numeric) - df.GC.BR <- data.GC.BR[ apply(data.GC.BR[ , num_cols], 1, function(x) all(is.finite(x))), ] -}else{ - data.brm <- data[(seq_len(nrow(data)) %% 14) == 1,] - data.brm.ad <- data[(seq_len(nrow(data)) %% 14) == 2,] - data.bayes <- data[(seq_len(nrow(data)) %% 14) == 3,] - data.glm <- data[(seq_len(nrow(data)) %% 14) == 4,] - data.lpm <- data[(seq_len(nrow(data)) %% 14) == 5,] - data.MN <- data[(seq_len(nrow(data)) %% 14) == 6,] - data.firth <- data[(seq_len(nrow(data)) %% 14) == 7,] - data.exact <- data[(seq_len(nrow(data)) %% 14) == 8,] - data.exact.ad <- data[(seq_len(nrow(data)) %% 14) == 9,] - data.GC <- data[(seq_len(nrow(data)) %% 14) == 10,] - data.GC.BR <- data[(seq_len(nrow(data)) %% 14) == 11,] - data.GC.FC <- data[(seq_len(nrow(data)) %% 14) == 12,] - data.GC.FC.BR1 <- data[(seq_len(nrow(data)) %% 14) == 13,] - data.GC.FC.BR2 <- data[(seq_len(nrow(data)) %% 14) == 0,] - - num_cols <- sapply(data.GC, is.numeric) - df.GC <- data.GC[ apply(data.GC[ , num_cols], 1, function(x) all(is.finite(x))), ] - num_cols <- sapply(data.GC.BR, is.numeric) - df.GC.BR <- data.GC.BR[ apply(data.GC.BR[ , num_cols], 1, function(x) all(is.finite(x))), ] - num_cols <- sapply(data.lpm, is.numeric) - df.lpm <- data.lpm[ apply(data.lpm[ , num_cols], 1, function(x) all(is.finite(x))), ] -} - -## true value -if(param == "RR"){ - if (event == "common"){ - if (hypothesis == "null"){ - alpha.true <- 
0 - beta.true <- c(1.5, 0.6) - gamma.true <- c(0.2, -0.5) - }else{ - alpha.true <- 0.3 - beta.true <- c(1.65, 0.5) - gamma.true <- c(0.2, -0.5) - } - }else{ - if (hypothesis == "null"){ - alpha.true <- 0 - beta.true <- c(-4.7, 0.5) - gamma.true <- c(0.2, -0.5) - }else{ - alpha.true <- 0.7 - beta.true <- c(-5.5, 0.5) - gamma.true <- c(0.2, -0.5) - } - } -}else{ - if (event == "common"){ - if (hypothesis == "null"){ - alpha.true = 0 - beta.true = c(0.9,0.5) - gamma.true = c(0.2,-0.5) - }else{ - alpha.true = 0.1 - beta.true = c(0.9,0.2) - gamma.true = c(0.2,-0.5) - } - }else{ - if (hypothesis == "null"){ - alpha.true = 0 - beta.true = c(-4.5,0.5) - gamma.true = c(0.2,-0.5) - }else{ - alpha.true = 0.05 - beta.true = c(-5.5,0.2) - gamma.true = c(0.2,-0.5)# rare - } - } -} - -## results -if(param == "RR"){ - result.brm <- est.result(data.brm,alpha.true) - result.brm_b <- est.result(data.brm.ad,alpha.true) - result.CMH <- est.result(df.CMH,alpha.true) - result.LB <- est.result(data.lb,alpha.true) - result.LP <- est.result(data.lp,alpha.true) - result.RLP <- est.result(data.rlp,alpha.true) - result.brm.FC <- est.result(data.firth,alpha.true) - result.brm.BC <- est.result(data.exact,alpha.true) - result.brm_b.BC <- est.result(data.exact.ad,alpha.true) - result.GC <- est.result(data.GC,alpha.true) - result.GC.BR <- est.result(data.GC.BR,alpha.true) - result.GC.FC <- est.result(data.GC.FC,alpha.true) - result.GC.FC.BR1 <- est.result(data.GC.FC.BR1,alpha.true) - result.GC.FC.BR2 <- est.result(data.GC.FC.BR2,alpha.true) - - result <- cbind(result.brm, result.CMH, result.LB, result.LP, - result.RLP, result.brm.FC, result.brm.BC,result.brm_b, - result.brm_b.BC, result.GC,result.GC.BR,result.GC.FC - ,result.GC.FC.BR1,result.GC.FC.BR2) - - rownames(result) <- c("bias", "se", "acc", "coverage", "p") - colnames(result) <- c("brm","CMH","LB","LP","RLP","brm-FC","brm-BC", - "brm_b","brm_b-BC","GC","GC-BR","GC-FC","GC-FC-BR1","GC-FC-BR2") -}else{ - result.brm <- est.result(data.brm,alpha.true) - result.brm_b <- est.result(data.brm.ad,alpha.true) - result.bayesian <- est.result(data.bayes,alpha.true) - result.GLM <- est.result(data.glm,alpha.true) - result.LPM <- est.result(df.lpm,alpha.true) - result.MN<- est.result(data.MN,alpha.true) - result.brm.FC <- est.result(data.firth,alpha.true) - result.brm.BC <- est.result(data.exact,alpha.true) - result.brm_b.BC <- est.result(data.exact.ad,alpha.true) - result.GC <- est.result(df.GC,alpha.true) - result.GC.BR <- est.result(df.GC.BR,alpha.true) - result.GC.FC <- est.result(data.GC.FC,alpha.true) - result.GC.FC.BR1 <- est.result(data.GC.FC.BR1,alpha.true) - result.GC.FC.BR2 <- est.result(data.GC.FC.BR2,alpha.true) - - result <- cbind(result.brm,result.bayesian, result.GLM, result.LPM, - result.MN, result.brm.FC,result.brm.BC,result.brm_b,result.brm_b.BC, - result.GC,result.GC.BR,result.GC.FC,result.GC.FC.BR1,result.GC.FC.BR2) - - rownames(result) <- c("bias", "se", "acc", "coverage", "p") -} - -write.csv(result, paste0("result_", param, "_", n, "_", event, "_",hypothesis,".csv")) -## plots - -if (param == "RR"){ - data_names <- c("data.brm", "data.CMH", "data.lb", "data.lp", - "data.rlp", "data.firth", "data.exact","data.brm.ad", - "data.exact.ad", "data.GC","data.GC.BR","data.GC.FC" - ,"data.GC.FC.BR1","data.GC.FC.BR2") - - data_list <- mget(data_names) - - est <- do.call(cbind, lapply(data_list, `[[`, "estimate")) - colnames(est) <- c("brm","CMH","LB","LP","RLP","brm-FC","brm-BC", - "brm_b","brm_b-BC","GC","GC-BR","GC-FC","GC-FC-BR1","GC-FC-BR2") - - se <- 
do.call(cbind, lapply(data_list, `[[`, "se")) - colnames(se) <- c("brm","CMH","LB","LP","RLP","brm-FC","brm-BC", - "brm_b","brm_b-BC","GC","GC-BR","GC-FC","GC-FC-BR1","GC-FC-BR2") - -}else{ - data_names <- c("data.brm", "data.brm.ad", "data.bayes", "data.glm", - "data.lpm", "data.MN", "data.firth", "data.exact", - "data.exact.ad", "data.GC", "data.GC.BR" , "data.GC.FC", - "data.GC.FC.BR1","data.GC.FC.BR2") - data_list <- mget(data_names) - - est <- do.call(cbind, lapply(data_list, `[[`, "estimate")) - colnames(est) <- c("brm","brm_b","bayesian","GLM","LPM","MN","brm-FC","brm-BC", - "brm_b-BC","GC","GC-BR","GC-FC","GC-FC-BR1","GC-FC-BR2") - - se <- do.call(cbind, lapply(data_list, `[[`, "se")) - colnames(se) <- c("brm","brm_b","bayesian","GLM","LPM","MN","brm-FC","brm-BC", - "brm_b-BC","GC","GC-BR","GC-FC","GC-FC-BR1","GC-FC-BR2") -} - - -est_df <- as.data.frame(est) -est_long <- melt(est, variable.name = "method", value.name = "estimate") -colnames(est_long) <- c("number","method","estimate") - -removed_counts <- est_long %>% - group_by(method) %>% - summarise(removed = sum(is.na(estimate) | abs(estimate) >= 5)) - -est_long_small <- est_long %>% filter(!is.na(estimate), abs(estimate) < 5) - - -p1 = ggplot(est_long_small, aes(x = method, y = estimate, fill = method)) + - #geom_violin(trim = FALSE, alpha = 0.7) + - geom_boxplot(color = "grey30",width = 0.6, outlier.size = 0.5, alpha = 0.9) + - ggsci::scale_fill_d3(palette = "category20") + - theme_minimal(base_size = 14) + - geom_hline(yintercept = alpha.true, linetype = "dashed", color = "steelblue") + - stat_summary(fun = mean, geom = "point", shape = 21, size = 1.5, fill = "white", color = "black") + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - legend.position = "none") + - labs(title = paste0("Monte Carlo when n = ", n), - x = "Method", y = "Estimate") - -stats <- est_long_small %>% - group_by(method) %>% - summarise(ypos = max(estimate, na.rm = TRUE)) - - -removed_counts <- left_join(removed_counts, stats, by = "method") - - -p2 = p1 + geom_text(data = removed_counts, - aes(x = method, y = ypos + 0.2, - label = paste0("(", removed,")")), - inherit.aes = FALSE, - size = 3.5, color = "steelblue") - -if(sum(removed_counts$removed)==0){ - ggsave(paste0("est_",param,"_",event,"_",hypothesis,"_n_", n,".png"), p1, width = 12, height = 6, dpi = 300) -}else{ - ggsave(paste0("est_",param,"_",event,"_",hypothesis,"_n_", n,".png"), p2, width = 12, height = 6, dpi = 300) -} - -### plot of SE - -se_df <- as.data.frame(se) -se_long <- melt(se, variable.name = "method", value.name = "SE") -colnames(se_long) <- c("number","method","SE") - -removed_counts <- se_long %>% - group_by(method) %>% - summarise(removed = sum(is.na(SE) | SE >= 5)) - - -se_long_small <- se_long %>% filter(!is.na(SE), SE < 5) - -p3 <- ggplot(se_long_small, aes(x = method, y = SE, fill = method)) + - geom_boxplot(alpha = 0.7, outlier.size = 0.5) + - theme_minimal(base_size = 14) + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - legend.position = "none") + - labs(title = paste0("Boxplot of RR for n = ", n), - x = "Method", y = "SE") - -stats <- se_long_small %>% - group_by(method) %>% - summarise(ypos = max(SE, na.rm = TRUE)) - -removed_counts <- left_join(removed_counts, stats, by = "method") - -p4 = p3 + geom_text(data = removed_counts, - aes(x = method, y = ypos + 0.2, - label = paste0("(", removed,")")), - inherit.aes = FALSE, - size = 3.5, color = "steelblue") - -if(sum(removed_counts$removed)==0){ - 
ggsave(paste0("se_",param,"_",event,"_",hypothesis,"_n_", n,".png"), p3, width = 12, height = 6, dpi = 300) -}else{ - ggsave(paste0("se_",param,"_",event,"_",hypothesis,"_n_", n,".png"), p4, width = 12, height = 6, dpi = 300) -} - -### bar for accuracy, coverage, and p-value -df <- as.data.frame(t(result)) -df$method <- sub("^result\\.", "", rownames(df)) -rownames(df) <- NULL - -num_cols <- c("bias","se","acc","coverage","p") -df <- df %>% mutate(across(all_of(num_cols), as.numeric)) - -p_acc <- plot_metric(df, "acc", paste0("Barplot of accuarcy at n = ",n), 1, 1.5, - paste0("accuracy ", param," ", event," ",hypothesis, " n = ",n, ".png")) -p_cov <- plot_metric(df, "coverage", paste0("Barplot of coverage at n = ",n), 0.95,1.1, - paste0("coverage ", param," ", event," ",hypothesis, " n = ",n, ".png")) - - +### input param, event, hypothesis, n, and R used in 'run_simulation' + +library(reshape2) +library(ggplot2) +library(ggsci) +library(dplyr) +library(tidyr) +library(matrixStats) + +est.result <- function(df, para.true) { + est <- df$estimate + bias <- mean(est) - para.true + + se <- df$se + se.est <- mean(se) / sqrt(length(se)) + + sd.est <- mean(se) + sd.mc <- sd(est) + acc <- sd.est / sd.mc + + low <- df$low + up <- df$up + cov <- mean((low < para.true) * (up > para.true)) + + p <- mean(df$p <= 0.05) + + estm.ml <- c(bias, se.est, acc, cov, p) + return(estm.ml) +} + +plot_metric <- function(df, metric, title, value, ymax = 1, file = NULL) { + # + df <- df %>% + mutate(y_plot = pmin(!!sym(metric), ymax)) + + p <- ggplot(df, aes(x = reorder(method, !!sym(metric)), y = y_plot)) + + geom_col(fill = "steelblue") + + geom_hline(yintercept = value, linetype = "dashed", color = "grey60") + + geom_text( + data = subset(df, !!sym(metric) > ymax), + aes(label = sprintf("%.2f", !!sym(metric)), y = ymax), + vjust = -0.3, size = 3.5, color = "red" + ) + + coord_cartesian(ylim = c(0, ymax * 1.05)) + + theme_minimal(base_size = 12) + + theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) + + labs(x = "Method", y = metric, title = title) + + if (!is.null(file)) { + ggsave(file, p, width = 8, height = 6, dpi = 300) + } + return(p) +} + +## read data +data <- read.csv(paste0("simulation_results_", param, "_", event, "_", hypothesis, "_n_", n, "_R_", R, ".csv")) + + +if (param == "RR") { + data.brm <- data[(seq_len(nrow(data)) %% 14) == 1, ] + data.brm.ad <- data[(seq_len(nrow(data)) %% 14) == 2, ] + data.CMH <- data[(seq_len(nrow(data)) %% 14) == 3, ] + data.lb <- data[(seq_len(nrow(data)) %% 14) == 4, ] + data.lp <- data[(seq_len(nrow(data)) %% 14) == 5, ] + data.rlp <- data[(seq_len(nrow(data)) %% 14) == 6, ] + data.firth <- data[(seq_len(nrow(data)) %% 14) == 7, ] + data.exact <- data[(seq_len(nrow(data)) %% 14) == 8, ] + data.exact.ad <- data[(seq_len(nrow(data)) %% 14) == 9, ] + data.GC <- data[(seq_len(nrow(data)) %% 14) == 10, ] + data.GC.BR <- data[(seq_len(nrow(data)) %% 14) == 11, ] + data.GC.FC <- data[(seq_len(nrow(data)) %% 14) == 12, ] + data.GC.FC.BR1 <- data[(seq_len(nrow(data)) %% 14) == 13, ] + data.GC.FC.BR2 <- data[(seq_len(nrow(data)) %% 14) == 0, ] + num_cols <- sapply(data.CMH, is.numeric) + df.CMH <- data.CMH[apply(data.CMH[, num_cols], 1, function(x) all(is.finite(x))), ] + num_cols <- sapply(data.GC, is.numeric) + df.GC <- data.GC[apply(data.GC[, num_cols], 1, function(x) all(is.finite(x))), ] + num_cols <- sapply(data.GC.BR, is.numeric) + df.GC.BR <- data.GC.BR[apply(data.GC.BR[, num_cols], 1, function(x) all(is.finite(x))), ] +} else { + data.brm <- 
data[(seq_len(nrow(data)) %% 14) == 1, ] + data.brm.ad <- data[(seq_len(nrow(data)) %% 14) == 2, ] + data.bayes <- data[(seq_len(nrow(data)) %% 14) == 3, ] + data.glm <- data[(seq_len(nrow(data)) %% 14) == 4, ] + data.lpm <- data[(seq_len(nrow(data)) %% 14) == 5, ] + data.MN <- data[(seq_len(nrow(data)) %% 14) == 6, ] + data.firth <- data[(seq_len(nrow(data)) %% 14) == 7, ] + data.exact <- data[(seq_len(nrow(data)) %% 14) == 8, ] + data.exact.ad <- data[(seq_len(nrow(data)) %% 14) == 9, ] + data.GC <- data[(seq_len(nrow(data)) %% 14) == 10, ] + data.GC.BR <- data[(seq_len(nrow(data)) %% 14) == 11, ] + data.GC.FC <- data[(seq_len(nrow(data)) %% 14) == 12, ] + data.GC.FC.BR1 <- data[(seq_len(nrow(data)) %% 14) == 13, ] + data.GC.FC.BR2 <- data[(seq_len(nrow(data)) %% 14) == 0, ] + + num_cols <- sapply(data.GC, is.numeric) + df.GC <- data.GC[apply(data.GC[, num_cols], 1, function(x) all(is.finite(x))), ] + num_cols <- sapply(data.GC.BR, is.numeric) + df.GC.BR <- data.GC.BR[apply(data.GC.BR[, num_cols], 1, function(x) all(is.finite(x))), ] + num_cols <- sapply(data.lpm, is.numeric) + df.lpm <- data.lpm[apply(data.lpm[, num_cols], 1, function(x) all(is.finite(x))), ] +} + +## true value +if (param == "RR") { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(1.5, 0.6) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.3 + beta.true <- c(1.65, 0.5) + gamma.true <- c(0.2, -0.5) + } + } else { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.7, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.7 + beta.true <- c(-5.5, 0.5) + gamma.true <- c(0.2, -0.5) + } + } +} else { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(0.9, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.1 + beta.true <- c(0.9, 0.2) + gamma.true <- c(0.2, -0.5) + } + } else { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.5, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.05 + beta.true <- c(-5.5, 0.2) + gamma.true <- c(0.2, -0.5) # rare + } + } +} + +## results +if (param == "RR") { + result.brm <- est.result(data.brm, alpha.true) + result.brm_b <- est.result(data.brm.ad, alpha.true) + result.CMH <- est.result(df.CMH, alpha.true) + result.LB <- est.result(data.lb, alpha.true) + result.LP <- est.result(data.lp, alpha.true) + result.RLP <- est.result(data.rlp, alpha.true) + result.brm.FC <- est.result(data.firth, alpha.true) + result.brm.BC <- est.result(data.exact, alpha.true) + result.brm_b.BC <- est.result(data.exact.ad, alpha.true) + result.GC <- est.result(data.GC, alpha.true) + result.GC.BR <- est.result(data.GC.BR, alpha.true) + result.GC.FC <- est.result(data.GC.FC, alpha.true) + result.GC.FC.BR1 <- est.result(data.GC.FC.BR1, alpha.true) + result.GC.FC.BR2 <- est.result(data.GC.FC.BR2, alpha.true) + + result <- cbind( + result.brm, result.CMH, result.LB, result.LP, + result.RLP, result.brm.FC, result.brm.BC, result.brm_b, + result.brm_b.BC, result.GC, result.GC.BR, result.GC.FC, + result.GC.FC.BR1, result.GC.FC.BR2 + ) + + rownames(result) <- c("bias", "se", "acc", "coverage", "p") + colnames(result) <- c( + "brm", "CMH", "LB", "LP", "RLP", "brm-FC", "brm-BC", + "brm_b", "brm_b-BC", "GC", "GC-BR", "GC-FC", "GC-FC-BR1", "GC-FC-BR2" + ) +} else { + result.brm <- est.result(data.brm, alpha.true) + result.brm_b <- est.result(data.brm.ad, alpha.true) + result.bayesian <- est.result(data.bayes, alpha.true) + result.GLM <- est.result(data.glm, alpha.true) + 
result.LPM <- est.result(df.lpm, alpha.true) + result.MN <- est.result(data.MN, alpha.true) + result.brm.FC <- est.result(data.firth, alpha.true) + result.brm.BC <- est.result(data.exact, alpha.true) + result.brm_b.BC <- est.result(data.exact.ad, alpha.true) + result.GC <- est.result(df.GC, alpha.true) + result.GC.BR <- est.result(df.GC.BR, alpha.true) + result.GC.FC <- est.result(data.GC.FC, alpha.true) + result.GC.FC.BR1 <- est.result(data.GC.FC.BR1, alpha.true) + result.GC.FC.BR2 <- est.result(data.GC.FC.BR2, alpha.true) + + result <- cbind( + result.brm, result.bayesian, result.GLM, result.LPM, + result.MN, result.brm.FC, result.brm.BC, result.brm_b, result.brm_b.BC, + result.GC, result.GC.BR, result.GC.FC, result.GC.FC.BR1, result.GC.FC.BR2 + ) + + rownames(result) <- c("bias", "se", "acc", "coverage", "p") +} + +write.csv(result, paste0("result_", param, "_", n, "_", event, "_", hypothesis, ".csv")) +## plots + +if (param == "RR") { + data_names <- c( + "data.brm", "data.CMH", "data.lb", "data.lp", + "data.rlp", "data.firth", "data.exact", "data.brm.ad", + "data.exact.ad", "data.GC", "data.GC.BR", "data.GC.FC", + "data.GC.FC.BR1", "data.GC.FC.BR2" + ) + + data_list <- mget(data_names) + + est <- do.call(cbind, lapply(data_list, `[[`, "estimate")) + colnames(est) <- c( + "brm", "CMH", "LB", "LP", "RLP", "brm-FC", "brm-BC", + "brm_b", "brm_b-BC", "GC", "GC-BR", "GC-FC", "GC-FC-BR1", "GC-FC-BR2" + ) + + se <- do.call(cbind, lapply(data_list, `[[`, "se")) + colnames(se) <- c( + "brm", "CMH", "LB", "LP", "RLP", "brm-FC", "brm-BC", + "brm_b", "brm_b-BC", "GC", "GC-BR", "GC-FC", "GC-FC-BR1", "GC-FC-BR2" + ) +} else { + data_names <- c( + "data.brm", "data.brm.ad", "data.bayes", "data.glm", + "data.lpm", "data.MN", "data.firth", "data.exact", + "data.exact.ad", "data.GC", "data.GC.BR", "data.GC.FC", + "data.GC.FC.BR1", "data.GC.FC.BR2" + ) + data_list <- mget(data_names) + + est <- do.call(cbind, lapply(data_list, `[[`, "estimate")) + colnames(est) <- c( + "brm", "brm_b", "bayesian", "GLM", "LPM", "MN", "brm-FC", "brm-BC", + "brm_b-BC", "GC", "GC-BR", "GC-FC", "GC-FC-BR1", "GC-FC-BR2" + ) + + se <- do.call(cbind, lapply(data_list, `[[`, "se")) + colnames(se) <- c( + "brm", "brm_b", "bayesian", "GLM", "LPM", "MN", "brm-FC", "brm-BC", + "brm_b-BC", "GC", "GC-BR", "GC-FC", "GC-FC-BR1", "GC-FC-BR2" + ) +} + + +est_df <- as.data.frame(est) +est_long <- melt(est, variable.name = "method", value.name = "estimate") +colnames(est_long) <- c("number", "method", "estimate") + +removed_counts <- est_long %>% + group_by(method) %>% + summarise(removed = sum(is.na(estimate) | abs(estimate) >= 5)) + +est_long_small <- est_long %>% filter(!is.na(estimate), abs(estimate) < 5) + + +p1 <- ggplot(est_long_small, aes(x = method, y = estimate, fill = method)) + + # geom_violin(trim = FALSE, alpha = 0.7) + + geom_boxplot(color = "grey30", width = 0.6, outlier.size = 0.5, alpha = 0.9) + + ggsci::scale_fill_d3(palette = "category20") + + theme_minimal(base_size = 14) + + geom_hline(yintercept = alpha.true, linetype = "dashed", color = "steelblue") + + stat_summary(fun = mean, geom = "point", shape = 21, size = 1.5, fill = "white", color = "black") + + theme( + axis.text.x = element_text(angle = 45, hjust = 1), + legend.position = "none" + ) + + labs( + title = paste0("Monte Carlo when n = ", n), + x = "Method", y = "Estimate" + ) + +stats <- est_long_small %>% + group_by(method) %>% + summarise(ypos = max(estimate, na.rm = TRUE)) + + +removed_counts <- left_join(removed_counts, stats, by = "method") + + +p2 <- p1 + 
geom_text( + data = removed_counts, + aes( + x = method, y = ypos + 0.2, + label = paste0("(", removed, ")") + ), + inherit.aes = FALSE, + size = 3.5, color = "steelblue" +) + +if (sum(removed_counts$removed) == 0) { + ggsave(paste0("est_", param, "_", event, "_", hypothesis, "_n_", n, ".png"), p1, width = 12, height = 6, dpi = 300) +} else { + ggsave(paste0("est_", param, "_", event, "_", hypothesis, "_n_", n, ".png"), p2, width = 12, height = 6, dpi = 300) +} + +### plot of SE + +se_df <- as.data.frame(se) +se_long <- melt(se, variable.name = "method", value.name = "SE") +colnames(se_long) <- c("number", "method", "SE") + +removed_counts <- se_long %>% + group_by(method) %>% + summarise(removed = sum(is.na(SE) | SE >= 5)) + + +se_long_small <- se_long %>% filter(!is.na(SE), SE < 5) + +p3 <- ggplot(se_long_small, aes(x = method, y = SE, fill = method)) + + geom_boxplot(alpha = 0.7, outlier.size = 0.5) + + theme_minimal(base_size = 14) + + theme( + axis.text.x = element_text(angle = 45, hjust = 1), + legend.position = "none" + ) + + labs( + title = paste0("Boxplot of RR for n = ", n), + x = "Method", y = "SE" + ) + +stats <- se_long_small %>% + group_by(method) %>% + summarise(ypos = max(SE, na.rm = TRUE)) + +removed_counts <- left_join(removed_counts, stats, by = "method") + +p4 <- p3 + geom_text( + data = removed_counts, + aes( + x = method, y = ypos + 0.2, + label = paste0("(", removed, ")") + ), + inherit.aes = FALSE, + size = 3.5, color = "steelblue" +) + +if (sum(removed_counts$removed) == 0) { + ggsave(paste0("se_", param, "_", event, "_", hypothesis, "_n_", n, ".png"), p3, width = 12, height = 6, dpi = 300) +} else { + ggsave(paste0("se_", param, "_", event, "_", hypothesis, "_n_", n, ".png"), p4, width = 12, height = 6, dpi = 300) +} + +### bar for accuracy, coverage, and p-value +df <- as.data.frame(t(result)) +df$method <- sub("^result\\.", "", rownames(df)) +rownames(df) <- NULL + +num_cols <- c("bias", "se", "acc", "coverage", "p") +df <- df %>% mutate(across(all_of(num_cols), as.numeric)) + +p_acc <- plot_metric( + df, "acc", paste0("Barplot of accuracy at n = ", n), 1, 1.5, + paste0("accuracy ", param, " ", event, " ", hypothesis, " n = ", n, ".png") +) +p_cov <- plot_metric( + df, "coverage", paste0("Barplot of coverage at n = ", n), 0.95, 1.1, + paste0("coverage ", param, " ", event, " ", hypothesis, " n = ", n, ".png") +) diff --git a/compare/bayes_p.R b/compare/bayes_p.R index d31d57c..9da34f8 100644 --- a/compare/bayes_p.R +++ b/compare/bayes_p.R @@ -1,144 +1,164 @@ -#' Bayesian estimator for Risk Difference on Fisher-z scale -#' -#' @description -#' Conjugate-Beta model for two independent binomials in treatment (\eqn{x=1}) and control (\eqn{x=0}). -#' Draws \eqn{p_1 \sim \text{Beta}(a_1+N_{1,1},\, b_1+N_{a1}-N_{1,1})} and -#' \eqn{p_0 \sim \text{Beta}(a_0+N_{0,1},\, b_0+N_{a0}-N_{0,1})} by Monte Carlo, -#' forms the risk difference \eqn{d = p_1 - p_0}, and reports summaries on the -#' Fisher-\eqn{z} scale \eqn{\alpha = \operatorname{atanh}(d)} (stabilizes near the -#' boundaries \eqn{[-1,1]}). -#' -#' @param Na0 Integer. Number at risk in control arm (\eqn{x=0}). -#' @param Na1 Integer. Number at risk in treatment arm (\eqn{x=1}). -#' @param N0_1 Integer. Number of events in control arm. -#' @param N1_1 Integer. Number of events in treatment arm. -#' @param a1,b1,a0,b0 Positive numerics. Beta prior hyperparameters for -#' \eqn{p_1 \sim \text{Beta}(a_1,b_1)} and \eqn{p_0 \sim \text{Beta}(a_0,b_0)}. -#' Defaults are Jeffreys \code{0.5, 0.5}. -#' @param M Integer. 
Number of Monte Carlo draws (default \code{1e5}). -#' @param conf Numeric in \code{(0,1)}. Credible mass for intervals (default \code{0.95}). -#' -#' @return A list with components (all on the \eqn{\alpha=\operatorname{atanh}(d)} scale): -#' \describe{ -#' \item{\code{point.est}}{Posterior mean of \eqn{\alpha}.} -#' \item{\code{se.est}}{Posterior SD of \eqn{\alpha}.} -#' \item{\code{conf.lower}, \code{conf.upper}}{Equal-tail credible interval endpoints.} -#' \item{\code{ET}}{Length-2 vector of equal-tail endpoints.} -#' \item{\code{HPD}}{Length-2 Highest Posterior Density interval from \pkg{HDInterval}.} -#' \item{\code{p.value}}{Posterior probability \eqn{\Pr(d>0)} (one-sided support for RD>0).} -#' } -#' - -bayes_est_RD <- function(Na0, Na1, N0_1, N1_1, a1=.5, b1=.5, a0=.5, b0=.5, - M=1e5, conf=0.95){ - p1 <- rbeta(M, a1 + N1_1, b1 + Na1-N1_1) - p0 <- rbeta(M, a0 + N0_1, b0 + Na0-N0_1) - d <- p1 - p0 - alpha <- atanh(d) - sd <- sd(alpha) - et <- quantile(alpha, c((1-conf)/2, 1-(1-conf)/2)) - hpd<- HDInterval::hdi(alpha, credMass=conf) - list(point.est = mean(alpha), se.est = sd, conf.lower = min(et), - conf.upper = max(et), ET = et, HPD = hpd, - p.value = mean(d>0)) -} - -#' Bayesian estimator for Risk Ratio on log scale -#' -#' @description -#' Conjugate-Beta model as above, but summarizes the risk ratio \eqn{d = p_1/p_0} -#' on the log scale \eqn{\alpha=\log d}. -#' -#' @inheritParams bayes_est_RD -#' -#' @return A list with components (all on the \eqn{\alpha=\log(p_1/p_0)} scale): -#' \describe{ -#' \item{\code{point.est}}{Posterior mean of \eqn{\alpha}.} -#' \item{\code{se.est}}{Posterior SD of \eqn{\alpha}.} -#' \item{\code{conf.lower}, \code{conf.upper}}{Equal-tail credible interval endpoints.} -#' \item{\code{ET}}{Equal-tail endpoints.} -#' \item{\code{HPD}}{HPD interval via \pkg{HDInterval}.} -#' \item{\code{p.value}}{Posterior probability \eqn{\Pr(d>0)} (for RR>0 which is always true; often -#' redefine as \eqn{\Pr(\log RR>0)} if desired).} -#' } - -bayes_est_RR <- function(Na0, Na1, N0_1, N1_1, a1=.5, b1=.5, a0=.5, b0=.5, - M=1e5, conf=0.95){ - p1 <- rbeta(M, a1 + N1_1, b1 + Na1-N1_1) - p0 <- rbeta(M, a0 + N0_1, b0 + Na0-N0_1) - d <- p1/p0 - alpha <- log(d) - sd <- sd(alpha) - et <- quantile(alpha, c((1-conf)/2, 1-(1-conf)/2)) - hpd<- HDInterval::hdi(alpha, credMass=conf) - list(point.est = mean(alpha), se.est = sd, conf.lower = min(et), - conf.upper = max(et), ET = et, HPD = hpd, - p.value = mean(d>0)) -} - - - -#' g-computation helper functions -#' -#' @description -#' This set of helper functions (\code{mu.est}, \code{l.mu}, \code{var.est}, -#' \code{m}, \code{m.prime}, \code{m.prime.prime}, \code{fish}, \code{hii}, -#' \code{phi}) are implementations used in the -#' g-computation framework. -#' The code is written according to the methodology described in -#' \url{https://arxiv.org/pdf/2509.07369}. 
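The risk-difference summaries in this file are reported on the Fisher-z scale alpha = atanh(d), and the delta-method step behind that choice is SE(atanh(d)) being approximately SE(d) / (1 - d^2), because the derivative of atanh(d) is 1/(1 - d^2). A small simulation check of that approximation; the sample sizes and risks below are made-up toy values, not taken from the package:

## Illustrative check of the delta-method approximation used for the atanh scale.
set.seed(123)
n1 <- 200; n0 <- 200; p1 <- 0.30; p0 <- 0.10
d.draws <- rbinom(1e4, n1, p1) / n1 - rbinom(1e4, n0, p0) / n0 # simulated risk differences
sd(atanh(d.draws))                  # Monte Carlo SD on the atanh scale
sd(d.draws) / (1 - mean(d.draws)^2) # delta-method approximation, close to the line above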
- -mu.est <- function(y,x,beta){mean(c(y,m(x%*%beta)))} - - -l.mu <- function(y1,x1,beta1,y0,x0,beta0){ - mi <- c(m(x1%*%beta1),m(x0%*%beta0)) - mi1 <- c(m(x1%*%beta1),m(x0%*%beta1)) - mi0 <- c(m(x1%*%beta0),m(x0%*%beta0)) - mu1 <- mu.est(y1,x0,beta1) - mu0 <- mu.est(y0,x1,beta0) - - li1 <- c((1+hii(x1,beta1))*(y1-mi[1:length(y1)]),rep(0,length(y0)))/(length(y1)/(length(c(y1,y0))))+mi1-mu1 - li0 <- c(rep(0,length(y1)),(1+hii(x0,beta0))*(y0-mi[(length(y1)+1):length(mi)]))/(length(y0)/(length(c(y1,y0))))+mi0-mu0 - return(cbind(li0,li1)) - } - -var.est <- function(li,p0,p1){ - cov <- var(li)/nrow(li) - return(cov[1,1]/(p0^2)+cov[2,2]/(p1^2)-2*cov[1,2]/(p0*p1)) - } - -m <- function(x){exp(x)/(1+exp(x))} -m.prime <- function(x){exp(x)/(1+exp(x))^2} -m.prime.prime <- function(x){exp(x)*(1-exp(x))/(1+exp(x))^3} - -fish <- function(x,beta){t(x)%*%diag(as.vector(m.prime(x%*%beta)))%*%x/nrow(x)} -hii <- function(x,beta){m.prime(x%*%beta)*rowSums(x%*%ginv(nrow(x)*fish(x,beta))*x)} -phi <- function(y,x,beta,p){x%*%ginv(fish(x,beta))*as.vector(y-m(x%*%beta))/p} - -#' Transform point/SE/CI for RD to Fisher-z scale -#' -#' @description -#' Converts an estimate on the RD scale (\eqn{d}) and its SE/CI to the stabilized -#' Fisher-\eqn{z} scale \eqn{\alpha=\operatorname{atanh}(d)} using the delta method: -#' \eqn{\mathrm{SE}(\alpha) \approx \mathrm{SE}(d)/(1-d^2)}; CI endpoints are -#' transformed via \code{atanh}. -#' -#' @param est Numeric scalar. Point estimate on RD scale. -#' @param se Numeric scalar. Standard error of \code{est} on RD scale. -#' @param conf Numeric length-2 vector. Confidence interval endpoints on RD scale. -#' -#' @return A list with -#' \describe{ -#' \item{\code{point.est}}{\eqn{\operatorname{atanh}(est)}.} -#' \item{\code{se.est}}{\eqn{se/(1-est^2)}.} -#' \item{\code{CI}}{Length-2 vector \code{atanh(conf)}.} -#' } -#' -get_estimate <- function(est,se,conf){ - Ealpha <- atanh(est) #+est*se^2/((1-est^2)^2) - Valpha <- se/(1-est^2) - CIalpha <- atanh(conf) - list(point.est = Ealpha, se.est = Valpha, CI = CIalpha) -} - +#' Bayesian estimator for Risk Difference on Fisher-z scale +#' +#' @description +#' Conjugate-Beta model for two independent binomials in treatment (\eqn{x=1}) and control (\eqn{x=0}). +#' Draws \eqn{p_1 \sim \text{Beta}(a_1+N_{1,1},\, b_1+N_{a1}-N_{1,1})} and +#' \eqn{p_0 \sim \text{Beta}(a_0+N_{0,1},\, b_0+N_{a0}-N_{0,1})} by Monte Carlo, +#' forms the risk difference \eqn{d = p_1 - p_0}, and reports summaries on the +#' Fisher-\eqn{z} scale \eqn{\alpha = \operatorname{atanh}(d)} (stabilizes near the +#' boundaries \eqn{[-1,1]}). +#' +#' @param Na0 Integer. Number at risk in control arm (\eqn{x=0}). +#' @param Na1 Integer. Number at risk in treatment arm (\eqn{x=1}). +#' @param N0_1 Integer. Number of events in control arm. +#' @param N1_1 Integer. Number of events in treatment arm. +#' @param a1,b1,a0,b0 Positive numerics. Beta prior hyperparameters for +#' \eqn{p_1 \sim \text{Beta}(a_1,b_1)} and \eqn{p_0 \sim \text{Beta}(a_0,b_0)}. +#' Defaults are Jeffreys \code{0.5, 0.5}. +#' @param M Integer. Number of Monte Carlo draws (default \code{1e5}). +#' @param conf Numeric in \code{(0,1)}. Credible mass for intervals (default \code{0.95}). 
+#' +#' @return A list with components (all on the \eqn{\alpha=\operatorname{atanh}(d)} scale): +#' \describe{ +#' \item{\code{point.est}}{Posterior mean of \eqn{\alpha}.} +#' \item{\code{se.est}}{Posterior SD of \eqn{\alpha}.} +#' \item{\code{conf.lower}, \code{conf.upper}}{Equal-tail credible interval endpoints.} +#' \item{\code{ET}}{Length-2 vector of equal-tail endpoints.} +#' \item{\code{HPD}}{Length-2 Highest Posterior Density interval from \pkg{HDInterval}.} +#' \item{\code{p.value}}{Posterior probability \eqn{\Pr(d>0)} (one-sided support for RD>0).} +#' } +#' + +bayes_est_RD <- function(Na0, Na1, N0_1, N1_1, a1 = .5, b1 = .5, a0 = .5, b0 = .5, + M = 1e5, conf = 0.95) { + p1 <- rbeta(M, a1 + N1_1, b1 + Na1 - N1_1) + p0 <- rbeta(M, a0 + N0_1, b0 + Na0 - N0_1) + d <- p1 - p0 + alpha <- atanh(d) + sd <- sd(alpha) + et <- quantile(alpha, c((1 - conf) / 2, 1 - (1 - conf) / 2)) + hpd <- HDInterval::hdi(alpha, credMass = conf) + list( + point.est = mean(alpha), se.est = sd, conf.lower = min(et), + conf.upper = max(et), ET = et, HPD = hpd, + p.value = mean(d > 0) + ) +} + +#' Bayesian estimator for Risk Ratio on log scale +#' +#' @description +#' Conjugate-Beta model as above, but summarizes the risk ratio \eqn{d = p_1/p_0} +#' on the log scale \eqn{\alpha=\log d}. +#' +#' @inheritParams bayes_est_RD +#' +#' @return A list with components (all on the \eqn{\alpha=\log(p_1/p_0)} scale): +#' \describe{ +#' \item{\code{point.est}}{Posterior mean of \eqn{\alpha}.} +#' \item{\code{se.est}}{Posterior SD of \eqn{\alpha}.} +#' \item{\code{conf.lower}, \code{conf.upper}}{Equal-tail credible interval endpoints.} +#' \item{\code{ET}}{Equal-tail endpoints.} +#' \item{\code{HPD}}{HPD interval via \pkg{HDInterval}.} +#' \item{\code{p.value}}{Posterior probability \eqn{\Pr(d>0)}; since the risk ratio is always positive this is trivially 1, +#' so \eqn{\Pr(\log RR>0)} (i.e. \eqn{\Pr(RR>1)}) is usually the more useful summary.} +#' } + +bayes_est_RR <- function(Na0, Na1, N0_1, N1_1, a1 = .5, b1 = .5, a0 = .5, b0 = .5, + M = 1e5, conf = 0.95) { + p1 <- rbeta(M, a1 + N1_1, b1 + Na1 - N1_1) + p0 <- rbeta(M, a0 + N0_1, b0 + Na0 - N0_1) + d <- p1 / p0 + alpha <- log(d) + sd <- sd(alpha) + et <- quantile(alpha, c((1 - conf) / 2, 1 - (1 - conf) / 2)) + hpd <- HDInterval::hdi(alpha, credMass = conf) + list( + point.est = mean(alpha), se.est = sd, conf.lower = min(et), + conf.upper = max(et), ET = et, HPD = hpd, + p.value = mean(d > 0) + ) +} + + +#' g-computation helper functions +#' +#' @description +#' This set of helper functions (\code{mu.est}, \code{l.mu}, \code{var.est.RR}, \code{var.est.RD}, +#' \code{m}, \code{m.prime}, \code{m.prime.prime}, \code{fish}, \code{hii}, +#' \code{phi}) is used in the +#' g-computation framework. +#' The code is written according to the methodology described in +#' \url{https://arxiv.org/pdf/2509.07369}. 
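A minimal usage sketch for the conjugate-Beta Monte Carlo estimator defined above, on one made-up 2x2 table; the counts are illustrative only, and HDInterval must be installed for the HPD component:

## Toy example: 4/100 events under control, 12/100 under treatment.
set.seed(2024)
fit.rd <- bayes_est_RD(Na0 = 100, Na1 = 100, N0_1 = 4, N1_1 = 12)
fit.rd$point.est                              # posterior mean of atanh(p1 - p0)
tanh(c(fit.rd$conf.lower, fit.rd$conf.upper)) # equal-tail interval back on the RD scale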
+ +mu.est <- function(y, x, beta) { + mean(c(y, m(x %*% beta))) +} + + +l.mu <- function(y1, x1, beta1, y0, x0, beta0) { + mi <- c(m(x1 %*% beta1), m(x0 %*% beta0)) + mi1 <- c(m(x1 %*% beta1), m(x0 %*% beta1)) + mi0 <- c(m(x1 %*% beta0), m(x0 %*% beta0)) + mu1 <- mu.est(y1, x0, beta1) + mu0 <- mu.est(y0, x1, beta0) + + li1 <- c((1 + hii(x1, beta1)) * (y1 - mi[1:length(y1)]), rep(0, length(y0))) / (length(y1) / (length(c(y1, y0)))) + mi1 - mu1 + li0 <- c(rep(0, length(y1)), (1 + hii(x0, beta0)) * (y0 - mi[(length(y1) + 1):length(mi)])) / (length(y0) / (length(c(y1, y0)))) + mi0 - mu0 + return(cbind(li0, li1)) +} + +var.est.RR <- function(li, p0, p1) { + cov <- var(li) / nrow(li) + return(cov[1, 1] / (p0^2) + cov[2, 2] / (p1^2) - 2 * cov[1, 2] / (p0 * p1)) +} +var.est.RD <- function(li, p0, p1) { + cov <- var(li) / nrow(li) + return((cov[1, 1] + cov[2, 2] - 2 * cov[1, 2])) +} + +m <- function(x) { + exp(x) / (1 + exp(x)) +} +m.prime <- function(x) { + exp(x) / (1 + exp(x))^2 +} +m.prime.prime <- function(x) { + exp(x) * (1 - exp(x)) / (1 + exp(x))^3 +} + +fish <- function(x, beta) { + t(x) %*% diag(as.vector(m.prime(x %*% beta))) %*% x / nrow(x) +} +hii <- function(x, beta) { + m.prime(x %*% beta) * rowSums(x %*% ginv(nrow(x) * fish(x, beta)) * x) +} +phi <- function(y, x, beta, p) { + x %*% ginv(fish(x, beta)) * as.vector(y - m(x %*% beta)) / p +} + +#' Transform point/SE/CI for RD to Fisher-z scale +#' +#' @description +#' Converts an estimate on the RD scale (\eqn{d}) and its SE/CI to the stabilized +#' Fisher-\eqn{z} scale \eqn{\alpha=\operatorname{atanh}(d)} using the delta method: +#' \eqn{\mathrm{SE}(\alpha) \approx \mathrm{SE}(d)/(1-d^2)}; CI endpoints are +#' transformed via \code{atanh}. +#' +#' @param est Numeric scalar. Point estimate on RD scale. +#' @param se Numeric scalar. Standard error of \code{est} on RD scale. +#' @param conf Numeric length-2 vector. Confidence interval endpoints on RD scale. +#' +#' @return A list with +#' \describe{ +#' \item{\code{point.est}}{\eqn{\operatorname{atanh}(est)}.} +#' \item{\code{se.est}}{\eqn{se/(1-est^2)}.} +#' \item{\code{CI}}{Length-2 vector \code{atanh(conf)}.} +#' } +#' +get_estimate <- function(est, se, conf) { + Ealpha <- atanh(est) #+est*se^2/((1-est^2)^2) + Valpha <- se / (1 - est^2) + CIalpha <- atanh(conf) + list(point.est = Ealpha, se.est = Valpha, CI = CIalpha) +} diff --git a/compare/data_generation_simulation.R b/compare/data_generation_simulation.R index 9246c40..28eed46 100644 --- a/compare/data_generation_simulation.R +++ b/compare/data_generation_simulation.R @@ -1,637 +1,663 @@ -#' Generate Simulated Binary Data under RR/RD Models -#' -#' @description -#' Simulates a binary outcome \code{y} with a binary treatment \code{x} and two -#' covariates \code{v.1} (intercept) and \code{v.2} (uniform on \eqn{[0,0.6]}). -#' Treatment assignment follows a logistic propensity score with linear predictor -#' \code{v %*% gamma.true}. Outcome probabilities under \code{x=0,1} are produced -#' by \code{getProbRR} (for \code{param="RR"}) or \code{getProbRD} (for -#' \code{param="RD"}), which must return a two-column matrix \code{cbind(p0, p1)}. -#' -#' @param param Character. \code{"RR"} (relative risk) or \code{"RD"} (risk difference). -#' @param n Integer. Sample size. -#' @param alpha.true Numeric vector (\eqn{p_a \times 1}). Structural parameter(s) for \eqn{\alpha}. -#' @param beta.true Numeric vector (\eqn{p_b \times 1}). Structural parameter(s) for \eqn{\beta}. -#' @param gamma.true Numeric vector (length 2). 
Coefficients for treatment model (\code{pscore}). -#' -#' @return A list with: -#' \describe{ -#' \item{\code{data}}{A \code{data.frame} with columns \code{y}, \code{x}, \code{v.1}, \code{v.2}.} -#' \item{\code{count}}{Numeric vector \code{c(Na0, Na1, N0_1, N1_1)} giving group sizes -#' and number of successes by arm.} -#' } -#' - -data.generation <- function(param, n, alpha.true, beta.true, gamma.true){ - - getProb = if (param == "RR") getProbRR else getProbRD - - v.1 = rep(1,n) # intercept term - v.2 = runif(n,0,0.6) - v = cbind(v.1,v.2) - v.1 = as.matrix(v.1, ncol = 1) - pscore.true = exp(v %*% gamma.true) / (1+exp(v %*% gamma.true)) - p0p1.true = getProb(v.1 %*% alpha.true,v %*% beta.true) - x = rbinom(n, 1, pscore.true) - pA.true = p0p1.true[,1] - pA.true[x==1] = p0p1.true[x==1,2] - y = rbinom(n, 1, pA.true) - - Na0 <- sum(x==0) - Na1 <- sum(x==1) - N0_1 <- sum(y[which(x==0)]) - N1_1 <- sum(y[which(x==1)]) - - data.simulation <- list(data = data.frame(y,x,v), count = c(Na0,Na1,N0_1,N1_1)) - return(data.simulation) -} - - -#' Quasi-Poisson Log-Link with Robust (HC0) SE for Treatment Effect -#' -#' @description -#' Fits \code{glm(y ~ x + v.1 + v.2 - 1, family = quasipoisson(link="log"))} and -#' reports the coefficient, robust standard error (HC0), Wald CI, and two-sided -#' p-value for the \code{x} effect (assumed to be the first coefficient). -#' -#' @param data \code{data.frame} with columns \code{y}, \code{x}, \code{v.1}, \code{v.2}. -#' -#' @return Numeric vector \code{c(est, se.robust, lower, upper, p.value)} for the -#' treatment coefficient on the log scale. -#' -#' @details -#' The robust variance uses \code{vcovHC(fit, type="HC0")}. This can be viewed as -#' a log–Poisson working model with overdispersion and sandwich SEs. - - -quasi.poisson <- function(data){ - fit.qp <- glm(y~x+v.1+v.2-1, family = quasipoisson(link = "log"), data = data) - vc <- vcovHC(fit.qp, type = "HC0")[1,1] - est <- coef(fit.qp)[1] - se.robust <- sqrt(vc) - p.robust <- 2 * (1 - pnorm(abs(est/se.robust))) - lower <- est - 1.96 * se.robust - upper <- est + 1.96 * se.robust - - return(c(est,se.robust,lower,upper,p.robust)) -} - -#' Simulate and Compare RR Estimators Across Multiple Methods -#' -#' @description -#' Generates data under an RR parametrization and computes estimates, SEs, CIs, -#' and p-values for a suite of methods: BRM MLE, BRM+adaptive (Bayes fallback), -#' CMH, log-binomial, log-Poisson, robust log-Poisson (quasi-Poisson + HC0), -#' BRM+Firth, profile-exact (based on BRM), and several g-computation variants -#' (plain, bias-reduction, Firth-corrected/FC with bias-reduction BR1/BR2). Returns a 5×14 matrix: -#' rows = \code{point.est}, \code{se.est}, \code{con.lower}, \code{con.upper}, \code{p.value}; -#' columns labeled by method. -#' -#' @param n Integer. Sample size. -#' @param event Character. \code{"common"} or \code{"rare"} to set truth. -#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. -#' -#' @return A numeric matrix with rows \code{point.est}, \code{se.est}, -#' \code{con.lower}, \code{con.upper}, \code{p.value} and 14 method columns: -#' \code{c("brm","brm_ad","CMH","log-binomial","log-poisson","robust log-possion", -#' "brm_firth","brm_exact","brm_exact_ad","g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2")}. 
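The robust log-Poisson comparator listed above corresponds to the quasi.poisson helper: a log-link quasi-Poisson working model for the binary outcome with an HC0 sandwich standard error. A self-contained sketch of that idea on simulated data; the variable names and data-generating values here are invented for the example:

## Sketch only: log-link quasi-Poisson fit for a binary outcome with an HC0
## sandwich SE, as in the quasi.poisson helper; the data below are made up.
library(sandwich)
set.seed(7)
n <- 500
x <- rbinom(n, 1, 0.5)
v <- runif(n)
y <- rbinom(n, 1, exp(-1.5 + 0.4 * x + 0.3 * v))
fit <- glm(y ~ x + v, family = quasipoisson(link = "log"))
est <- unname(coef(fit)["x"])                   # log relative risk for x
se <- sqrt(vcovHC(fit, type = "HC0")["x", "x"]) # robust (HC0) standard error
c(est = est, lower = est - 1.96 * se, upper = est + 1.96 * se)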
-#' - -simulate.rr <- function(n, event, hypothesis){ - - if (event == "common"){ - if (hypothesis == "null"){ - alpha.true <- 0 - beta.true <- c(1.5, 0.6) - gamma.true <- c(0.2, -0.5) - }else{ - alpha.true <- 0.3 - beta.true <- c(1.65, 0.5) - gamma.true <- c(0.2, -0.5) - } - }else{ - if (hypothesis == "null"){ - alpha.true <- 0 - beta.true <- c(-4.7, 0.5) - gamma.true <- c(0.2, -0.5) - }else{ - alpha.true <- 0.7 - beta.true <- c(-5.5, 0.5) - gamma.true <- c(0.2, -0.5) - } - } - - data.simulation <- data.generation('RR', n, alpha.true, beta.true, gamma.true) - - va = as.matrix(data.simulation$data$v.1,ncol = 1) - vb = cbind(data.simulation$data$v.1,data.simulation$data$v.2) - y = data.simulation$data$y - x = data.simulation$data$x - Na0 = data.simulation$count[1] - Na1 = data.simulation$count[2] - N0_1 = data.simulation$count[3] - N1_1 = data.simulation$count[4] - P0 = N0_1/Na0 - P1 = N1_1/Na1 - - pa = length(alpha.true) - pb = length(beta.true) - alpha.start = rep(0,pa) - beta.start = rep(0,pb) - - weight = rep(1, length(y)) - max.step = min(pa * 20, 1000) - thres = 1e-6 - thres.dicho = 1e-3 - - ##brm - est.brm <- MLEst('RR', y, x, va, vb, weight, max.step, thres, alpha.start = rep(0, pa), - beta.start = rep(0, pb), pa, pb) - - ##CMH - sam.CMH <- matrix(c(Na0-N0_1,Na1-N1_1,N0_1,N1_1),2,2) - est.CMH <- riskratio(sam.CMH, method="small", correction=TRUE) - - ## - v.1 = vb[,1] - v.2 = vb[,2] - ##log-binomial - est.lb <- glm(y~x+v.1+v.2-1, family = binomial(link = "log"), data = data.simulation$data, start = rep(-0.01,3)) - - ##log-poisson - est.lp <- glm(y~x+v.1+v.2-1, family = poisson(link = "log"), data = data.simulation$data) - - ##robust log-poisson - - est.rlp <- quasi.poisson(data.simulation$data) - - ##brm + firth - - est.brm.firth <- MLEst('RR', y, x, va, vb, weight, max.step, thres, alpha.start = rep(0, pa), - beta.start = rep(0, pb), pa, pb, method="firth") - - ## brm_ad - est.brm.ad = est.brm - if(P0==0|P0==1|P1==0|P1==1) { - est.bayes = bayes_est_RR(Na0,Na1,N0_1,N1_1) - est.brm.ad$point.est[1] = est.bayes$point.est - est.brm.ad$se.est[1] = est.bayes$se.est - est.brm.ad$conf.lower[1] = est.bayes$conf.lower - est.brm.ad$conf.upper[1] = est.bayes$conf.upper - est.brm.ad$p.value[1] = est.bayes$p.value - } - - ##g-computaion & g-computation_BR - Y1 <- y[which(x==1)] - Y0 <- y[which(x==0)] - V2.1 <- v.2[which(x==1)] - V2.0 <- v.2[which(x==0)] - X1 <- x[which(x==1)] - X0 <- x[which(x==0)] - - data.treat <- data.frame(Y1,V2.1) - data.control <- data.frame(Y0,V2.0) - - est.treat <- glm(Y1~V2.1, family = binomial, data = data.treat) - est.control <- glm(Y0~V2.0, family = binomial, data = data.control) - - beta.hat.treat <- est.treat$coefficients - beta.hat.control <- est.control$coefficients - - V.FC.treat <- cbind(1,V2.1) - V.FC.control <- cbind(1,V2.0) - - - beta.hat.star.treat <- beta.hat.treat + colMeans(hatvalues(est.treat)*phi(Y1,V.FC.treat,beta.hat.treat,sum(x==1)/n)) - beta.hat.star.control <- beta.hat.control + colMeans(hatvalues(est.control)*phi(Y0,V.FC.control,beta.hat.control,sum(x==0)/n)) - - #beta_hat - p.hat.treat <- mean(c(Y1,predict(est.treat,newdata = data.control, type = "response"))) - p.hat.control <- mean(c(Y0,predict(est.control,newdata = data.treat, type = "response"))) - alpha.hat <- log(p.hat.treat/p.hat.control) - - #beta_hat_star - p.hat.star.treat <-mean(c(Y1,m(V.FC.control%*%beta.hat.star.treat))) - p.hat.star.control <- mean(c(Y0,m(V.FC.treat%*%beta.hat.star.control))) - alpha.hat.star <- log(p.hat.star.treat/p.hat.star.control) - - li.hat <- 
l.mu(Y1,V.FC.treat,beta.hat.treat,Y0,V.FC.control,beta.hat.control) - li.hat.star <- l.mu(Y1,V.FC.treat,beta.hat.star.treat,Y0,V.FC.control,beta.hat.star.control) - - se.hat <- sqrt(var.est(li.hat,p.hat.control,p.hat.treat)) - se.hat.star <- sqrt(var.est(li.hat.star,p.hat.star.control,p.hat.star.treat)) - - #beta_tilde - fit.treat <- logistf(Y1 ~ V2.1,data = data.treat) - fit.control <- logistf(Y0 ~ V2.0,data = data.control) - - beta.tilde.treat <- fit.treat$coefficients - beta.tilde.control <- fit.control$coefficients - - #beta_tilde_star - beta.tilde.star.treat <- beta.tilde.treat + colMeans(as.vector(hii(V.FC.treat,beta.tilde.treat))*(phi(Y1,V.FC.treat,beta.tilde.treat,sum(x==1)/n) - -(V.FC.treat*as.vector(1-2*m(V.FC.treat%*%beta.tilde.treat)))%*%t(ginv(fish(V.FC.treat,beta.tilde.treat)))/2)) - beta.tilde.star.control <- beta.tilde.control + colMeans(as.vector(hii(V.FC.control,beta.tilde.control))*(phi(Y0,V.FC.control,beta.tilde.control,sum(x==0)/n) - -(V.FC.control*as.vector(1-2*m(V.FC.control%*%beta.tilde.control)))%*%t(ginv(fish(V.FC.control,beta.tilde.control)))/2)) - #beta_tilde_doustar - beta.tilde.doustar.treat <- beta.tilde.treat - colMeans(as.vector(hii(V.FC.treat,beta.tilde.treat))*((V.FC.treat*as.vector(1-2*m(V.FC.treat%*%beta.tilde.treat)))%*%t(ginv(fish(V.FC.treat,beta.tilde.treat)))/2)) - beta.tilde.doustar.control <- beta.tilde.control - colMeans(as.vector(hii(V.FC.control,beta.tilde.control))*((V.FC.control*as.vector(1-2*m(V.FC.control%*%beta.tilde.control)))%*%t(ginv(fish(V.FC.control,beta.tilde.control)))/2)) - - #beta_tilde - p.tilde.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.treat))) - p.tilde.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.control))) - alpha.tilde <- log(p.tilde.treat/p.tilde.control) - - #beta_tilde_star - p.tilde.star.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.star.treat))) - p.tilde.star.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.star.control))) - alpha.tilde.star <- log(p.tilde.star.treat/p.tilde.star.control) - - #beta_tilde_starstar - p.tilde.doustar.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.doustar.treat))) - p.tilde.doustar.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.doustar.control))) - alpha.tilde.doustar <- log(p.tilde.doustar.treat/p.tilde.doustar.control) - - li.tilde <- l.mu(Y1,V.FC.treat,beta.tilde.treat,Y0,V.FC.control,beta.tilde.control) - li.tilde.star <- l.mu(Y1,V.FC.treat,beta.tilde.star.treat,Y0,V.FC.control,beta.tilde.star.control) - li.tilde.doustar <- l.mu(Y1,V.FC.treat,beta.tilde.doustar.treat,Y0,V.FC.control,beta.tilde.doustar.control) - - se.tilde <- sqrt(var.est(li.tilde,p.tilde.control,p.tilde.treat)) - se.tilde.star <- sqrt(var.est(li.tilde.star,p.tilde.star.control,p.tilde.star.treat)) - se.tilde.doustar <- sqrt(var.est(li.tilde.doustar,p.tilde.doustar.control,p.tilde.doustar.treat)) - - - - ##brm+exact - est.exact <- exact('RR', y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm$point.est, est.brm$se.est, pa, pb) - est.exact.ad <- exact('RR', y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.ad$point.est, est.brm.ad$se.est, pa, pb) - - ###result - point.est <- as.vector(c(est.brm$point.est[1], - est.brm.ad$point.est[1], - log(est.CMH$measure[2,1]), - est.lb$coefficients[1], - est.lp$coefficients[1], - est.rlp[1], - est.brm.firth$point.est[1], - est.brm$point.est[1], - est.brm.ad$point.est[1], - alpha.hat, - alpha.hat.star, - alpha.tilde, - alpha.tilde.star, - alpha.tilde.doustar)) - se.est <- as.vector(c(est.brm$se.est[1], - est.brm.ad$se.est[1], - 
(log(est.CMH$measure[2,1])-log(est.CMH$measure[2,2]))/qnorm(0.975), - summary(est.lb)$coefficients[1,2], - summary(est.lp)$coefficients[1,2], - est.rlp[2], - est.brm.firth$se.est[1], - est.brm$se.est[1], - est.brm.ad$se.est[1], - se.hat, - se.hat.star, - se.tilde, - se.tilde.star, - se.tilde.doustar)) - con.lower <- as.vector(c(est.brm$conf.lower[1], - est.brm.ad$conf.lower[1], - log(est.CMH$measure[2,2]), - confint.default(est.lb,level = 0.95)[1,1], - confint.default(est.lp,level = 0.95)[1,1], - est.rlp[3], - est.brm.firth$conf.lower[1], - est.exact$low[1], - est.exact.ad$low[1], - alpha.hat-qnorm(0.975)*se.hat, - alpha.hat.star-qnorm(0.975)*se.hat.star, - alpha.tilde-qnorm(0.975)*se.tilde, - alpha.tilde.star-qnorm(0.975)*se.tilde.star, - alpha.tilde.doustar-qnorm(0.975)*se.tilde.doustar)) - con.upper <- as.vector(c(est.brm$conf.upper[1], - est.brm.ad$conf.upper[1], - log(est.CMH$measure[2,3]), - confint.default(est.lb,level = 0.95)[1,2], - confint.default(est.lp,level = 0.95)[1,2], - est.rlp[4], - est.brm.firth$conf.upper[1], - est.exact$up[1], - est.exact.ad$up[1], - alpha.hat+qnorm(0.975)*se.hat, - alpha.hat.star+qnorm(0.975)*se.hat.star, - alpha.tilde+qnorm(0.975)*se.tilde, - alpha.tilde.star+qnorm(0.975)*se.tilde.star, - alpha.tilde.doustar+qnorm(0.975)*se.tilde.doustar)) - - p.value <- as.vector(c(est.brm$p.value[1], - est.brm.ad$p.value[1], - est.CMH$p.value[2,1], - summary(est.lb)$coefficients[1,4], - summary(est.lp)$coefficients[1,4], - est.rlp[5], - est.brm.firth$p.value[1], - est.exact$p[1], - est.exact.ad$p[1], - 2*min(pnorm(alpha.hat/se.hat),1-pnorm(alpha.hat/se.hat)), - 2*min(pnorm(alpha.hat.star/se.hat.star),1-pnorm(alpha.hat.star/se.hat.star)), - 2*min(pnorm(alpha.tilde/se.tilde),1-pnorm(alpha.tilde/se.tilde)), - 2*min(pnorm(alpha.tilde.star/se.tilde.star),1-pnorm(alpha.tilde.star/se.tilde.star)), - 2*min(pnorm(alpha.tilde.doustar/se.tilde.doustar),1-pnorm(alpha.tilde.doustar/se.tilde.doustar)))) - - result.comp <- rbind(point.est,se.est,con.lower,con.upper,p.value) - colnames(result.comp) <- c("brm","brm_ad","CMH","log-binomial","log-poisson","robust log-possion","brm_firth", - "brm_exact","brm_exact_ad","g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2") - return(result.comp) -} - -#' Simulate and Compare RD Estimators Across Multiple Methods -#' -#' @description -#' Generates data under an RD parametrization and computes estimates, SEs, CIs, -#' and p-values for multiple methods: BRM MLE (original and adaptive Bayes), -#' Bayesian RD with simple conjugate prior, GLM with identity link (if feasible), -#' LPM with robust SEs, Miettinen–Nurminen (MN), BRM+Firth, profile-exact, and -#' g-computation variants (plain, BR, FC and BR1/BR2). Returns a 5×14 matrix with -#' rows \code{point.est}, \code{se.est}, \code{CI.low.or}, \code{CI.up.or}, \code{p.value}. -#' -#' @param n Integer. Sample size. -#' @param event Character. \code{"common"} or \code{"rare"} to set truth. -#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. -#' -#' @return A numeric matrix with rows \code{point.est}, \code{se.est}, -#' \code{CI.low.or}, \code{CI.up.or}, \code{p.value} and 14 method columns: -#' \code{c("brm","brm_ad","bayes","glm","lpm","MN","firth","brm_exact","brm_exact_ad", -#' "g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2")}. 
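The g-computation comparators listed above all start from the same standardization idea: fit an outcome model within each arm, predict the counterfactual risks over a common covariate distribution, and contrast the averaged predictions (on the atanh scale for the risk difference). A simplified sketch of that step on invented data; the scripts below additionally mix observed outcomes with cross-arm predictions and apply the bias-reduction (BR) and Firth-corrected (FC) refinements:

## Simplified g-computation sketch on made-up data: arm-specific logistic fits,
## standardization over the pooled covariate distribution, RD on the atanh scale.
set.seed(99)
n <- 400
x <- rbinom(n, 1, 0.5)
v <- runif(n, 0, 0.6)
y <- rbinom(n, 1, plogis(-1 + 0.8 * x + 0.5 * v))
fit1 <- glm(y ~ v, family = binomial, subset = x == 1)
fit0 <- glm(y ~ v, family = binomial, subset = x == 0)
p1.std <- mean(predict(fit1, newdata = data.frame(v = v), type = "response"))
p0.std <- mean(predict(fit0, newdata = data.frame(v = v), type = "response"))
atanh(p1.std - p0.std) # standardized risk difference on the Fisher-z scale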
- -simulate.rd <- function(n, event, hypothesis){ - - if (event == "common"){ - if (hypothesis == "null"){ - alpha.true = 0 - beta.true = c(0.9,0.5) - gamma.true = c(0.2,-0.5) - }else{ - alpha.true = 0.1 - beta.true = c(0.9,0.2) - gamma.true = c(0.2,-0.5) - } - }else{ - if (hypothesis == "null"){ - alpha.true = 0 - beta.true = c(-4.5,0.5) - gamma.true = c(0.2,-0.5) - }else{ - alpha.true = 0.05 - beta.true = c(-5.5,0.2) - gamma.true = c(0.2,-0.5)# rare - } - } - - data.simulation <- data.generation('RD', n, alpha.true, beta.true, gamma.true) - va = as.matrix(data.simulation$data$v.1,ncol = 1) - vb = cbind(data.simulation$data$v.1,data.simulation$data$v.2) - y = data.simulation$data$y - x = data.simulation$data$x - Na0 = data.simulation$count[1] - Na1 = data.simulation$count[2] - N0_1 = data.simulation$count[3] - N1_1 = data.simulation$count[4] - - P0 = N0_1/Na0 - P1 = N1_1/Na1 - - pa = length(alpha.true) - pb = length(beta.true) - alpha.start = rep(0,pa) - beta.start = rep(0,pb) - - weight = rep(1, length(y)) - max.step = min(pa * 20, 1000) - thres = 1e-6 - - ##brm - est.brm.or <- MLEst('RD', y, x, va, vb, weight, max.step, thres, alpha.start = rep(0, pa), - beta.start = rep(0, pb), pa, pb) - - est.brm.ad = est.brm.or - if(P0==0|P0==1|P1==0|P1==1) { - est.bayes = bayes_est_RD(Na0,Na1,N0_1,N1_1) - est.brm.ad$point.est[1] = est.bayes$point.est - est.brm.ad$se.est[1] = est.bayes$se.est - est.brm.ad$conf.lower[1] = est.bayes$conf.lower - est.brm.ad$conf.upper[1] = est.bayes$conf.upper - est.brm.ad$p.value[1] = est.bayes$p.value - } - - ## bayesian prior - est.bayes = bayes_est_RD(Na0,Na1,N0_1,N1_1) - - v.1 = vb[,1] - v.2 = vb[,2] - ## GLM with identity link (calc_risk with identity link?) - e.glm <- glm(y~x+v.1+v.2-1, family = binomial(link = "identity"), data = data.simulation$data,start = rep(0.01,3)) - est.glm <- get_estimate(e.glm$coefficients[1], summary(e.glm)$coefficients[1,2], as.numeric(confint.default(e.glm,level = 0.95)[1,])) - - ## Linear probability model (LPM) + robust SE - lpm <- lm(y~x+v.1+v.2-1,data = data.simulation$data) - e.lpm <- coeftest(lpm, vcov = vcovHC(lpm,type = "HC3")) - est.lpm <- get_estimate(e.lpm[1,1],e.lpm[1,2],as.numeric(c(e.lpm[1,1]-1.96*e.lpm[1,2],e.lpm[1,1]+1.96*e.lpm[1,2]))) - - ## Miettinen–Nurminen - est.MN.point <- P1-P0 - est.MN.CI <- diffscoreci(N1_1, Na1, N0_1, Na0, conf.level = 0.95) - est.MN.se <- (est.MN.CI$conf.int[2]-est.MN.CI$conf.int[1])/(2*qnorm(0.975)) - est.MN <- get_estimate(est.MN.point,est.MN.se,c(est.MN.CI$conf.int[1],est.MN.CI$conf.int[2])) - # p.MN <- 2*(1-pnorm(abs(z2stat(N1_1,Na1,N0_1,Na0,dif=0)))) - - - ##g-computaion & g-computation_BR - Y1 <- y[which(x==1)] - Y0 <- y[which(x==0)] - V2.1 <- v.2[which(x==1)] - V2.0 <- v.2[which(x==0)] - X1 <- x[which(x==1)] - X0 <- x[which(x==0)] - - data.treat <- data.frame(Y1,V2.1) - data.control <- data.frame(Y0,V2.0) - - est.treat <- glm(Y1~V2.1, family = binomial, data = data.treat) - est.control <- glm(Y0~V2.0, family = binomial, data = data.control) - - beta.hat.treat <- est.treat$coefficients - beta.hat.control <- est.control$coefficients - - V.FC.treat <- cbind(1,V2.1) - V.FC.control <- cbind(1,V2.0) - - - beta.hat.star.treat <- beta.hat.treat + colMeans(hatvalues(est.treat)*phi(Y1,V.FC.treat,beta.hat.treat,sum(x==1)/n)) - beta.hat.star.control <- beta.hat.control + colMeans(hatvalues(est.control)*phi(Y0,V.FC.control,beta.hat.control,sum(x==0)/n)) - - #beta_tilde - fit.treat <- logistf(Y1 ~ V2.1,data = data.treat) - fit.control <- logistf(Y0 ~ V2.0,data = data.control) - - beta.tilde.treat 
<- fit.treat$coefficients - beta.tilde.control <- fit.control$coefficients - - #beta_tilde_star - beta.tilde.star.treat <- beta.tilde.treat + colMeans(as.vector(hii(V.FC.treat,beta.tilde.treat))*(phi(Y1,V.FC.treat,beta.tilde.treat,sum(x==1)/n) - -(V.FC.treat*as.vector(1-2*m(V.FC.treat%*%beta.tilde.treat)))%*%t(ginv(fish(V.FC.treat,beta.tilde.treat)))/2)) - beta.tilde.star.control <- beta.tilde.control + colMeans(as.vector(hii(V.FC.control,beta.tilde.control))*(phi(Y0,V.FC.control,beta.tilde.control,sum(x==0)/n) - -(V.FC.control*as.vector(1-2*m(V.FC.control%*%beta.tilde.control)))%*%t(ginv(fish(V.FC.control,beta.tilde.control)))/2)) - #beta_tilde_doustar - beta.tilde.doustar.treat <- beta.tilde.treat - colMeans(as.vector(hii(V.FC.treat,beta.tilde.treat))*((V.FC.treat*as.vector(1-2*m(V.FC.treat%*%beta.tilde.treat)))%*%t(ginv(fish(V.FC.treat,beta.tilde.treat)))/2)) - beta.tilde.doustar.control <- beta.tilde.control - colMeans(as.vector(hii(V.FC.control,beta.tilde.control))*((V.FC.control*as.vector(1-2*m(V.FC.control%*%beta.tilde.control)))%*%t(ginv(fish(V.FC.control,beta.tilde.control)))/2)) - - p.hat.treat <- mean(c(Y1,m(V.FC.control%*%beta.hat.treat))) - p.hat.control <- mean(c(Y0,m(V.FC.treat%*%beta.hat.control))) - alpha.hat <- atanh(p.hat.treat-p.hat.control) - - #beta_hat_star - p.hat.star.treat <-mean(c(Y1,m(V.FC.control%*%beta.hat.star.treat))) - p.hat.star.control <- mean(c(Y0,m(V.FC.treat%*%beta.hat.star.control))) - alpha.hat.star <- atanh(p.hat.star.treat-p.hat.star.control) - - #beta_tilde - p.tilde.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.treat))) - p.tilde.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.control))) - alpha.tilde <- atanh(p.tilde.treat-p.tilde.control) - - p.tilde.star.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.star.treat))) - p.tilde.star.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.star.control))) - alpha.tilde.star <- atanh(p.tilde.star.treat-p.tilde.star.control) - - #beta_tilde_starstar - p.tilde.doustar.treat <-mean(c(Y1,m(V.FC.control%*%beta.tilde.doustar.treat))) - p.tilde.doustar.control <- mean(c(Y0,m(V.FC.treat%*%beta.tilde.doustar.control))) - alpha.tilde.doustar <- atanh(p.tilde.doustar.treat-p.tilde.doustar.control) - - - li.hat <- l.mu(Y1,V.FC.treat,beta.hat.treat,Y0,V.FC.control,beta.hat.control) - li.hat.star <- l.mu(Y1,V.FC.treat,beta.hat.star.treat,Y0,V.FC.control,beta.hat.star.control) - li.tilde <- l.mu(Y1,V.FC.treat,beta.tilde.treat,Y0,V.FC.control,beta.tilde.control) - # li.firth <- l.mu(Y1,V.FC.treat,beta.firth.treat,Y0,V.FC.control,beta.firth.control) - li.tilde.star <- l.mu(Y1,V.FC.treat,beta.tilde.star.treat,Y0,V.FC.control,beta.tilde.star.control) - li.tilde.doustar <- l.mu(Y1,V.FC.treat,beta.tilde.doustar.treat,Y0,V.FC.control,beta.tilde.doustar.control) - - se.hat <- sqrt(var.est(li.hat,p.hat.control,p.hat.treat)) - se.hat.star <- sqrt(var.est(li.hat.star,p.hat.star.control,p.hat.star.treat)) - se.tilde <- sqrt(var.est(li.tilde,p.tilde.control,p.tilde.treat)) - # se.firth <- sqrt(var.est(li.firth,p.firth.control,p.firth.treat)) - se.tilde.star <- sqrt(var.est(li.tilde.star,p.tilde.star.control,p.tilde.star.treat)) - se.tilde.doustar <- sqrt(var.est(li.tilde.doustar,p.tilde.doustar.control,p.tilde.doustar.treat)) - - - ##brm_firth - est.brm.Firth <- MLEst('RD', y, x, va, vb, weight, max.step, thres, alpha.start = rep(0, pa), - beta.start = rep(0, pb), pa, pb, method="firth") - - - - #### CI and p.value - - est.exact.ad <- exact('RD', y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.ad$point.est, 
est.brm.ad$se.est, pa, pb) - est.exact <- exact('RD', y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.or$point.est, est.brm.or$se.est, pa, pb) - - - ###result - point.est <- c(est.brm.or$point.est[1], - est.brm.ad$point.est[1], - est.bayes$point.est, - est.glm$point.est, - est.lpm$point.est, - est.MN$point.est, - est.brm.Firth$point.est[1], - est.brm.or$point.est[1], - est.brm.ad$point.est[1], - alpha.hat, - alpha.hat.star, - alpha.tilde, - alpha.tilde.star, - alpha.tilde.doustar) - se.est <- c(est.brm.or$se.est[1], - est.brm.ad$se.est[1], - est.bayes$se.est, - est.glm$se.est, - est.lpm$se.est, - est.MN$se.est, - est.brm.Firth$se.est[1], - est.brm.or$se.est[1], - est.brm.ad$se.est[1], - se.hat, - se.hat.star, - se.tilde, - se.tilde.star, - se.tilde.doustar) - CI.low.or <- c(est.brm.or$conf.lower[1], - est.brm.ad$conf.lower[1], - est.bayes$conf.lower, - est.glm$CI[1], - est.lpm$CI[1], - est.MN$CI[1], - est.brm.Firth$conf.lower[1], - est.exact$low[1], - est.exact.ad$low[1], - alpha.hat-qnorm(0.975)*se.hat, - alpha.hat.star-qnorm(0.975)*se.hat.star, - alpha.tilde-qnorm(0.975)*se.tilde, - alpha.tilde.star-qnorm(0.975)*se.tilde.star, - alpha.tilde.doustar-qnorm(0.975)*se.tilde.doustar) - CI.up.or <- c(est.brm.or$conf.upper[1], - est.brm.ad$conf.upper[1], - est.bayes$conf.upper, - est.glm$CI[2], - est.lpm$CI[2], - est.MN$CI[2], - est.brm.Firth$conf.upper[1], - est.exact$up[1], - est.exact.ad$up[1], - alpha.hat+qnorm(0.975)*se.hat, - alpha.hat.star+qnorm(0.975)*se.hat.star, - alpha.tilde+qnorm(0.975)*se.tilde, - alpha.tilde.star+qnorm(0.975)*se.tilde.star, - alpha.tilde.doustar+qnorm(0.975)*se.tilde.doustar) - p.value <- c(est.brm.or$p.value[1], - est.brm.ad$p.value[1], - est.bayes$p.value, - summary(e.glm)$coefficients[1,4], - summary(lpm)$coefficients[1,4], - min(pnorm(alpha.hat/se.hat),1-pnorm(alpha.hat/se.hat)), - est.brm.Firth$p.value[1], - est.exact$p[1], - est.exact.ad$p[1], - 2*min(pnorm(est.MN$point.est/est.MN$se.est),1-pnorm(est.MN$point.est/est.MN$se.est)), - 2*min(pnorm(alpha.hat.star/se.hat.star),1-pnorm(alpha.hat.star/se.hat.star)), - 2*min(pnorm(alpha.tilde/se.tilde),1-pnorm(alpha.tilde/se.tilde)), - 2*min(pnorm(alpha.tilde.star/se.tilde.star),1-pnorm(alpha.tilde.star/se.tilde.star)), - 2*min(pnorm(alpha.tilde.doustar/se.tilde.doustar),1-pnorm(alpha.tilde.doustar/se.tilde.doustar))) - - result.comp <- rbind(point.est,se.est,CI.low.or,CI.up.or,p.value) - colnames(result.comp) <- c("brm","brm_ad","bayes","glm","lpm","MN", "firth", - "brm_exact","brm_exact_ad","g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2") - return(result.comp) -} - -#' Run a Single Simulation for RR or RD -#' -#' @description -#' Dispatch helper that runs \code{simulate.rr()} if \code{param="RR"} and -#' \code{simulate.rd()} otherwise. -#' -#' @param param Character. \code{"RR"} or \code{"RD"}. -#' @param n Integer. Sample size. -#' @param event Character. \code{"common"} or \code{"rare"}. -#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. -#' -#' @return The matrix returned by the corresponding simulator. - - -run <- function(param,n,event,hypothesis){ - simulate.fun = if (param == "RR") simulate.rr else simulate.rd - result = simulate.fun(n,event,hypothesis) - return(result) -} - +#' Generate Simulated Binary Data under RR/RD Models +#' +#' @description +#' Simulates a binary outcome \code{y} with a binary treatment \code{x} and two +#' covariates \code{v.1} (intercept) and \code{v.2} (uniform on \eqn{[0,0.6]}). 
+#' Treatment assignment follows a logistic propensity score with linear predictor +#' \code{v %*% gamma.true}. Outcome probabilities under \code{x=0,1} are produced +#' by \code{getProbRR} (for \code{param="RR"}) or \code{getProbRD} (for +#' \code{param="RD"}), which must return a two-column matrix \code{cbind(p0, p1)}. +#' +#' @param param Character. \code{"RR"} (relative risk) or \code{"RD"} (risk difference). +#' @param n Integer. Sample size. +#' @param alpha.true Numeric vector (\eqn{p_a \times 1}). Structural parameter(s) for \eqn{\alpha}. +#' @param beta.true Numeric vector (\eqn{p_b \times 1}). Structural parameter(s) for \eqn{\beta}. +#' @param gamma.true Numeric vector (length 2). Coefficients for treatment model (\code{pscore}). +#' +#' @return A list with: +#' \describe{ +#' \item{\code{data}}{A \code{data.frame} with columns \code{y}, \code{x}, \code{v.1}, \code{v.2}.} +#' \item{\code{count}}{Numeric vector \code{c(Na0, Na1, N0_1, N1_1)} giving group sizes +#' and number of successes by arm.} +#' } +#' + +data.generation <- function(param, n, alpha.true, beta.true, gamma.true) { + getProb <- if (param == "RR") getProbRR else getProbRD + + v.1 <- rep(1, n) # intercept term + v.2 <- runif(n, 0, 0.6) + v <- cbind(v.1, v.2) + v.1 <- as.matrix(v.1, ncol = 1) + pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) + p0p1.true <- getProb(v.1 %*% alpha.true, v %*% beta.true) + x <- rbinom(n, 1, pscore.true) + pA.true <- p0p1.true[, 1] + pA.true[x == 1] <- p0p1.true[x == 1, 2] + y <- rbinom(n, 1, pA.true) + + Na0 <- sum(x == 0) + Na1 <- sum(x == 1) + N0_1 <- sum(y[which(x == 0)]) + N1_1 <- sum(y[which(x == 1)]) + + data.simulation <- list(data = data.frame(y, x, v), count = c(Na0, Na1, N0_1, N1_1)) + return(data.simulation) +} + + +#' Quasi-Poisson Log-Link with Robust (HC0) SE for Treatment Effect +#' +#' @description +#' Fits \code{glm(y ~ x + v.1 + v.2 - 1, family = quasipoisson(link="log"))} and +#' reports the coefficient, robust standard error (HC0), Wald CI, and two-sided +#' p-value for the \code{x} effect (assumed to be the first coefficient). +#' +#' @param data \code{data.frame} with columns \code{y}, \code{x}, \code{v.1}, \code{v.2}. +#' +#' @return Numeric vector \code{c(est, se.robust, lower, upper, p.value)} for the +#' treatment coefficient on the log scale. +#' +#' @details +#' The robust variance uses \code{vcovHC(fit, type="HC0")}. This can be viewed as +#' a log–Poisson working model with overdispersion and sandwich SEs. + + +quasi.poisson <- function(data) { + fit.qp <- glm(y ~ x + v.1 + v.2 - 1, family = quasipoisson(link = "log"), data = data) + vc <- vcovHC(fit.qp, type = "HC0")[1, 1] + est <- coef(fit.qp)[1] + se.robust <- sqrt(vc) + p.robust <- 2 * (1 - pnorm(abs(est / se.robust))) + lower <- est - 1.96 * se.robust + upper <- est + 1.96 * se.robust + + return(c(est, se.robust, lower, upper, p.robust)) +} + +#' Simulate and Compare RR Estimators Across Multiple Methods +#' +#' @description +#' Generates data under an RR parametrization and computes estimates, SEs, CIs, +#' and p-values for a suite of methods: BRM MLE, BRM+adaptive (Bayes fallback), +#' CMH, log-binomial, log-Poisson, robust log-Poisson (quasi-Poisson + HC0), +#' BRM+Firth, profile-exact (based on BRM), and several g-computation variants +#' (plain, bias-reduction, Firth-corrected/FC with bias-reduction BR1/BR2). Returns a 5×14 matrix: +#' rows = \code{point.est}, \code{se.est}, \code{con.lower}, \code{con.upper}, \code{p.value}; +#' columns labeled by method. 
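For downstream Monte Carlo summaries of these 5×14 matrices, a minimal aggregation sketch follows. It is illustrative only: the helper name summarize_sims, the truth argument, and the choice to index rows by position are assumptions, not part of this repository (rows 3–4 are named con.lower/con.upper in the RR simulator and CI.low.or/CI.up.or in the RD simulator, so positional indexing covers both).

# Illustrative helper: bias, empirical SE, CI coverage and rejection rate for one
# method column, given a list of 5 x 14 result matrices from simulate.rr()/simulate.rd().
summarize_sims <- function(results, method, truth = 0, level = 0.05) {
  est <- sapply(results, function(r) r[1, method])  # row 1: point.est
  low <- sapply(results, function(r) r[3, method])  # row 3: lower CI limit
  up  <- sapply(results, function(r) r[4, method])  # row 4: upper CI limit
  p   <- sapply(results, function(r) r[5, method])  # row 5: p.value
  c(bias      = mean(est) - truth,
    emp.se    = stats::sd(est),
    coverage  = mean(low <= truth & truth <= up),
    rejection = mean(p < level, na.rm = TRUE))
}
# e.g. res <- replicate(500, simulate.rr(200, "rare", "null"), simplify = FALSE)
#      summarize_sims(res, "brm")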
+#' +#' @param n Integer. Sample size. +#' @param event Character. \code{"common"} or \code{"rare"} to set truth. +#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. +#' +#' @return A numeric matrix with rows \code{point.est}, \code{se.est}, +#' \code{con.lower}, \code{con.upper}, \code{p.value} and 14 method columns: +#' \code{c("brm","brm_ad","CMH","log-binomial","log-poisson","robust log-possion", +#' "brm_firth","brm_exact","brm_exact_ad","g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2")}. +#' + +simulate.rr <- function(n, event, hypothesis) { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(1.5, 0.6) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.3 + beta.true <- c(1.65, 0.5) + gamma.true <- c(0.2, -0.5) + } + } else { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.7, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.7 + beta.true <- c(-5.5, 0.5) + gamma.true <- c(0.2, -0.5) + } + } + + data.simulation <- data.generation("RR", n, alpha.true, beta.true, gamma.true) + + va <- as.matrix(data.simulation$data$v.1, ncol = 1) + vb <- cbind(data.simulation$data$v.1, data.simulation$data$v.2) + y <- data.simulation$data$y + x <- data.simulation$data$x + Na0 <- data.simulation$count[1] + Na1 <- data.simulation$count[2] + N0_1 <- data.simulation$count[3] + N1_1 <- data.simulation$count[4] + P0 <- N0_1 / Na0 + P1 <- N1_1 / Na1 + + pa <- length(alpha.true) + pb <- length(beta.true) + alpha.start <- rep(0, pa) + beta.start <- rep(0, pb) + + weight <- rep(1, length(y)) + max.step <- min(pa * 20, 1000) + thres <- 1e-6 + thres.dicho <- 1e-3 + + ## brm + est.brm <- MLEst("RR", y, x, va, vb, weight, max.step, thres, + alpha.start = rep(0, pa), + beta.start = rep(0, pb), pa, pb + ) + + ## CMH + sam.CMH <- matrix(c(Na0 - N0_1, Na1 - N1_1, N0_1, N1_1), 2, 2) + est.CMH <- riskratio(sam.CMH, method = "small", correction = TRUE) + + ## + v.1 <- vb[, 1] + v.2 <- vb[, 2] + ## log-binomial + est.lb <- glm(y ~ x + v.1 + v.2 - 1, family = binomial(link = "log"), data = data.simulation$data, start = rep(-0.01, 3)) + + ## log-poisson + est.lp <- glm(y ~ x + v.1 + v.2 - 1, family = poisson(link = "log"), data = data.simulation$data) + + ## robust log-poisson + + est.rlp <- quasi.poisson(data.simulation$data) + + ## brm + firth + + est.brm.firth <- MLEst("RR", y, x, va, vb, weight, max.step, thres, + alpha.start = rep(0, pa), + beta.start = rep(0, pb), pa, pb, method = "firth" + ) + + ## brm_ad + est.brm.ad <- est.brm + if (P0 == 0 | P0 == 1 | P1 == 0 | P1 == 1) { + est.bayes <- bayes_est_RR(Na0, Na1, N0_1, N1_1) + est.brm.ad$point.est[1] <- est.bayes$point.est + est.brm.ad$se.est[1] <- est.bayes$se.est + est.brm.ad$conf.lower[1] <- est.bayes$conf.lower + est.brm.ad$conf.upper[1] <- est.bayes$conf.upper + est.brm.ad$p.value[1] <- est.bayes$p.value + } + + ## g-computaion & g-computation_BR + Y1 <- y[which(x == 1)] + Y0 <- y[which(x == 0)] + V2.1 <- v.2[which(x == 1)] + V2.0 <- v.2[which(x == 0)] + X1 <- x[which(x == 1)] + X0 <- x[which(x == 0)] + + data.treat <- data.frame(Y1, V2.1) + data.control <- data.frame(Y0, V2.0) + + est.treat <- glm(Y1 ~ V2.1, family = binomial, data = data.treat) + est.control <- glm(Y0 ~ V2.0, family = binomial, data = data.control) + + beta.hat.treat <- est.treat$coefficients + beta.hat.control <- est.control$coefficients + + V.FC.treat <- cbind(1, V2.1) + V.FC.control <- cbind(1, V2.0) + + + beta.hat.star.treat <- beta.hat.treat + colMeans(hatvalues(est.treat) * phi(Y1, 
V.FC.treat, beta.hat.treat, sum(x == 1) / n)) + beta.hat.star.control <- beta.hat.control + colMeans(hatvalues(est.control) * phi(Y0, V.FC.control, beta.hat.control, sum(x == 0) / n)) + + # beta_hat + p.hat.treat <- mean(c(Y1, predict(est.treat, newdata = data.control, type = "response"))) + p.hat.control <- mean(c(Y0, predict(est.control, newdata = data.treat, type = "response"))) + alpha.hat <- log(p.hat.treat / p.hat.control) + + # beta_hat_star + p.hat.star.treat <- mean(c(Y1, m(V.FC.control %*% beta.hat.star.treat))) + p.hat.star.control <- mean(c(Y0, m(V.FC.treat %*% beta.hat.star.control))) + alpha.hat.star <- log(p.hat.star.treat / p.hat.star.control) + + li.hat <- l.mu(Y1, V.FC.treat, beta.hat.treat, Y0, V.FC.control, beta.hat.control) + li.hat.star <- l.mu(Y1, V.FC.treat, beta.hat.star.treat, Y0, V.FC.control, beta.hat.star.control) + + se.hat <- sqrt(var.est.RR(li.hat, p.hat.control, p.hat.treat)) + se.hat.star <- sqrt(var.est.RR(li.hat.star, p.hat.star.control, p.hat.star.treat)) + + # beta_tilde + fit.treat <- logistf(Y1 ~ V2.1, data = data.treat) + fit.control <- logistf(Y0 ~ V2.0, data = data.control) + + beta.tilde.treat <- fit.treat$coefficients + beta.tilde.control <- fit.control$coefficients + + # beta_tilde_star + beta.tilde.star.treat <- beta.tilde.treat + colMeans(as.vector(hii(V.FC.treat, beta.tilde.treat)) * (phi(Y1, V.FC.treat, beta.tilde.treat, sum(x == 1) / n) + - (V.FC.treat * as.vector(1 - 2 * m(V.FC.treat %*% beta.tilde.treat))) %*% t(ginv(fish(V.FC.treat, beta.tilde.treat))) / 2)) + beta.tilde.star.control <- beta.tilde.control + colMeans(as.vector(hii(V.FC.control, beta.tilde.control)) * (phi(Y0, V.FC.control, beta.tilde.control, sum(x == 0) / n) + - (V.FC.control * as.vector(1 - 2 * m(V.FC.control %*% beta.tilde.control))) %*% t(ginv(fish(V.FC.control, beta.tilde.control))) / 2)) + # beta_tilde_doustar + beta.tilde.doustar.treat <- beta.tilde.treat - colMeans(as.vector(hii(V.FC.treat, beta.tilde.treat)) * ((V.FC.treat * as.vector(1 - 2 * m(V.FC.treat %*% beta.tilde.treat))) %*% t(ginv(fish(V.FC.treat, beta.tilde.treat))) / 2)) + beta.tilde.doustar.control <- beta.tilde.control - colMeans(as.vector(hii(V.FC.control, beta.tilde.control)) * ((V.FC.control * as.vector(1 - 2 * m(V.FC.control %*% beta.tilde.control))) %*% t(ginv(fish(V.FC.control, beta.tilde.control))) / 2)) + + # beta_tilde + p.tilde.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.treat))) + p.tilde.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.control))) + alpha.tilde <- log(p.tilde.treat / p.tilde.control) + + # beta_tilde_star + p.tilde.star.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.star.treat))) + p.tilde.star.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.star.control))) + alpha.tilde.star <- log(p.tilde.star.treat / p.tilde.star.control) + + # beta_tilde_starstar + p.tilde.doustar.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.doustar.treat))) + p.tilde.doustar.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.doustar.control))) + alpha.tilde.doustar <- log(p.tilde.doustar.treat / p.tilde.doustar.control) + + li.tilde <- l.mu(Y1, V.FC.treat, beta.tilde.treat, Y0, V.FC.control, beta.tilde.control) + li.tilde.star <- l.mu(Y1, V.FC.treat, beta.tilde.star.treat, Y0, V.FC.control, beta.tilde.star.control) + li.tilde.doustar <- l.mu(Y1, V.FC.treat, beta.tilde.doustar.treat, Y0, V.FC.control, beta.tilde.doustar.control) + + se.tilde <- sqrt(var.est.RR(li.tilde, p.tilde.control, p.tilde.treat)) + se.tilde.star <- sqrt(var.est.RR(li.tilde.star, p.tilde.star.control, 
p.tilde.star.treat)) + se.tilde.doustar <- sqrt(var.est.RR(li.tilde.doustar, p.tilde.doustar.control, p.tilde.doustar.treat)) + + + ## brm+exact + est.exact <- exact("RR", y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm$point.est, est.brm$se.est, pa, pb) + est.exact.ad <- exact("RR", y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.ad$point.est, est.brm.ad$se.est, pa, pb) + + ### result + point.est <- as.vector(c( + est.brm$point.est[1], + est.brm.ad$point.est[1], + log(est.CMH$measure[2, 1]), + est.lb$coefficients[1], + est.lp$coefficients[1], + est.rlp[1], + est.brm.firth$point.est[1], + est.brm$point.est[1], + est.brm.ad$point.est[1], + alpha.hat, + alpha.hat.star, + alpha.tilde, + alpha.tilde.star, + alpha.tilde.doustar + )) + se.est <- as.vector(c( + est.brm$se.est[1], + est.brm.ad$se.est[1], + (log(est.CMH$measure[2, 1]) - log(est.CMH$measure[2, 2])) / qnorm(0.975), + summary(est.lb)$coefficients[1, 2], + summary(est.lp)$coefficients[1, 2], + est.rlp[2], + est.brm.firth$se.est[1], + est.brm$se.est[1], + est.brm.ad$se.est[1], + se.hat, + se.hat.star, + se.tilde, + se.tilde.star, + se.tilde.doustar + )) + con.lower <- as.vector(c( + est.brm$conf.lower[1], + est.brm.ad$conf.lower[1], + log(est.CMH$measure[2, 2]), + confint.default(est.lb, level = 0.95)[1, 1], + confint.default(est.lp, level = 0.95)[1, 1], + est.rlp[3], + est.brm.firth$conf.lower[1], + est.exact$low[1], + est.exact.ad$low[1], + alpha.hat - qnorm(0.975) * se.hat, + alpha.hat.star - qnorm(0.975) * se.hat.star, + alpha.tilde - qnorm(0.975) * se.tilde, + alpha.tilde.star - qnorm(0.975) * se.tilde.star, + alpha.tilde.doustar - qnorm(0.975) * se.tilde.doustar + )) + con.upper <- as.vector(c( + est.brm$conf.upper[1], + est.brm.ad$conf.upper[1], + log(est.CMH$measure[2, 3]), + confint.default(est.lb, level = 0.95)[1, 2], + confint.default(est.lp, level = 0.95)[1, 2], + est.rlp[4], + est.brm.firth$conf.upper[1], + est.exact$up[1], + est.exact.ad$up[1], + alpha.hat + qnorm(0.975) * se.hat, + alpha.hat.star + qnorm(0.975) * se.hat.star, + alpha.tilde + qnorm(0.975) * se.tilde, + alpha.tilde.star + qnorm(0.975) * se.tilde.star, + alpha.tilde.doustar + qnorm(0.975) * se.tilde.doustar + )) + + p.value <- as.vector(c( + est.brm$p.value[1], + est.brm.ad$p.value[1], + est.CMH$p.value[2, 1], + summary(est.lb)$coefficients[1, 4], + summary(est.lp)$coefficients[1, 4], + est.rlp[5], + est.brm.firth$p.value[1], + est.exact$p[1], + est.exact.ad$p[1], + 2 * min(pnorm(alpha.hat / se.hat), 1 - pnorm(alpha.hat / se.hat)), + 2 * min(pnorm(alpha.hat.star / se.hat.star), 1 - pnorm(alpha.hat.star / se.hat.star)), + 2 * min(pnorm(alpha.tilde / se.tilde), 1 - pnorm(alpha.tilde / se.tilde)), + 2 * min(pnorm(alpha.tilde.star / se.tilde.star), 1 - pnorm(alpha.tilde.star / se.tilde.star)), + 2 * min(pnorm(alpha.tilde.doustar / se.tilde.doustar), 1 - pnorm(alpha.tilde.doustar / se.tilde.doustar)) + )) + + result.comp <- rbind(point.est, se.est, con.lower, con.upper, p.value) + colnames(result.comp) <- c( + "brm", "brm_ad", "CMH", "log-binomial", "log-poisson", "robust log-possion", "brm_firth", + "brm_exact", "brm_exact_ad", "g-computation", "GC_BR", "GC_FC", "GC_FC_BR1", "GC_FC_BR2" + ) + return(result.comp) +} + +#' Simulate and Compare RD Estimators Across Multiple Methods +#' +#' @description +#' Generates data under an RD parametrization and computes estimates, SEs, CIs, +#' and p-values for multiple methods: BRM MLE (original and adaptive Bayes), +#' Bayesian RD with simple conjugate prior, GLM with identity link 
(if feasible), +#' LPM with robust SEs, Miettinen–Nurminen (MN), BRM+Firth, profile-exact, and +#' g-computation variants (plain, BR, FC and BR1/BR2). Returns a 5×14 matrix with +#' rows \code{point.est}, \code{se.est}, \code{CI.low.or}, \code{CI.up.or}, \code{p.value}. +#' +#' @param n Integer. Sample size. +#' @param event Character. \code{"common"} or \code{"rare"} to set truth. +#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. +#' +#' @return A numeric matrix with rows \code{point.est}, \code{se.est}, +#' \code{CI.low.or}, \code{CI.up.or}, \code{p.value} and 14 method columns: +#' \code{c("brm","brm_ad","bayes","glm","lpm","MN","firth","brm_exact","brm_exact_ad", +#' "g-computation","GC_BR","GC_FC","GC_FC_BR1","GC_FC_BR2")}. + +simulate.rd <- function(n, event, hypothesis) { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(0.9, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.1 + beta.true <- c(0.9, 0.2) + gamma.true <- c(0.2, -0.5) + } + } else { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.5, 0.5) + gamma.true <- c(0.2, -0.5) + } else { + alpha.true <- 0.05 + beta.true <- c(-5.5, 0.2) + gamma.true <- c(0.2, -0.5) # rare + } + } + + data.simulation <- data.generation("RD", n, alpha.true, beta.true, gamma.true) + va <- as.matrix(data.simulation$data$v.1, ncol = 1) + vb <- cbind(data.simulation$data$v.1, data.simulation$data$v.2) + y <- data.simulation$data$y + x <- data.simulation$data$x + Na0 <- data.simulation$count[1] + Na1 <- data.simulation$count[2] + N0_1 <- data.simulation$count[3] + N1_1 <- data.simulation$count[4] + + P0 <- N0_1 / Na0 + P1 <- N1_1 / Na1 + + pa <- length(alpha.true) + pb <- length(beta.true) + alpha.start <- rep(0, pa) + beta.start <- rep(0, pb) + + weight <- rep(1, length(y)) + max.step <- min(pa * 20, 1000) + thres <- 1e-6 + + ## brm + est.brm.or <- MLEst("RD", y, x, va, vb, weight, max.step, thres, + alpha.start = rep(0, pa), + beta.start = rep(0, pb), pa, pb + ) + + est.brm.ad <- est.brm.or + if (P0 == 0 | P0 == 1 | P1 == 0 | P1 == 1) { + est.bayes <- bayes_est_RD(Na0, Na1, N0_1, N1_1) + est.brm.ad$point.est[1] <- est.bayes$point.est + est.brm.ad$se.est[1] <- est.bayes$se.est + est.brm.ad$conf.lower[1] <- est.bayes$conf.lower + est.brm.ad$conf.upper[1] <- est.bayes$conf.upper + est.brm.ad$p.value[1] <- est.bayes$p.value + } + + ## bayesian prior + est.bayes <- bayes_est_RD(Na0, Na1, N0_1, N1_1) + + v.1 <- vb[, 1] + v.2 <- vb[, 2] + ## GLM with identity link (calc_risk with identity link?) 
+ e.glm <- glm(y ~ x + v.1 + v.2 - 1, family = binomial(link = "identity"), data = data.simulation$data, start = rep(0.01, 3)) + est.glm <- get_estimate(e.glm$coefficients[1], summary(e.glm)$coefficients[1, 2], as.numeric(confint.default(e.glm, level = 0.95)[1, ])) + + ## Linear probability model (LPM) + robust SE + lpm <- lm(y ~ x + v.1 + v.2 - 1, data = data.simulation$data) + e.lpm <- coeftest(lpm, vcov = vcovHC(lpm, type = "HC3")) + est.lpm <- get_estimate(e.lpm[1, 1], e.lpm[1, 2], as.numeric(c(e.lpm[1, 1] - 1.96 * e.lpm[1, 2], e.lpm[1, 1] + 1.96 * e.lpm[1, 2]))) + + ## Miettinen–Nurminen + est.MN.point <- P1 - P0 + est.MN.CI <- diffscoreci(N1_1, Na1, N0_1, Na0, conf.level = 0.95) + est.MN.se <- (est.MN.CI$conf.int[2] - est.MN.CI$conf.int[1]) / (2 * qnorm(0.975)) + est.MN <- get_estimate(est.MN.point, est.MN.se, c(est.MN.CI$conf.int[1], est.MN.CI$conf.int[2])) + # p.MN <- 2*(1-pnorm(abs(z2stat(N1_1,Na1,N0_1,Na0,dif=0)))) + + + ## g-computaion & g-computation_BR + Y1 <- y[which(x == 1)] + Y0 <- y[which(x == 0)] + V2.1 <- v.2[which(x == 1)] + V2.0 <- v.2[which(x == 0)] + X1 <- x[which(x == 1)] + X0 <- x[which(x == 0)] + + data.treat <- data.frame(Y1, V2.1) + data.control <- data.frame(Y0, V2.0) + + est.treat <- glm(Y1 ~ V2.1, family = binomial, data = data.treat) + est.control <- glm(Y0 ~ V2.0, family = binomial, data = data.control) + + beta.hat.treat <- est.treat$coefficients + beta.hat.control <- est.control$coefficients + + V.FC.treat <- cbind(1, V2.1) + V.FC.control <- cbind(1, V2.0) + + + beta.hat.star.treat <- beta.hat.treat + colMeans(hatvalues(est.treat) * phi(Y1, V.FC.treat, beta.hat.treat, sum(x == 1) / n)) + beta.hat.star.control <- beta.hat.control + colMeans(hatvalues(est.control) * phi(Y0, V.FC.control, beta.hat.control, sum(x == 0) / n)) + + # beta_tilde + fit.treat <- logistf(Y1 ~ V2.1, data = data.treat) + fit.control <- logistf(Y0 ~ V2.0, data = data.control) + + beta.tilde.treat <- fit.treat$coefficients + beta.tilde.control <- fit.control$coefficients + + # beta_tilde_star + beta.tilde.star.treat <- beta.tilde.treat + colMeans(as.vector(hii(V.FC.treat, beta.tilde.treat)) * (phi(Y1, V.FC.treat, beta.tilde.treat, sum(x == 1) / n) + - (V.FC.treat * as.vector(1 - 2 * m(V.FC.treat %*% beta.tilde.treat))) %*% t(ginv(fish(V.FC.treat, beta.tilde.treat))) / 2)) + beta.tilde.star.control <- beta.tilde.control + colMeans(as.vector(hii(V.FC.control, beta.tilde.control)) * (phi(Y0, V.FC.control, beta.tilde.control, sum(x == 0) / n) + - (V.FC.control * as.vector(1 - 2 * m(V.FC.control %*% beta.tilde.control))) %*% t(ginv(fish(V.FC.control, beta.tilde.control))) / 2)) + # beta_tilde_doustar + beta.tilde.doustar.treat <- beta.tilde.treat - colMeans(as.vector(hii(V.FC.treat, beta.tilde.treat)) * ((V.FC.treat * as.vector(1 - 2 * m(V.FC.treat %*% beta.tilde.treat))) %*% t(ginv(fish(V.FC.treat, beta.tilde.treat))) / 2)) + beta.tilde.doustar.control <- beta.tilde.control - colMeans(as.vector(hii(V.FC.control, beta.tilde.control)) * ((V.FC.control * as.vector(1 - 2 * m(V.FC.control %*% beta.tilde.control))) %*% t(ginv(fish(V.FC.control, beta.tilde.control))) / 2)) + + p.hat.treat <- mean(c(Y1, m(V.FC.control %*% beta.hat.treat))) + p.hat.control <- mean(c(Y0, m(V.FC.treat %*% beta.hat.control))) + alpha.hat <- atanh(p.hat.treat - p.hat.control) + + # beta_hat_star + p.hat.star.treat <- mean(c(Y1, m(V.FC.control %*% beta.hat.star.treat))) + p.hat.star.control <- mean(c(Y0, m(V.FC.treat %*% beta.hat.star.control))) + alpha.hat.star <- atanh(p.hat.star.treat - p.hat.star.control) + + # 
beta_tilde + p.tilde.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.treat))) + p.tilde.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.control))) + alpha.tilde <- atanh(p.tilde.treat - p.tilde.control) + + p.tilde.star.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.star.treat))) + p.tilde.star.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.star.control))) + alpha.tilde.star <- atanh(p.tilde.star.treat - p.tilde.star.control) + + # beta_tilde_starstar + p.tilde.doustar.treat <- mean(c(Y1, m(V.FC.control %*% beta.tilde.doustar.treat))) + p.tilde.doustar.control <- mean(c(Y0, m(V.FC.treat %*% beta.tilde.doustar.control))) + alpha.tilde.doustar <- atanh(p.tilde.doustar.treat - p.tilde.doustar.control) + + + li.hat <- l.mu(Y1, V.FC.treat, beta.hat.treat, Y0, V.FC.control, beta.hat.control) + li.hat.star <- l.mu(Y1, V.FC.treat, beta.hat.star.treat, Y0, V.FC.control, beta.hat.star.control) + li.tilde <- l.mu(Y1, V.FC.treat, beta.tilde.treat, Y0, V.FC.control, beta.tilde.control) + # li.firth <- l.mu(Y1,V.FC.treat,beta.firth.treat,Y0,V.FC.control,beta.firth.control) + li.tilde.star <- l.mu(Y1, V.FC.treat, beta.tilde.star.treat, Y0, V.FC.control, beta.tilde.star.control) + li.tilde.doustar <- l.mu(Y1, V.FC.treat, beta.tilde.doustar.treat, Y0, V.FC.control, beta.tilde.doustar.control) + + se.hat <- sqrt(var.est.RD(li.hat, p.hat.control, p.hat.treat)) + se.hat.star <- sqrt(var.est.RD(li.hat.star, p.hat.star.control, p.hat.star.treat)) + se.tilde <- sqrt(var.est.RD(li.tilde, p.tilde.control, p.tilde.treat)) + # se.firth <- sqrt(var.est(li.firth,p.firth.control,p.firth.treat)) + se.tilde.star <- sqrt(var.est.RD(li.tilde.star, p.tilde.star.control, p.tilde.star.treat)) + se.tilde.doustar <- sqrt(var.est.RD(li.tilde.doustar, p.tilde.doustar.control, p.tilde.doustar.treat)) + + + ## brm_firth + est.brm.Firth <- MLEst("RD", y, x, va, vb, weight, max.step, thres, + alpha.start = rep(0, pa), + beta.start = rep(0, pb), pa, pb, method = "firth" + ) + + + #### CI and p.value + + est.exact.ad <- exact("RD", y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.ad$point.est, est.brm.ad$se.est, pa, pb) + est.exact <- exact("RD", y, x, va, vb, weight, max.step, thres, thres.dicho = 1e-3, est.brm.or$point.est, est.brm.or$se.est, pa, pb) + + + ### result + point.est <- c( + est.brm.or$point.est[1], + est.brm.ad$point.est[1], + est.bayes$point.est, + est.glm$point.est, + est.lpm$point.est, + est.MN$point.est, + est.brm.Firth$point.est[1], + est.brm.or$point.est[1], + est.brm.ad$point.est[1], + alpha.hat, + alpha.hat.star, + alpha.tilde, + alpha.tilde.star, + alpha.tilde.doustar + ) + se.est <- c( + est.brm.or$se.est[1], + est.brm.ad$se.est[1], + est.bayes$se.est, + est.glm$se.est, + est.lpm$se.est, + est.MN$se.est, + est.brm.Firth$se.est[1], + est.brm.or$se.est[1], + est.brm.ad$se.est[1], + se.hat, + se.hat.star, + se.tilde, + se.tilde.star, + se.tilde.doustar + ) + CI.low.or <- c( + est.brm.or$conf.lower[1], + est.brm.ad$conf.lower[1], + est.bayes$conf.lower, + est.glm$CI[1], + est.lpm$CI[1], + est.MN$CI[1], + est.brm.Firth$conf.lower[1], + est.exact$low[1], + est.exact.ad$low[1], + alpha.hat - qnorm(0.975) * se.hat, + alpha.hat.star - qnorm(0.975) * se.hat.star, + alpha.tilde - qnorm(0.975) * se.tilde, + alpha.tilde.star - qnorm(0.975) * se.tilde.star, + alpha.tilde.doustar - qnorm(0.975) * se.tilde.doustar + ) + CI.up.or <- c( + est.brm.or$conf.upper[1], + est.brm.ad$conf.upper[1], + est.bayes$conf.upper, + est.glm$CI[2], + est.lpm$CI[2], + est.MN$CI[2], + est.brm.Firth$conf.upper[1], + 
est.exact$up[1], + est.exact.ad$up[1], + alpha.hat + qnorm(0.975) * se.hat, + alpha.hat.star + qnorm(0.975) * se.hat.star, + alpha.tilde + qnorm(0.975) * se.tilde, + alpha.tilde.star + qnorm(0.975) * se.tilde.star, + alpha.tilde.doustar + qnorm(0.975) * se.tilde.doustar + ) + p.value <- c( + est.brm.or$p.value[1], + est.brm.ad$p.value[1], + est.bayes$p.value, + summary(e.glm)$coefficients[1, 4], + summary(lpm)$coefficients[1, 4], + min(pnorm(alpha.hat / se.hat), 1 - pnorm(alpha.hat / se.hat)), + est.brm.Firth$p.value[1], + est.exact$p[1], + est.exact.ad$p[1], + 2 * min(pnorm(est.MN$point.est / est.MN$se.est), 1 - pnorm(est.MN$point.est / est.MN$se.est)), + 2 * min(pnorm(alpha.hat.star / se.hat.star), 1 - pnorm(alpha.hat.star / se.hat.star)), + 2 * min(pnorm(alpha.tilde / se.tilde), 1 - pnorm(alpha.tilde / se.tilde)), + 2 * min(pnorm(alpha.tilde.star / se.tilde.star), 1 - pnorm(alpha.tilde.star / se.tilde.star)), + 2 * min(pnorm(alpha.tilde.doustar / se.tilde.doustar), 1 - pnorm(alpha.tilde.doustar / se.tilde.doustar)) + ) + + result.comp <- rbind(point.est, se.est, CI.low.or, CI.up.or, p.value) + colnames(result.comp) <- c( + "brm", "brm_ad", "bayes", "glm", "lpm", "MN", "firth", + "brm_exact", "brm_exact_ad", "g-computation", "GC_BR", "GC_FC", "GC_FC_BR1", "GC_FC_BR2" + ) + return(result.comp) +} + +#' Run a Single Simulation for RR or RD +#' +#' @description +#' Dispatch helper that runs \code{simulate.rr()} if \code{param="RR"} and +#' \code{simulate.rd()} otherwise. +#' +#' @param param Character. \code{"RR"} or \code{"RD"}. +#' @param n Integer. Sample size. +#' @param event Character. \code{"common"} or \code{"rare"}. +#' @param hypothesis Character. \code{"null"} or \code{"alternative"}. +#' +#' @return The matrix returned by the corresponding simulator. + + +run <- function(param, n, event, hypothesis) { + simulate.fun <- if (param == "RR") simulate.rr else simulate.rd + result <- simulate.fun(n, event, hypothesis) + return(result) +} diff --git a/compare/getProbScalarRD.R b/compare/getProbScalarRD.R index 613e948..00ea7a5 100644 --- a/compare/getProbScalarRD.R +++ b/compare/getProbScalarRD.R @@ -1,131 +1,137 @@ - -#' Calculate risks from arctanh RD and log OP -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' - -#' Calculate risks from arctanh RD and log OP -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
-#' -#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -#' -#' @examples getProbScalarRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = mapply(getProbScalarRD, logrr, logop) -#' rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export - - -getProbScalarRD = function(atanhrd, logop) { - - if(length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - - if (logop > 12) { - if (atanhrd < 0) { - p0 = 1 - p1 = p0 + tanh(atanhrd) - } else { - p1 = 1 - p0 = p1 - tanh(atanhrd) - } - } else { - if(logop < -12){ - if (atanhrd < 0) { - p0 = -tanh(atanhrd) - p1 = 0 - } else { - p1 = tanh(atanhrd) - p0 = 0 - } - } else { - ## not on boundary logop = 0; solving linear equations - if (same(logop, 0)) { - p0 = 0.5 * (1 - tanh(atanhrd)) - } else { - p0 = (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * - (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * - (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - - 1)) - } - p1 = p0 + tanh(atanhrd) - } - } - return(c(p0, p1)) -} - -#' Calculate risks from arctanh RD and log OP (vectorised) -#' -#' @param atanhrd arctanh of risk difference -#' -#' @param logop log of odds product -#' -#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -#' -#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -#' -#' @examples getProbRD(0,0) -#' -#' set.seed(0) -#' logrr = rnorm(10,0,1) -#' logop = rnorm(10,0,1) -#' probs = getProbRD(logrr, logop) -#' colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -#' probs -#' -#' @export -getProbRD = function(atanhrd, logop) { - if(is.matrix(atanhrd) && ncol(atanhrd) == 2){ - logop = atanhrd[,2] - atanhrd = atanhrd[,1] - } else if(length(logop)==1 && is.na(logop) && length(atanhrd) == 2){ - logop = atanhrd[2] - atanhrd = atanhrd[1] - } - p0 <- ifelse (logop > 12, - ifelse(atanhrd < 0, - 1, - 1 - tanh(atanhrd)), - ## not on boundary logop = 0; solving linear equations - ifelse(logop < -12, - ifelse(atanhrd < 0, - - tanh(atanhrd), - 0), - ifelse(same(logop, 0), - 0.5 * (1 - tanh(atanhrd)), - (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop))))/(2 * (exp(logop) - 1))))) - - p1 <- ifelse (logop > 12, - ifelse(atanhrd < 0, - 1 + tanh(atanhrd), - 1), - ## not on boundary logop = 0 - ifelse(logop < -12, - ifelse(atanhrd < 0, - 0, - tanh(atanhrd)), - p0 + tanh(atanhrd)) - ) - cbind(p0,p1) -} +#' Calculate risks from arctanh RD and log OP +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. +#' +#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +#' + +#' Calculate risks from arctanh RD and log OP +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
+#' +#' @return a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +#' +#' @examples getProbScalarRD(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- mapply(getProbScalarRD, logrr, logop) +#' rownames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export + + +getProbScalarRD <- function(atanhrd, logop) { + if (length(atanhrd) == 2) { + logop <- atanhrd[2] + atanhrd <- atanhrd[1] + } + + if (logop > 12) { + if (atanhrd < 0) { + p0 <- 1 + p1 <- p0 + tanh(atanhrd) + } else { + p1 <- 1 + p0 <- p1 - tanh(atanhrd) + } + } else { + if (logop < -12) { + if (atanhrd < 0) { + p0 <- -tanh(atanhrd) + p1 <- 0 + } else { + p1 <- tanh(atanhrd) + p0 <- 0 + } + } else { + ## not on boundary logop = 0; solving linear equations + if (same(logop, 0)) { + p0 <- 0.5 * (1 - tanh(atanhrd)) + } else { + p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * + (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * + (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - + 1)) + } + p1 <- p0 + tanh(atanhrd) + } + } + return(c(p0, p1)) +} + +#' Calculate risks from arctanh RD and log OP (vectorised) +#' +#' @param atanhrd arctanh of risk difference +#' +#' @param logop log of odds product +#' +#' @details The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +#' The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. +#' +#' @return a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns +#' +#' @examples getProbRD(0, 0) +#' +#' set.seed(0) +#' logrr <- rnorm(10, 0, 1) +#' logop <- rnorm(10, 0, 1) +#' probs <- getProbRD(logrr, logop) +#' colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") +#' probs +#' +#' @export +getProbRD <- function(atanhrd, logop) { + if (is.matrix(atanhrd) && ncol(atanhrd) == 2) { + logop <- atanhrd[, 2] + atanhrd <- atanhrd[, 1] + } else if (length(logop) == 1 && is.na(logop) && length(atanhrd) == 2) { + logop <- atanhrd[2] + atanhrd <- atanhrd[1] + } + p0 <- ifelse(logop > 12, + ifelse(atanhrd < 0, + 1, + 1 - tanh(atanhrd) + ), + ## not on boundary logop = 0; solving linear equations + ifelse(logop < -12, + ifelse(atanhrd < 0, + -tanh(atanhrd), + 0 + ), + ifelse(same(logop, 0), + 0.5 * (1 - tanh(atanhrd)), + (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1)) + ) + ) + ) + + p1 <- ifelse(logop > 12, + ifelse(atanhrd < 0, + 1 + tanh(atanhrd), + 1 + ), + ## not on boundary logop = 0 + ifelse(logop < -12, + ifelse(atanhrd < 0, + 0, + tanh(atanhrd) + ), + p0 + tanh(atanhrd) + ) + ) + cbind(p0, p1) +} diff --git a/compare/run_simulation.R b/compare/run_simulation.R index e10da2c..f1bab3f 100644 --- a/compare/run_simulation.R +++ b/compare/run_simulation.R @@ -1,10 +1,7 @@ - - library(doParallel) library(foreach) library(doRNG) -source("getProbScalarRR.R") source("getProbScalarRD.R") source("1_CallMLE.R") source("1.1_MLE_Point.R") @@ -31,11 +28,11 @@ library(PropCIs) library(MASS) ### Modifiable parameters -param = 'RD' # or 'RR' -n = 50 # or 200, 500 -event = 'rare' # or 'common' -hypothesis = 'null' # or 'alternative' -R = 10 # change with Monte Carlo +param <- "RD" # or 'RR' +n <- 50 # or 200, 500 +event <- "rare" # or 'common' +hypothesis <- "null" # or 'alternative' +R <- 10 # change with Monte Carlo ncores <- 5 @@ -43,28 +40,29 @@ ncores <- 5 cl <- makeCluster(ncores) 
registerDoParallel(cl) -result.mle <- foreach(r = (R-9):R, - .packages = c("brm","epitools","geepack","sandwich","lmtest","brglm2", - "MASS","logistf","binom","epiR","PropCIs"), - .options.RNG=1234) %dorng% { - - set.seed(r) - - r1 <- run(param,n,event,hypothesis) - - list(estimate = r1[1,], - se = r1[2,], - low = r1[3,], - up = r1[4,], - p = r1[5,]) - } +result.mle <- foreach( + r = (R - 9):R, + .packages = c( + "brm", "epitools", "geepack", "sandwich", "lmtest", "brglm2", + "MASS", "logistf", "binom", "epiR", "PropCIs" + ), + .options.RNG = 1234 +) %dorng% { + set.seed(r) + + r1 <- run(param, n, event, hypothesis) + + list( + estimate = r1[1, ], + se = r1[2, ], + low = r1[3, ], + up = r1[4, ], + p = r1[5, ] + ) +} stopCluster(cl) Sys.time() result.all <- do.call(rbind, lapply(result.mle, as.data.frame)) -write.csv(result.all, file = paste0("simulation_results_",param,"_",event,"_",hypothesis,"_n_", n, "_R_", R,".csv")) - - - - +write.csv(result.all, file = paste0("simulation_results_", param, "_", event, "_", hypothesis, "_n_", n, "_R_", R, ".csv")) diff --git a/data-raw/example_data.R b/data-raw/example_data.R index 5463702..60702e4 100644 --- a/data-raw/example_data.R +++ b/data-raw/example_data.R @@ -1,201 +1,200 @@ - -source("../compare/getProbScalarRR.R") -source("../compare/getProbScalarRD.R") -source("../compare/MyFunc.R") - -get.truth.params <- function(param, - event = c("common", "rare"), - hypothesis = c("null", "alternative")) { - event <- match.arg(event) - hypothesis <- match.arg(hypothesis) - - if (param == "RR") { - if (event == "common") { - if (hypothesis == "null") { - alpha.true <- 0 - beta.true <- c(1.5, 0.6) - gamma.true <- c(0.2, -0.5) - } else { # alternative - alpha.true <- 0.3 - beta.true <- c(1.65, 0.5) - gamma.true <- c(0.2, -0.5) - } - } else { # rare - if (hypothesis == "null") { - alpha.true <- 0 - beta.true <- c(-4.7, 0.5) - gamma.true <- c(0.2, -0.5) - } else { # alternative - alpha.true <- 0.7 - beta.true <- c(-5.5, 0.5) - gamma.true <- c(0.2, -0.5) - } - } - } else if (param == "RD") { - if (event == "common") { - if (hypothesis == "null") { - alpha.true <- 0 - beta.true <- c(0.9, 0.5) - gamma.true <- c(0.2, -0.5) - } else { # alternative - alpha.true <- 0.1 - beta.true <- c(0.9, 0.2) - gamma.true <- c(0.2, -0.5) - } - } else { # rare - if (hypothesis == "null") { - alpha.true <- 0 - beta.true <- c(-4.5, 0.5) - gamma.true <- c(0.2, -0.5) - } else { # alternative - alpha.true <- 0.05 - beta.true <- c(-5.5, 0.2) - gamma.true <- c(0.2, -0.5) - } - } - } else { - stop("param must be 'RR' or 'RD'") - } - - list(alpha.true = alpha.true, - beta.true = beta.true, - gamma.true = gamma.true) -} - - -data.generate <- function(param, distribution = "unif", n, alpha.true, beta.true, gamma.true){ - - getProb = if (param == "RR") getProbRR else getProbRD - - v.1 = rep(1,n) # intercept term - if(distribution == 'unif'){ - v.2 = runif(n,0,0.6) - } else if(distribution == 'binom'){ - v.2 = rbinom(n,1,0.3) - } else if(distribution == 'norm'){ - v.2 = rnorm(n,0.5,0.3) - } - v = cbind(v.1,v.2) - v.1 = as.matrix(v.1, ncol = 1) - pscore.true = exp(v %*% gamma.true) / (1+exp(v %*% gamma.true)) - p0p1.true = getProb(v.1 %*% alpha.true,v %*% beta.true) - x = rbinom(n, 1, pscore.true) - pA.true = p0p1.true[,1] - pA.true[x==1] = p0p1.true[x==1,2] - y = rbinom(n, 1, pA.true) - - Na0 <- sum(x==0) - Na1 <- sum(x==1) - N0_1 <- sum(y[which(x==0)]) - N1_1 <- sum(y[which(x==1)]) - - data.simulation <- list(data = data.frame(y,x,v), count = c(Na0,Na1,N0_1,N1_1)) - return(data.simulation) 
-} - - - -generate.example.data <- function( - n = 50, - seed = 1234, - distributions = c("unif", "binom", "norm") -) { - if (!is.null(seed)) { - set.seed(seed) - } - - # - distributions <- match.arg(distributions, choices = c("unif", "binom", "norm"), several.ok = TRUE) - - # param × event × hypothesis × distribution - cases <- expand.grid( - param = c("RR", "RD"), - event = c("common", "rare"), - hypothesis = c("null", "alternative"), - distribution = distributions, - stringsAsFactors = FALSE - ) - - ipd_list <- vector("list", nrow(cases)) - counts_list <- vector("list", nrow(cases)) - - for (i in seq_len(nrow(cases))) { - param_i <- cases$param[i] - event_i <- cases$event[i] - hypothesis_i <- cases$hypothesis[i] - distribution_i <- cases$distribution[i] - - truth <- get.truth.params( - param = param_i, - event = event_i, - hypothesis = hypothesis_i - ) - - sim_i <- data.generate( - param = param_i, - distribution = distribution_i, - n = n, - alpha.true = truth$alpha.true, - beta.true = truth$beta.true, - gamma.true = truth$gamma.true - ) - - dat_i <- sim_i$data - dat_i$param <- param_i - dat_i$event <- event_i - dat_i$hypothesis <- hypothesis_i - dat_i$distribution <- distribution_i - - ipd_list[[i]] <- dat_i - - counts_list[[i]] <- data.frame( - param = param_i, - event = event_i, - hypothesis = hypothesis_i, - distribution = distribution_i, - Na0 = sim_i$count[1], - Na1 = sim_i$count[2], - N0_1 = sim_i$count[3], - N1_1 = sim_i$count[4] - ) - } - - ipd_all <- do.call(rbind, ipd_list) - rownames(ipd_all) <- NULL - - counts_all <- do.call(rbind, counts_list) - rownames(counts_all) <- NULL - - list( - ipd = ipd_all, - counts = counts_all - ) -} - - -## n = 50 -exdat.50 <- generate.example.data(n = 50, seed = 1234) -example.ipd.50 <- exdat.50$ipd -example.counts.50 <- exdat.50$counts - -## n = 200 -exdat.200 <- generate.example.data(n = 200, seed = 1234) -example.ipd.200 <- exdat.200$ipd -example.counts.200 <- exdat.200$counts - -## n = 500 -exdat.500 <- generate.example.data(n = 500, seed = 1234) -example.ipd.500 <- exdat.500$ipd -example.counts.500 <- exdat.500$counts - -## Output -usethis::use_data( - example.ipd.50, - example.counts.50, - example.ipd.200, - example.counts.200, - example.ipd.500, - example.counts.500, - internal = FALSE, - overwrite = TRUE -) +source("../compare/getProbScalarRR.R") +source("../compare/getProbScalarRD.R") +source("../compare/MyFunc.R") + +get.truth.params <- function(param, + event = c("common", "rare"), + hypothesis = c("null", "alternative")) { + event <- match.arg(event) + hypothesis <- match.arg(hypothesis) + + if (param == "RR") { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(1.5, 0.6) + gamma.true <- c(0.2, -0.5) + } else { # alternative + alpha.true <- 0.3 + beta.true <- c(1.65, 0.5) + gamma.true <- c(0.2, -0.5) + } + } else { # rare + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.7, 0.5) + gamma.true <- c(0.2, -0.5) + } else { # alternative + alpha.true <- 0.7 + beta.true <- c(-5.5, 0.5) + gamma.true <- c(0.2, -0.5) + } + } + } else if (param == "RD") { + if (event == "common") { + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(0.9, 0.5) + gamma.true <- c(0.2, -0.5) + } else { # alternative + alpha.true <- 0.1 + beta.true <- c(0.9, 0.2) + gamma.true <- c(0.2, -0.5) + } + } else { # rare + if (hypothesis == "null") { + alpha.true <- 0 + beta.true <- c(-4.5, 0.5) + gamma.true <- c(0.2, -0.5) + } else { # alternative + alpha.true <- 0.05 + beta.true <- c(-5.5, 0.2) + 
gamma.true <- c(0.2, -0.5) + } + } + } else { + stop("param must be 'RR' or 'RD'") + } + + list( + alpha.true = alpha.true, + beta.true = beta.true, + gamma.true = gamma.true + ) +} + + +data.generate <- function(param, distribution = "unif", n, alpha.true, beta.true, gamma.true) { + getProb <- if (param == "RR") getProbRR else getProbRD + + v.1 <- rep(1, n) # intercept term + if (distribution == "unif") { + v.2 <- runif(n, 0, 0.6) + } else if (distribution == "binom") { + v.2 <- rbinom(n, 1, 0.3) + } else if (distribution == "norm") { + v.2 <- rnorm(n, 0.5, 0.3) + } + v <- cbind(v.1, v.2) + v.1 <- as.matrix(v.1, ncol = 1) + pscore.true <- exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) + p0p1.true <- getProb(v.1 %*% alpha.true, v %*% beta.true) + x <- rbinom(n, 1, pscore.true) + pA.true <- p0p1.true[, 1] + pA.true[x == 1] <- p0p1.true[x == 1, 2] + y <- rbinom(n, 1, pA.true) + + Na0 <- sum(x == 0) + Na1 <- sum(x == 1) + N0_1 <- sum(y[which(x == 0)]) + N1_1 <- sum(y[which(x == 1)]) + + data.simulation <- list(data = data.frame(y, x, v), count = c(Na0, Na1, N0_1, N1_1)) + return(data.simulation) +} + + +generate.example.data <- function( + n = 50, + seed = 1234, + distributions = c("unif", "binom", "norm") +) { + if (!is.null(seed)) { + set.seed(seed) + } + + # + distributions <- match.arg(distributions, choices = c("unif", "binom", "norm"), several.ok = TRUE) + + # param × event × hypothesis × distribution + cases <- expand.grid( + param = c("RR", "RD"), + event = c("common", "rare"), + hypothesis = c("null", "alternative"), + distribution = distributions, + stringsAsFactors = FALSE + ) + + ipd_list <- vector("list", nrow(cases)) + counts_list <- vector("list", nrow(cases)) + + for (i in seq_len(nrow(cases))) { + param_i <- cases$param[i] + event_i <- cases$event[i] + hypothesis_i <- cases$hypothesis[i] + distribution_i <- cases$distribution[i] + + truth <- get.truth.params( + param = param_i, + event = event_i, + hypothesis = hypothesis_i + ) + + sim_i <- data.generate( + param = param_i, + distribution = distribution_i, + n = n, + alpha.true = truth$alpha.true, + beta.true = truth$beta.true, + gamma.true = truth$gamma.true + ) + + dat_i <- sim_i$data + dat_i$param <- param_i + dat_i$event <- event_i + dat_i$hypothesis <- hypothesis_i + dat_i$distribution <- distribution_i + + ipd_list[[i]] <- dat_i + + counts_list[[i]] <- data.frame( + param = param_i, + event = event_i, + hypothesis = hypothesis_i, + distribution = distribution_i, + Na0 = sim_i$count[1], + Na1 = sim_i$count[2], + N0_1 = sim_i$count[3], + N1_1 = sim_i$count[4] + ) + } + + ipd_all <- do.call(rbind, ipd_list) + rownames(ipd_all) <- NULL + + counts_all <- do.call(rbind, counts_list) + rownames(counts_all) <- NULL + + list( + ipd = ipd_all, + counts = counts_all + ) +} + + +## n = 50 +exdat.50 <- generate.example.data(n = 50, seed = 1234) +example.ipd.50 <- exdat.50$ipd +example.counts.50 <- exdat.50$counts + +## n = 200 +exdat.200 <- generate.example.data(n = 200, seed = 1234) +example.ipd.200 <- exdat.200$ipd +example.counts.200 <- exdat.200$counts + +## n = 500 +exdat.500 <- generate.example.data(n = 500, seed = 1234) +example.ipd.500 <- exdat.500$ipd +example.counts.500 <- exdat.500$counts + +## Output +usethis::use_data( + example.ipd.50, + example.counts.50, + example.ipd.200, + example.counts.200, + example.ipd.500, + example.counts.500, + internal = FALSE, + overwrite = TRUE +) diff --git a/man/brm-package.Rd b/man/brm-package.Rd index 761054f..6f9265b 100644 --- a/man/brm-package.Rd +++ b/man/brm-package.Rd @@ 
-9,12 +9,12 @@ The function \code{brm} in this package provides an alternative to generalized l Unlike \code{glm}, which uses a single link function for the outcome, \code{brm} separates the nuisance model from the target model. This separation provides opportunities to choose nuisance models independently of the target model. To see why this is important, we may contrast it with the use of a GLM to model the log relative risk. In this setting one might use a Poisson regression (with interaction term) \eqn{log P(y = 1|x, va, vb) = \alpha * x * va + \beta * vb} (though such a model ignores the fact that \eqn{y} is binary); here \eqn{va} and \eqn{vb} are subsets of \eqn{v}. Such a Poisson model can be seen as a combination of two parts: a target model \eqn{log RR(va) = \alpha * va} and a nuisance model \eqn{log P(y = 1|x = 0, vb) = \beta * vb}. However, this nuisance model is variation dependent of the target model so that predicted probabilities may go outside of \eqn{[0,1]}. Furthermore, one cannot solve this problem under a GLM framework as with a GLM, the target model and nuisance model are determined \emph{simultaneously} through a link function. - More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). - - \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. - + More specifically, if the target model is a linear model on the conditional log Relative Risk (log RR) or ('logistically' transformed) conditional Risk Difference (atanh RD), \code{brm} fits a linear nuisance model for the conditional log Odds Product (log OP). If the target model is a linear model on the conditional log Odds Ratio (log OR), \code{brm} fits a linear nuisance model on the conditional logit baseline risk, logit P(y = 1|x = 0, vb). Note in this case the target and nuisance models combine to form a simple logistic regression model (which is fitted using \code{glm}). + + \code{brm} fits the three target models described above as they are simple and the parameter space is unconstrained. \code{brm} fits the nuisance models above as they are variation independent of the corresponding target model. This variation independence greatly facilitates parameter estimation and interpretation. + \code{brm} also provides doubly robust fitting as an option such that the estimates for \eqn{\alpha} are still consistent and asymptotically normal even when the nuisance model is misspecified, provided that we have a correctly specified logistic model for the exposure probability \eqn{P(x=1|v)}. Such doubly robust estimation is only possible for the Relative Risk and Risk Difference, but not the Odds Ratio. - + See Richardson et al. (2017) for more details. 
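As a purely numerical illustration of the variation-dependence point above (the numbers are made up, and the commented call assumes the get_prob_rr interface exported by this PR, which returns risks from a (log RR, log OP) pair):

# Log-linear (Poisson-type) outcome model: p1 = exp(log RR) * p0, so a moderate
# baseline risk combined with RR > 1 already leaves the probability scale:
p0 <- 0.6
logrr <- 1            # RR = exp(1), roughly 2.72
p0 * exp(logrr)       # about 1.63 -- not a valid probability

# In contrast, any real-valued (log RR, log OP) pair maps back to risks in [0, 1]
# under the odds-product parameterization, e.g. (assumed interface):
# get_prob_rr(1, 0)   # the pair (P(y=1|x=0), P(y=1|x=1)), both inside (0, 1)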
} \references{ diff --git a/man/brm.Rd b/man/brm.Rd index 6454307..db9e623 100644 --- a/man/brm.Rd +++ b/man/brm.Rd @@ -10,15 +10,15 @@ brm( va, vb = NULL, param, - est.method = "MLE", + est_method = "MLE", vc = NULL, optimal = TRUE, weights = NULL, subset = NULL, - max.step = NULL, + max_step = NULL, thres = 1e-08, - alpha.start = NULL, - beta.start = NULL, + alpha_start = NULL, + beta_start = NULL, message = FALSE ) } @@ -34,7 +34,7 @@ brm( \item{param}{The measure of association. Can take value 'RD' (risk difference), 'RR' (relative risk) or 'OR' (odds ratio)} -\item{est.method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).} +\item{est_method}{The method to be used in fitting the model. Can be 'MLE' (maximum likelihood estimation, the default) or 'DR' (doubly robust estimation).} \item{vc}{The covariates matrix for the probability of exposure, often called the propensity score. It can be specified via an object of class "\code{formula}" or a matrix. In the latter case, no intercept terms will be added to the covariates matrix. By default we fit a logistic regression model for the propensity score. (If not specified, defaults to va.)} @@ -44,35 +44,35 @@ brm( \item{subset}{An optional vector specifying a subset of observations to be used in the fitting process.} -\item{max.step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.} +\item{max_step}{The maximal number of iterations to be passed into the \code{\link[stats]{optim}} function. The default is 1000.} \item{thres}{Threshold for judging convergence. The default is 1e-6.} -\item{alpha.start}{Starting values for the parameters in the target model.} +\item{alpha_start}{Starting values for the parameters in the target model.} -\item{beta.start}{Starting values for the parameters in the nuisance model.} +\item{beta_start}{Starting values for the parameters in the nuisance model.} \item{message}{Show optimization details? Ignored if the estimation method is 'MLE'. The default is FALSE.} } \value{ A list consisting of - \item{param}{the measure of association.} + \item{param}{the measure of association.} -\item{point.est}{ the point estimates.} +\item{point_est}{ the point estimates.} -\item{se.est}{the standard error estimates.} +\item{se_est}{the standard error estimates.} \item{cov}{estimate of the covariance matrix for the estimates.} -\item{conf.lower}{ the lower limit of the 95\% (marginal) confidence interval.} +\item{conf_lower}{ the lower limit of the 95\% (marginal) confidence interval.} -\item{conf.upper}{ the upper limit of the 95\% (marginal) confidence interval.} +\item{conf_upper}{ the upper limit of the 95\% (marginal) confidence interval.} -\item{p.value}{the two sided p-value for testing zero coefficients.} +\item{p_value}{the two sided p-value for testing zero coefficients.} \item{coefficients}{ the matrix summarizing key information: point estimate, 95\% confidence interval and p-value.} -\item{param.est}{the fitted RR/RD/OR.} +\item{param_est}{the fitted RR/RD/OR.} \item{va}{ the matrix of covariates for the target model.} @@ -84,44 +84,46 @@ A list consisting of \code{brm} is used to estimate the association between two binary variables, and how that varies as a function of other covariates. 
} \details{ -\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. -If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. -If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. -If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. -For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. +\code{brm} contains two parts: the target model for the dependence measure (RR, RD or OR) and the nuisance model; the latter is required for maximum likelihood estimation. +If \code{param="RR"} then the target model is \eqn{log RR(va) = \alpha*va}. +If \code{param="RD"} then the target model is \eqn{atanh RD(va) = \alpha*va}. +If \code{param="OR"} then the target model is \eqn{log OR(va) = \alpha*va}. +For RR and RD, the nuisance model is for the log Odds Product: \eqn{log OP(vb) = \beta*vb}. For OR, the nuisance model is for the baseline risk: \eqn{logit(P(y=1|x=0,vb)) = \beta*vb.} -In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. +In each case the nuisance model is variation independent of the target model, which ensures that the predicted probabilities lie in \eqn{[0,1]}. See Richardson et al. (2016+) for more details. -If \code{est.method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. +If \code{est_method="DR"} then given a correctly specified logistic regression model for the propensity score \eqn{logit(P(x=1|vc)) = \gamma*vc}, estimation of the RR or RD is consistent, even if the log Odds Product model is misspecified. This estimation method is not available for the OR. See Tchetgen Tchetgen et al. (2014) for more details. -When estimating RR and RD, \code{est.method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. +When estimating RR and RD, \code{est_method="DR"} is recommended unless it is known that the log Odds Product model is correctly specified. Optimal weights (\code{optimal=TRUE}) are also recommended to increase efficiency. For the doubly robust estimation method, MLE is used to obtain preliminary estimates of \eqn{\alpha}, \eqn{\beta} and \eqn{\gamma}. The estimate of \eqn{\alpha} is then updated by solving a doubly-robust estimating equation. (The estimate for \eqn{\beta} remains the MLE.) 
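Reading the fitted coefficients back on the association scale follows directly from these link choices. A hedged sketch (splitting point_est into its alpha part assumes alpha comes first, as in the package tests; fit is a brm object and fit$va the target-model design matrix kept on the fit):

alpha_hat <- fit$point_est[seq_len(ncol(fit$va))]
eta <- drop(fit$va %*% alpha_hat)

rr_hat <- exp(eta)   # param = "RR":  RR(va) = exp(alpha * va)
rd_hat <- tanh(eta)  # param = "RD":  RD(va) = tanh(alpha * va)
or_hat <- exp(eta)   # param = "OR":  OR(va) = exp(alpha * va)
# Only the line matching the fitted param is meaningful; it should
# reproduce the param_est component returned by brm.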
} \examples{ set.seed(0) -n = 100 -alpha.true = c(0,-1) -beta.true = c(-0.5,1) -gamma.true = c(0.1,-0.5) -params.true = list(alpha.true=alpha.true, beta.true=beta.true, - gamma.true=gamma.true) -v.1 = rep(1,n) # intercept term -v.2 = runif(n,-2,2) -v = cbind(v.1,v.2) -pscore.true = exp(v \%*\% gamma.true) / (1+exp(v \%*\% gamma.true)) -p0p1.true = getProbRR(v \%*\% alpha.true,v \%*\% beta.true) -x = rbinom(n, 1, pscore.true) -pA.true = p0p1.true[,1] -pA.true[x==1] = p0p1.true[x==1,2] -y = rbinom(n, 1, pA.true) - -fit.mle = brm(y,x,v,v,'RR','MLE',v,TRUE) -fit.drw = brm(y,x,v,v,'RR','DR',v,TRUE) -fit.dru = brm(y,x,v,v,'RR','DR',v,FALSE) - -fit.mle2 = brm(y,x,~v.2, ~v.2, 'RR','MLE', ~v.2,TRUE) # same as fit.mle +n <- 100 +alpha_true <- c(0, -1) +beta_true <- c(-0.5, 1) +gamma_true <- c(0.1, -0.5) +params_true <- list( + alpha_true = alpha_true, beta_true = beta_true, + gamma_true = gamma_true +) +v.1 <- rep(1, n) # intercept term +v.2 <- runif(n, -2, 2) +v <- cbind(v.1, v.2) +pscore_true <- exp(v \%*\% gamma_true) / (1 + exp(v \%*\% gamma_true)) +p0p1.true <- get_prob_rr(v \%*\% alpha_true, v \%*\% beta_true) +x <- rbinom(n, 1, pscore_true) +pA_true <- p0p1.true[, 1] +pA_true[x == 1] <- p0p1.true[x == 1, 2] +y <- rbinom(n, 1, pA_true) + +fit_mle <- brm(y, x, v, v, "RR", "MLE", v, TRUE) +fit_drw <- brm(y, x, v, v, "RR", "DR", v, TRUE) +fit_dru <- brm(y, x, v, v, "RR", "DR", v, FALSE) + +fit_mle2 <- brm(y, x, ~v.2, ~v.2, "RR", "MLE", ~v.2, TRUE) # same as fit_mle } \references{ @@ -130,9 +132,9 @@ Thomas S. Richardson, James M. Robins and Linbo Wang. "On Modeling and Estimatio Eric J. Tchetgen Tchetgen, James M. Robins and Andrea Rotnitzky. "On doubly robust estimation in a semiparametric odds ratio model." Biometrika 97.1 (2010): 171-180. } \seealso{ -\code{getProbScalarRD}, \code{getProbRD} (vectorised), \code{getProbScalarRR} and \code{getProbRR} (vectorised) for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); +\code{get_prob_rd}, and \code{get_prob_rr} for functions calculating risks P(y=1|x=1) and P(y=1|x=0) from (atanh RD, log OP) or (log RR, log OP); -\code{predict.blm} for obtaining fitted probabilities from \code{brm} fits. +\code{predict_brm} for obtaining fitted probabilities from \code{brm} fits. } \author{ Linbo Wang , Mark Clements , Thomas Richardson diff --git a/man/getProbRR.Rd b/man/getProbRR.Rd deleted file mode 100644 index 838daee..0000000 --- a/man/getProbRR.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRR.R -\name{getProbRR} -\alias{getProbRR} -\title{Calculate risks from log RR and log OP (vectorised)} -\usage{ -getProbRR(logrr, logop = NA) -} -\arguments{ -\item{logrr}{log of relative risk} - -\item{logop}{log of odds product} -} -\value{ -a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns -} -\description{ -Calculate risks from log RR and log OP (vectorised) -} -\details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. 
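Fitted probabilities for the fits in the example above can be obtained with predict_brm (documented below); a short sketch:

# Two-column matrix of fitted risks (P(y=1|x=0,va,vb), P(y=1|x=1,va,vb)).
p01 <- predict_brm(fit_mle)

# Risks evaluated at the observed exposures: a vector P(y=1|x=x_new,va,vb).
p_obs <- predict_brm(fit_mle, x_new = x)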
-} -\examples{ -getProbRR(0,0) - -set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = getProbRR(logrr, logop) -colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -probs - -} diff --git a/man/getProbScalarRD.Rd b/man/getProbScalarRD.Rd deleted file mode 100644 index d997cb6..0000000 --- a/man/getProbScalarRD.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRD.R -\name{getProbScalarRD} -\alias{getProbScalarRD} -\title{Calculate risks from arctanh RD and log OP} -\usage{ -getProbScalarRD(atanhrd, logop) -} -\arguments{ -\item{atanhrd}{arctanh of risk difference} - -\item{logop}{log of odds product} -} -\value{ -a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} -} -\description{ -Calculate risks from arctanh RD and log OP -} -\details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. -The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. -} -\examples{ -getProbScalarRD(0,0) - -set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = mapply(getProbScalarRD, logrr, logop) -rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") -probs - -} diff --git a/man/getProbRD.Rd b/man/get_prob_rd.Rd similarity index 65% rename from man/getProbRD.Rd rename to man/get_prob_rd.Rd index 14e7292..fb193b8 100644 --- a/man/getProbRD.Rd +++ b/man/get_prob_rd.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRD.R -\name{getProbRD} -\alias{getProbRD} +% Please edit documentation in R/get_prob_rd.R +\name{get_prob_rd} +\alias{get_prob_rd} \title{Calculate risks from arctanh RD and log OP (vectorised)} \usage{ -getProbRD(atanhrd, logop) +get_prob_rd(atanhrd, logop) } \arguments{ \item{atanhrd}{arctanh of risk difference} @@ -18,17 +18,17 @@ a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns Calculate risks from arctanh RD and log OP (vectorised) } \details{ -The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. +The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. The inverse hyperbolic tangent function \code{arctanh} is defined as \eqn{arctanh(z) = [log(1+z) - log(1-z)] / 2}. 
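A quick numerical round trip ties the two returned columns back to the definitions above. A minimal sketch, assuming moderate inputs so no boundary case is hit:

set.seed(1)
atanhrd <- rnorm(5)
logop <- rnorm(5)

probs <- get_prob_rd(atanhrd, logop)
p0 <- probs[, 1]
p1 <- probs[, 2]

# Recover the inputs (up to numerical error): arctanh of the risk difference ...
all.equal(atanh(p1 - p0), atanhrd)
# ... and the log Odds Product log[(p0/(1-p0)) * (p1/(1-p1))].
all.equal(log(p0 / (1 - p0) * p1 / (1 - p1)), logop)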
} \examples{ -getProbRD(0,0) +get_prob_rd(0, 0) set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = getProbRD(logrr, logop) -colnames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") +logrr <- rnorm(10, 0, 1) +logop <- rnorm(10, 0, 1) +probs <- get_prob_rd(logrr, logop) +colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") probs } diff --git a/man/getProbScalarRR.Rd b/man/get_prob_rr.Rd similarity index 54% rename from man/getProbScalarRR.Rd rename to man/get_prob_rr.Rd index be6e84f..29f729a 100644 --- a/man/getProbScalarRR.Rd +++ b/man/get_prob_rr.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/getProbScalarRR.R -\name{getProbScalarRR} -\alias{getProbScalarRR} +% Please edit documentation in R/get_prob_rr.R +\name{get_prob_rr} +\alias{get_prob_rr} \title{Calculate risks from log RR and log OP} \usage{ -getProbScalarRR(logrr, logop = NA) +get_prob_rr(logrr, logop = NA) } \arguments{ \item{logrr}{log of relative risk} @@ -12,7 +12,7 @@ getProbScalarRR(logrr, logop = NA) \item{logop}{log of odds product} } \value{ -a vector \eqn{(P(y=1|x=0),P(y=1|x=1))} +a matrix \eqn{(P(y=1|x=0),P(y=1|x=1))} with two columns } \description{ Calculate risks from log RR and log OP @@ -21,13 +21,13 @@ Calculate risks from log RR and log OP The \eqn{log OP} is defined as \eqn{log OP = log[(P(y=1|x=0)/P(y=0|x=0))*(P(y=1|x=1)/P(y=0|x=1))]}. } \examples{ -getProbScalarRR(0,0) +get_prob_rr(0, 0) set.seed(0) -logrr = rnorm(10,0,1) -logop = rnorm(10,0,1) -probs = mapply(getProbScalarRR, logrr, logop) -rownames(probs) = c("P(y=1|x=0)","P(y=1|x=1)") +logrr <- rnorm(10, 0, 1) +logop <- rnorm(10, 0, 1) +probs <- get_prob_rr(logrr, logop) +colnames(probs) <- c("P(y=1|x=0)", "P(y=1|x=1)") probs } diff --git a/man/predict.brm.Rd b/man/predict.brm.Rd deleted file mode 100644 index a83cdaa..0000000 --- a/man/predict.brm.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/predict.brm.R -\name{predict.brm} -\alias{predict.brm} -\title{Fitted probabilities from \code{brm} fits} -\usage{ -\method{predict}{brm}(object, x.new = NULL, va.new = NULL, vb.new = NULL, ...) -} -\arguments{ -\item{object}{A fitted object from function \code{brm}.} - -\item{x.new}{An optional vector of x.} - -\item{va.new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.} - -\item{vb.new}{An optional covariate matrix to make predictions with. If vb.new is omitted but va.new is not, then vb.new is set to be equal to va.new. If both vb.new and va.new are omitted, then the original matrix vb is used.} - -\item{...}{affecting the predictions produced.} -} -\value{ -If x.new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). - -If x.new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x.new,va,vb). -} -\description{ -Calculate fitted probabilities from a fitted binary regression model object. -} diff --git a/man/predict_brm.Rd b/man/predict_brm.Rd new file mode 100644 index 0000000..04b70d6 --- /dev/null +++ b/man/predict_brm.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/predict_brm.R +\name{predict_brm} +\alias{predict_brm} +\title{Fitted probabilities from \code{brm} fits} +\usage{ +predict_brm(object, x_new = NULL, va_new = NULL, vb_new = NULL, ...) 
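The same kind of round-trip check works for get_prob_rr; a minimal sketch using the definitions above:

set.seed(0)
logrr <- rnorm(10, 0, 1)
logop <- rnorm(10, 0, 1)

probs <- get_prob_rr(logrr, logop)  # columns (P(y=1|x=0), P(y=1|x=1))

# The column ratio returns the RR, and the odds product returns logop,
# both up to numerical error.
all.equal(log(probs[, 2] / probs[, 1]), logrr)
all.equal(log(probs[, 1] / (1 - probs[, 1]) * probs[, 2] / (1 - probs[, 2])), logop)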
+} +\arguments{ +\item{object}{A fitted object from function \code{brm}.} + +\item{x_new}{An optional vector of x.} + +\item{va_new}{An optional covariate matrix to make predictions with. If omitted, the original matrix va is used.} + +\item{vb_new}{An optional covariate matrix to make predictions with. If vb_new is omitted but va_new is not, then vb_new is set to be equal to va_new. If both vb_new and va_new are omitted, then the original matrix vb is used.} + +\item{...}{affecting the predictions produced.} +} +\value{ +If x_new is omitted, a matrix consisting of fitted probabilities for p0 = P(y=1|x=0,va,vb) and p1 = P(y=1|x=1,va,vb). + +If x_new is supplied, a vector consisting of fitted probabilities px = P(y=1|x=x_new,va,vb). +} +\description{ +Calculate fitted probabilities from a fitted binary regression model object. +} diff --git a/man/print.brm.Rd b/man/print_brm.Rd similarity index 71% rename from man/print.brm.Rd rename to man/print_brm.Rd index 2430c01..6c04d12 100644 --- a/man/print.brm.Rd +++ b/man/print_brm.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Print.R -\name{print.brm} -\alias{print.brm} +% Please edit documentation in R/util.R +\name{print_brm} +\alias{print_brm} \title{Ancillary function for printing} \usage{ -\method{print}{brm}(x, ...) +print_brm(x, ...) } \arguments{ \item{x}{a list obtained with the function 'brm'} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp deleted file mode 100644 index 31ddc91..0000000 --- a/src/RcppExports.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Generated by using Rcpp::compileAttributes() -> do not edit by hand -// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 - -#include -#include - -using namespace Rcpp; - -#ifdef RCPP_USE_GLOBAL_ROSTREAM -Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); -Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); -#endif - -// compute_augmentation_cpp -arma::vec compute_augmentation_cpp(const arma::mat& va, const arma::mat& vb, const arma::mat& fisher, const arma::mat& k_rs, const arma::mat& k_stu, const arma::mat& k_s_tu); -RcppExport SEXP _brm_compute_augmentation_cpp(SEXP vaSEXP, SEXP vbSEXP, SEXP fisherSEXP, SEXP k_rsSEXP, SEXP k_stuSEXP, SEXP k_s_tuSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type va(vaSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type vb(vbSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type fisher(fisherSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type k_rs(k_rsSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type k_stu(k_stuSEXP); - Rcpp::traits::input_parameter< const arma::mat& >::type k_s_tu(k_s_tuSEXP); - rcpp_result_gen = Rcpp::wrap(compute_augmentation_cpp(va, vb, fisher, k_rs, k_stu, k_s_tu)); - return rcpp_result_gen; -END_RCPP -} -// mat_vec_mul -arma::mat mat_vec_mul(const arma::mat& m, const arma::colvec& v); -RcppExport SEXP _brm_mat_vec_mul(SEXP mSEXP, SEXP vSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const arma::mat& >::type m(mSEXP); - Rcpp::traits::input_parameter< const arma::colvec& >::type v(vSEXP); - rcpp_result_gen = Rcpp::wrap(mat_vec_mul(m, v)); - return rcpp_result_gen; -END_RCPP -} - -static const R_CallMethodDef CallEntries[] = { - {"_brm_compute_augmentation_cpp", (DL_FUNC) &_brm_compute_augmentation_cpp, 6}, - {"_brm_mat_vec_mul", (DL_FUNC) 
&_brm_mat_vec_mul, 2}, - {NULL, NULL, 0} -}; - -RcppExport void R_init_brm(DllInfo *dll) { - R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); - R_useDynamicSymbols(dll, FALSE); -} diff --git a/tests/testthat/test-brm.R b/tests/testthat/test-brm.R index 5d9ba31..451c572 100644 --- a/tests/testthat/test-brm.R +++ b/tests/testthat/test-brm.R @@ -1,37 +1,39 @@ # sanity checks for now set.seed(0) -n = 100 -alpha.true = c(0,-1) -beta.true = c(-0.5, 1) -gamma.true = c(0.1,-0.5) -params.true = list(alpha.true = alpha.true, - beta.true = beta.true, - gamma.true = gamma.true) -v.1 = rep(1, n) # intercept term -v.2 = runif(n,-2, 2) -v = cbind(v.1, v.2) -pscore.true = exp(v %*% gamma.true) / (1 + exp(v %*% gamma.true)) -p0p1.true = getProbRR(v %*% alpha.true, v %*% beta.true) -x = rbinom(n, 1, pscore.true) -pA.true = p0p1.true[, 1] -pA.true[x == 1] = p0p1.true[x == 1, 2] -y = rbinom(n, 1, pA.true) +n <- 100 +alpha_true <- c(0, -1) +beta_true <- c(-0.5, 1) +gamma_true <- c(0.1, -0.5) +params_true <- list( + alpha_true = alpha_true, + beta_true = beta_true, + gamma_true = gamma_true +) +v_1 <- rep(1, n) # intercept term +v_2 <- runif(n, -2, 2) +v <- cbind(v_1, v_2) +pscore_true <- exp(v %*% gamma_true) / (1 + exp(v %*% gamma_true)) +p0p1_true <- get_prob_rr(v %*% alpha_true, v %*% beta_true) +x <- rbinom(n, 1, pscore_true) +pA_true <- p0p1_true[, 1] +pA_true[x == 1] <- p0p1_true[x == 1, 2] +y <- rbinom(n, 1, pA_true) test_that("example runs correctly with MLE", { - r = brm(y, x, v, v, 'RR', 'MLE', v, TRUE) - expected_result = c( + r <- brm(y, x, v, v, "RR", "MLE", v, TRUE) + expected_result <- c( "alpha 1" = -0.78035527, "alpha 2" = -1.63433532, "beta 1" = -1.06854529, "beta 2" = 0.02441527 ) - expect_equal(r$point.est, expected_result) + expect_equal(r$point_est, expected_result) }) test_that("example runs correctly with DR", { - r = brm(y, x, v, v, 'RR', 'DR', v, TRUE) - expected_result = c( + r <- brm(y, x, v, v, "RR", "DR", v, TRUE) + expected_result <- c( "alpha 1" = -0.73984903, "alpha 2" = -1.33843580, "beta 1" = -1.06854529, @@ -39,5 +41,5 @@ test_that("example runs correctly with DR", { "gamma 1" = 0.16779440, "gamma 2" = -0.47170547 ) - expect_equal(r$point.est, expected_result) + expect_equal(r$point_est, expected_result) }) diff --git a/tests/testthat/test-get_probs.R b/tests/testthat/test-get_probs.R new file mode 100644 index 0000000..14703ce --- /dev/null +++ b/tests/testthat/test-get_probs.R @@ -0,0 +1,86 @@ +test_that("get_prob_rr", { + expect_true(exists("get_prob_rr")) # prevent "empty test" notification + + test_that("works when logrr < -12", { + logrr <- c(-15, -15) + logop <- c(-15, -15) + p0 <- get_prb_aux(logop - logrr) + p1 <- 0 + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr = 0, logop = 0", { + p0 <- c(0.5, 0.5) + p1 <- c(0.5, 0.5) + expect_equal(get_prob_rr(c(0, 0), c(0, 0)), cbind(p0, p1)) + }) + + test_that("works when logrr > 12, 12 > logop > -12", { + logrr <- c(13, 13) + logop <- c(5, 5) + p0 <- c(0, 0) + p1 <- get_prb_aux(logop + logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr < 12, logop > 12", { + logrr <- c(11, 11) + logop <- c(13, 13) + p0 <- exp(-logrr) + p1 <- get_prb_aux(logop + logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works when logrr < -12, logop > 12", { + logrr <- c(-11, -11) + logop <- c(13, 13) + p0 <- c(1, 1) + p1 <- exp(logrr) + expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1)) + }) + + test_that("works 
when 12 > logrr > -12, 12 > logop > -12", {
+    logrr <- c(5, 5)
+    logop <- c(5, 5)
+    p0 <- (-(exp(logrr) + 1) * exp(logop) + sqrt(exp(2 * logop) * (exp(logrr) + 1)^2 + 4 * exp(logrr + logop) * (1 - exp(logop)))) / (2 * exp(logrr) * (1 - exp(logop)))
+    p1 <- exp(logrr) * p0
+    expect_equal(get_prob_rr(logrr, logop), cbind(p0, p1))
+  })
+})
+
+
+test_that("get_prob_rd", {
+  expect_true(exists("get_prob_rd")) # prevent "empty test" notification
+
+  test_that("works when logop > 350, atanhrd > 0", {
+    logop <- c(351, 351)
+    atanhrd <- c(1, 1)
+    p0 <- 1 - tanh(atanhrd)
+    p1 <- c(1, 1)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop > 350, atanhrd < 0", {
+    logop <- c(351, 351)
+    atanhrd <- c(-1, -1)
+    p0 <- c(1, 1)
+    p1 <- 1 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop == 0", {
+    logop <- c(0, 0)
+    atanhrd <- runif(2, -50, 50)
+    p0 <- 0.5 * (1 - tanh(atanhrd))
+    p1 <- p0 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+
+  test_that("works when logop != 0 & logop < 350", {
+    logop <- c(349, 349)
+    atanhrd <- runif(2, -50, 50)
+    p0 <- (-(exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd)) - sqrt((exp(logop) * (tanh(atanhrd) - 2) - tanh(atanhrd))^2 + 4 * exp(logop) * (1 - tanh(atanhrd)) * (1 - exp(logop)))) / (2 * (exp(logop) - 1))
+    p1 <- p0 + tanh(atanhrd)
+    expect_equal(get_prob_rd(atanhrd, logop), cbind(p0, p1))
+  })
+})
diff --git a/tests/testthat/test-mat_vec_mul.R b/tests/testthat/test-mat_vec_mul.R
index 73ee871..106accd 100644
--- a/tests/testthat/test-mat_vec_mul.R
+++ b/tests/testthat/test-mat_vec_mul.R
@@ -1,8 +1,8 @@
 test_that("mat_vec_mul works", {
-  m <- matrix(c(1,2,3,1,2,3), 2, 3)
-  v <- c(1,2,3)
-  r <- c(13,13)
-  dim(r) <- c(2,1)
-
-  expect_equal(r, mat_vec_mul(m,v))
+  m <- matrix(c(1, 2, 3, 1, 2, 3), 2, 3)
+  v <- c(1, 2, 3)
+  r <- c(13, 13)
+  dim(r) <- c(2, 1)
+
+  expect_equal(r, mat_vec_mul(m, v))
 })
diff --git a/tests/testthat/test-util.R b/tests/testthat/test-util.R
new file mode 100644
index 0000000..1a0db71
--- /dev/null
+++ b/tests/testthat/test-util.R
@@ -0,0 +1,21 @@
+set.seed(0)
+
+test_that("expit function works", {
+  p <- runif(1, 0, 1)
+  l <- log(p) - log(1 - p)
+  i <- expit(l)
+  expect_equal(i, p)
+})
+
+test_that("same function works", {
+  expect_true(same(5, 5))
+  expect_false(same(5, 5 + .Machine$double.eps^0.5))
+  expect_true(same(-5, 5, 10.1))
+  expect_false(same(1, 2, .5))
+})
+
+test_that("get_prb_aux function works", {
+  expect_equal(get_prb_aux(-501), 0)
+  expect_equal(get_prb_aux(20), 1)
+  expect_equal(get_prb_aux(0), .5 * (sqrt(5) - 1))
+})
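The helpers exercised in test-util.R (expit, same, get_prb_aux) are not shown in this diff. The sketch below is only an inference from the expected test values, not the package source: expit as the inverse logit, same as an absolute-tolerance comparison, and get_prb_aux as the positive root of p^2 / (1 - p) = exp(t).

# Plausible reference implementations consistent with the tests above (assumptions).

expit <- function(x) 1 / (1 + exp(-x))  # inverse logit: expit(log(p / (1 - p))) == p

same <- function(a, b, tol = .Machine$double.eps^0.5) abs(a - b) < tol

# Positive root of p^2 / (1 - p) = exp(t); tends to 0 as t -> -Inf, to 1 as t -> Inf,
# and equals (sqrt(5) - 1) / 2 at t = 0, matching the expected test values.
# (No overflow guard here; the package version may treat extreme t differently.)
get_prb_aux <- function(t) (sqrt(exp(2 * t) + 4 * exp(t)) - exp(t)) / 2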