Skip to content

Commit

Permalink
initial commit package
Browse files Browse the repository at this point in the history
  • Loading branch information
Rohlfing committed Jan 9, 2018
0 parents commit e2de9c4
Show file tree
Hide file tree
Showing 25 changed files with 1,742 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^data-raw$
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
inst/doc
.Rproj.user
24 changes: 24 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Package: qcapower
Type: Package
Title: Estimate power in QCA and the number of cases needed to achieve a target
level
Version: 0.0.0.9000
Imports:
ggplot2,
ggforce
Authors@R: c(
person("Ingo", "Rohlfing", email = "[email protected]", role = c("aut", "cre")),
person("Holger", "Doering", email = "[email protected]", role = c("aut")))
Description: The package allows you to estimate power of a term in a truth table
analysis. A term can be anything: a condition, conjunction or disjunction of any
of these. For ex ante power estimation, you need a consistency score, the number
of cases, an alternative hypothesis about the actual consistency score of the
term and a null hypothesis about a consistency score separating consistent from
inconsistent terms.
License: GPL-3
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
Suggests: knitr,
rmarkdown
VignetteBuilder: knitr
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Generated by roxygen2: do not edit by hand

export(qcapower)
export(qp_cases)
export(qp_cases_brute)
export(qp_quant_plot)
export(qp_run_plot)
import(ggforce)
import(ggplot2)
35 changes: 35 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#' Data simulated power estimates
#'
#' A dataset containing power simulations for different number of cases and
#' different values for null- and alternative hypothesis
#'
#' \describe{
#' \item{cases}{number of cases}
#' \item{null_hypo}{null hypothesis (H0)}
#' \item{alt_hypo}{alternative hypothesis}
#' \item{sims}{number of simulations}
#' \item{perms}{number of permutations}
#' \item{perms}{calculate power}
#' }
#'
#' @format A dataframe with simulation parameters and calculated power
"qp_sim_power"


#' Data simulated power estimates for plotting of 5\%-quantiles
#'
#' A dataset containing the estimated 5\%-quantiles from a power simulation with
#' 1000 simulations each with 10000 permutations. The value for the alternative
#' hypothesis was set to 1.
#'
#' \describe{
#' \item{power}{power estimate over 1000 simulations}
#' \item{powercum}{running power estimate for ith simulation}
#' \item{null_hypo}{null hypothesis (H0), set to 0.8 (irrelevant here)}
#' \item{alt_hypo}{alternative hypothesis (H1), set to 1}
#' \item{cases}{number of cases, set to 10}
#' \item{quant}{estimated 5\%-quantiles per simulations}
#' }
#'
#' @format A dataframe with 1000 rows and 6 variables:
"qp_sina_data"
242 changes: 242 additions & 0 deletions R/qcapower.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
#' Estimate power in Qualitative Comparative Analysis (QCA).
#'
#' \code{qcapower} returns a power estimate for a term, given information about
#' the required parameters
#'
#' \code{qcapower} allows you to estimate power for a term, that is, to reject
#' the null hypothesis that a set relation is in place when it is in place, in
#' fact. A term can be a single condition, a conjunction, or a disjunction of
#' any combination of the two.
#'
#' @param cases Number of cases. In fuzzy-set QCA, equal to total number of
#' cases
#' @param null_hypo Null hypothesis (\emph{H0}). Consistency value separating
#' consistent from inconsistent terms.
#' @param alt_hypo Alternative hypothesis (\emph{H1}). Expected, actual
#' consistency value of term.
#' @param sims Number of simulations
#' @param perms Number of permutations per simulated dataset
#' @param alpha Level of alpha at which significance of H0 is tested
#' @param cons_threshold Degree of tolerance in generating data with consistency
#' equaling \code{alt_hypo} (see vignette)
#' @param set_seed Parameter for achieving reproducibility of estimate
#' @return A dataframe with rows equaling the number of \code{sims}.
#' \code{power} is the power estimate and is identical for each rows.
#' \code{powercum} is the running power estimate up to this row. \code{quant}
#' is the 5\%-quantile of the permuted distributions. See the vignette for
#' more information on \code{powercum} and \code{quant}.
#' @seealso \code{\link{qp_quant_plot}} and \code{\link{qp_run_plot}}
#' @examples
#' power_data <- qcapower(cases = 20, null_hypo = 0.8, alt_hypo = 0.95, sims = 10, perms = 1000)
#' head(power_data)
#' @export
qcapower <- function(cases, null_hypo, alt_hypo, sims = 1000, perms = 10000,
alpha = 0.05, cons_threshold = 0.01, set_seed = 135) {
null_sig <- vector("numeric", length = sims)
null_p <- vector("numeric", length = sims)
quant <- vector("numeric", length = sims)

set.seed(set_seed)
calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
random_draw <- function() round(runif(cases), digits = 2)
df <- data.frame(index=1:cases)

for(i in 1:sims) {
df$a <- random_draw()
df$y <- random_draw()
cons <- calc_consistency(df$a, df$y)
while(abs(alt_hypo - cons) > cons_threshold) {
if(cons < alt_hypo) {
sample_row <- sample(df[df$a > df$y, 'index'], 1)
df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
} else if(cons > alt_hypo) {
sample_row <- sample(df[ , 'index'], 1)
df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
}
cons <- calc_consistency(df$a, df$y)
}
cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y)))
quant[i] <- quantile(cons_perms, 0.05)
null_p[i] <- ecdf(cons_perms)(null_hypo) # p-value of null-hypo
null_sig[i] <- ifelse(null_p[i] + 1.96 * null_p[i] / sqrt(perms) < alpha, 1, 0)
}
power <- sum(null_sig) / sims
powercum <- cumsum(null_sig) / 1:sims
estim <- data.frame(power, powercum, alt_hypo, null_hypo, cases, quant)

return(estim)
}


#' Calculte the number of cases for a particular case target based on simulated
#' data
#'
#' \code{qp_cases} calculates the number of cases needed for a particular
#' power level. It is based on the pre-calculated data in \code(qp_sim_power).
#'
#' @param power_target Power level target
#' @param null_hypo Null hypothesis (\emph{H0}). Consistency value separating
#' consistent from inconsistent terms.
#' @param alt_hypo Alternative hypothesis (\emph{H1}). Expected, actual
#' consistency value of term.
#'
#' @seealso \code{\link{qp_cases_brute}}
#'
#' @examples
#' qp_cases(0.1, null_hypo = 0.8, alt_hypo = 1)
#'
#' @export
qp_cases <- function(power_target, null_hypo, alt_hypo) {
sim_data <- qp_sim_power
if(power_target < 0 | power_target > 1) {
stop("'power target' must be between 0 and 1")
}
if(! null_hypo %in% sim_data$null_hypo | ! alt_hypo %in% sim_data$alt_hypo) {
hypo_values <- sapply(c("null_hypo", "alt_hypo"),
function(.x) paste(unique(sim_data[[.x]]), collapse = ", "))
stop(sprintf("\n\nnull hypothesis must be %s\n\nalternative hypothesis must be %s",
hypo_values[["null_hypo"]], hypo_values[["alt_hypo"]]))
}

sim_select <- sim_data[sim_data$power >= power_target
& sim_data$null_hypo == null_hypo
& sim_data$alt_hypo == alt_hypo, "cases"]

if(length(sim_select) == 0) {
return(NA)
} else {
return(min(sim_select))
}
}


#' Calculte the number of cases for a particular case target with brute force
#'
#' \code{qp_cases_brute} calculates the number of cases needed for a particular
#' power level. The function starts at a \code(start_value) for the number of
#' cases and increments until the \code(power_target) is met or the
#' \code(max_value) has been reached.
#'
#' Running the function can take a lot of time. Use \code{\link{qp_cases}} to
#' calculate the number of cases based on simulated data.
#'
#' @param power_target Power level target
#' @param start_value Default number of cases for initial search
#' @param max_value Default maximum number of cases for search
#' @param progress Show progress of calculation (default \code{TRUE})
#' @param ... \code{qcapower} parameters -- see \code{\link{qcapower}}
#'
#' @seealso \code{\link{qp_cases_brute}}
#'
#' @examples
#' \dontrun{
#' qp_cases_brute(0.9, null_hypo = 0.80, alt_hypo = 1)
#'
#' qp_cases_brute(0.9, null_hypo = 0.80, alt_hypo = 1, start_value = 20,
#' max_value = 50, perms = 500)
#' }
#'
#' @export
qp_cases_brute <- function(power_target, start_value = 2, max_value = 100,
progress = TRUE, ...) {
cases <- start_value - 1
params <- list(...)
power <- 0

while(power < power_target) {
cases <- cases + 1
if(cases > max_value | cases <= 0) {
stop("Number of cases smaller than 0 or larger than 'max_value'")
}
params[['cases']] <- cases
power <- unique(do.call(qcapower, params)$power)
if(progress) {
print(sprintf('cases = %d -- power = %.2f', cases, power))
}
}
return(cases)
}


#' Plot of power estimate against the number of simulations
#'
#' \code{qp_run_plot} allows you to plot the running power estimate to
#' determine whether \code{sims} is sufficiently large to derive a reliable
#' estimate
#'
#' Creates a plot with \code{ggplot2}
#'
#' @param power_est Dataframe containing the simulation results (see
#' \code{\link{qcapower}})
#' @param title Option for adding title to plot (default \code{FALSE})
#' @examples
#' power_data <- qcapower(cases = 20, null_hypo = 0.8, alt_hypo = 0.95, sims = 10, perms = 1000)
#' qp_run_plot(power_data)
#'
#' # Using data with 10000 estimates
#' data(qp_sina_data)
#' qp_run_plot(qp_sina_data)
#' @import ggplot2
#' @export
qp_run_plot <- function(power_est, title = FALSE) {
pl_title = "Power estimate over simulations"
pl_y_label = "running power estimate"

power_est$id <- seq(1, nrow(power_est), by = 1)
pl <- ggplot(data=power_est, mapping=aes(x = id, y = powercum)) +
geom_line() +
scale_x_continuous(name = "simulation index",
breaks = c(1, nrow(power_est))) +
scale_y_continuous(name = pl_y_label,
breaks = seq(0, 1, by = 0.2),
limits = c(0, 1)) +
theme_bw()

if(title == TRUE) {
pl <- pl +
ggtitle(pl_title) +
theme(plot.title = element_text(hjust = 0.5))
}

print(pl)
}


#' Plot of 5%-quantiles for assessing the dispersion of the permutated
#' distributions
#'
#' Depending on the number of cases, the permuted distributions of consistency
#' values can differ widely in terms of their location on the spectrum and their
#' shape.
#'
#' Creates a sina plot with \code{ggforce}
#'
#' @param power_est Dataframe containing simulation results (see
#' \code{\link{qcapower}})
#' @param title Option for adding title to plot (default \code{FALSE})
#' @examples
#' sim_data <- qp_sina_data
#' qp_quant_plot(sim_data)
#' @import ggplot2
#' @import ggforce
#' @export
qp_quant_plot <- function(power_est, title = FALSE) {
pl_title = "Distribution of 5%-quantiles"
pl_y_label = "estimated 5%-quantiles"

pl <- ggplot(data=power_est, mapping=aes(factor(cases), quant)) +
geom_sina(size=0.1) +
scale_x_discrete(labels = NULL, name = NULL) +
scale_y_continuous(name = pl_y_label,
breaks = seq(0, 1, by = 0.2),
limits = c(0, 1)) +
theme_bw()

if(title == TRUE) {
pl <- pl +
ggtitle(pl_title)
theme(plot.title = element_text(hjust = 0.5))
}

print(pl)
}
8 changes: 8 additions & 0 deletions data-raw/qp_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
library(qcapower)
library(devtools)

qp_sina_data <- qcapower(cases = 10, null_hypo = 0.8, alt_hypo = 1, sims = 1000, perms = 10000)
use_data(qp_sina_data)

qp_sim_power <- read.csv("simulations.csv", as.is = TRUE)
use_data(qp_sim_power)
39 changes: 39 additions & 0 deletions data-raw/simulations-qcapower.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# "qcapower()" function from qcapower-package
# to run simulations in "simulation.R" without installing package

qcapower <- function(cases, null_hypo, alt_hypo, sims = 1000, perms = 10000,
alpha = 0.05, cons_threshold = 0.01, set_seed = 135) {
null_sig <- vector("numeric", length = sims)
null_p <- vector("numeric", length = sims)
quant <- vector("numeric", length = sims)

set.seed(set_seed)
calc_consistency <- function(a, y) sum(pmin(a, y)) / sum(a)
random_draw <- function() round(runif(cases), digits = 2)
df <- data.frame(index=1:cases)

for(i in 1:sims) {
df$a <- random_draw()
df$y <- random_draw()
cons <- calc_consistency(df$a, df$y)
while(abs(alt_hypo - cons) > cons_threshold) {
if(cons < alt_hypo) {
sample_row <- sample(df[df$a > df$y, 'index'], 1)
df[sample_row, 'y'] <- runif(1, min = df[sample_row, 'y'], max = 1)
} else if(cons > alt_hypo) {
sample_row <- sample(df[ , 'index'], 1)
df[sample_row, 'y'] <- runif(1, min = 0, max = df[sample_row, 'y'])
}
cons <- calc_consistency(df$a, df$y)
}
cons_perms <- sapply(1:perms, function(x) calc_consistency(df$a, sample(df$y)))
quant[i] <- quantile(cons_perms, 0.05)
null_p[i] <- ecdf(cons_perms)(null_hypo) # p-value of null-hypo
null_sig[i] <- ifelse(null_p[i] + 1.96 * null_p[i] / sqrt(perms) < alpha, 1, 0)
}
power <- sum(null_sig) / sims
powercum <- cumsum(null_sig) / 1:sims
estim <- data.frame(power, powercum, alt_hypo, null_hypo, cases, quant)

return(estim)
}
Loading

0 comments on commit e2de9c4

Please sign in to comment.