From 9db6098191d5873c33c28f06c9701ca0c54c104c Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 08:29:41 -0400 Subject: [PATCH 01/17] reorg common functions for joining profileIDs --- R/design.R | 70 +++++++++++++++++++++++++++--------------------- R/input_checks.R | 6 ++--- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/R/design.R b/R/design.R index caf79b3..58b546d 100644 --- a/R/design.R +++ b/R/design.R @@ -339,14 +339,6 @@ get_col_types <- function(data) { return(unlist(lapply(types, test))) } -get_type_ids <- function(profile_lvls) { - types <- get_col_types(profile_lvls) - ids <- list() - ids$discrete <- types %in% c("factor", "character") - ids$continuous <- !ids$discrete - return(ids) -} - reorder_cols <- function(design) { metaNames <- c("profileID", "respID", "qID", "altID", "obsID") varNames <- setdiff(names(design), metaNames) @@ -375,25 +367,18 @@ make_design_deff <- function( } # Set up levels and coding - ids <- get_type_ids(profile_lvls) - lvl.names <- list() - for (i in seq_len(ncol(profile_lvls))) { - if (ids$discrete[i]) { - lvl.names[[i]] <- levels(profile_lvls[,i]) - } else { - lvl.names[[i]] <- unique(profile_lvls[,i]) - } - } + lvl.names <- unname(get_profile_list(profiles)) lvls <- unname(unlist(lapply(lvl.names, function(x) length(x)))) coding <- rep("C", length(lvls)) c.lvls <- NULL - if (any(ids$continuous)) { - c.lvls <- lvl.names[ids$continuous] + type_ids <- get_type_ids(profiles) + if (any(type_ids$continuous)) { + c.lvls <- lvl.names[type_ids$continuous] } # lvl.names must be all characters for decoding process lvl.names <- lapply(lvl.names, function(x) as.character(x)) - if (any(ids$discrete)) { - coding[ids$discrete] <- "D" + if (any(type_ids$discrete)) { + coding[type_ids$discrete] <- "D" } no_choice_alt <- NULL alt_cte <- rep(0, n_alts) @@ -440,7 +425,7 @@ make_design_deff <- function( } else { D <- idefix::Modfed( cand.set = defineCandidateSet( - lvls, coding, c.lvls, profile_lvls, ids, profiles_restricted + lvls, coding, c.lvls, profile_lvls, type_ids, profiles_restricted ), par.draws = par_draws, n.alts = n_alts, @@ -466,9 +451,9 @@ make_design_deff <- function( # Join on profileIDs to design design <- design_raw$design names(design) <- varnames - design <- join_profiles(design, profiles, varnames, ids) + design <- join_profiles(design, profiles, type_ids) if (no_choice) { - design <- add_no_choice_deff(design, n_alts, varnames[ids$discrete]) + design <- add_no_choice_deff(design, n_alts, varnames[type_ids$discrete]) } # Include probs? @@ -503,8 +488,32 @@ make_design_deff <- function( return(design) } +get_type_ids <- function(profiles) { + types <- get_col_types(profiles[, 2:ncol(profiles)]) + ids <- list() + ids$discrete <- types %in% c("factor", "character") + ids$continuous <- !ids$discrete + return(ids) +} + +get_profile_list <- function(profiles) { + profile_lvls <- profiles[, 2:ncol(profiles)] + varnames <- names(profile_lvls) + type_ids <- get_type_ids(profiles) + profile_list <- list() + for (i in seq_len(ncol(profile_lvls))) { + if (type_ids$discrete[i]) { + profile_list[[i]] <- levels(profile_lvls[,i]) + } else { + profile_list[[i]] <- unique(profile_lvls[,i]) + } + } + names(profile_list) <- varnames + return(profile_list) +} + defineCandidateSet <- function( - lvls, coding, c.lvls, profile_lvls, ids, profiles_restricted + lvls, coding, c.lvls, profile_lvls, type_ids, profiles_restricted ) { # Make candidate set with profiles, assuming non-restricted cand_set <- idefix::Profiles( @@ -518,7 +527,7 @@ defineCandidateSet <- function( # including restricted profiles cand_set_res <- fastDummies::dummy_cols( profile_lvls, - select_columns = names(profile_lvls)[ids$discrete], + select_columns = names(profile_lvls)[type_ids$discrete], remove_first_dummy = TRUE, remove_selected_columns = TRUE ) @@ -540,7 +549,7 @@ defineCandidateSet <- function( return(cand_set_res) } -join_profiles <- function(design, profiles, varnames, ids) { +join_profiles <- function(design, profiles, type_ids) { # Replaces the generated design with rows from profiles, which ensures # factor levels in profiles are maintained in design @@ -548,16 +557,17 @@ join_profiles <- function(design, profiles, varnames, ids) { design$row_id <- seq(nrow(design)) # Convert numeric columns to actual numbers - for (id in which(ids$continuous)) { - design[,id] <- as.numeric(design[,id]) + for (id in which(type_ids$continuous)) { + design[,id] <- as.numeric(as.character(design[,id])) } # Convert character types to factors and set same levels as profiles - for (id in which(ids$discrete)) { + for (id in which(type_ids$discrete)) { design[,id] <- factor(design[,id], levels = levels(profiles[,id+1])) } # Join on profileIDs, then reorder to retain design order + varnames <- names(profiles[, 2:ncol(profiles)]) design <- merge(design, profiles, by = varnames, all.x = TRUE) design <- design[order(design$row_id),] design <- design[c('profileID', varnames)] diff --git a/R/input_checks.R b/R/input_checks.R index ef5b50c..d50e44e 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -79,8 +79,8 @@ check_inputs_design <- function( } # Check that prior levels aren't missing - ids <- get_type_ids(profile_lvls) - for (id in which(ids$discrete)) { + type_ids <- get_type_ids(profiles) + for (id in which(type_ids$discrete)) { n_lvls <- length(unique(profile_lvls[,id])) - 1 if (length(priors[[id]]) != n_lvls) { stop( @@ -90,7 +90,7 @@ check_inputs_design <- function( ) } } - for (id in which(ids$continuous)) { + for (id in which(type_ids$continuous)) { if (length(priors[[id]]) != 1) { stop( "Invalid number of values provided in 'priors' for the '", From 880afc22c07ab797c48b7cf664c7ce557bb773ea Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 10:08:08 -0400 Subject: [PATCH 02/17] revising how the `method` argument works for more methods --- R/design.R | 61 +++++++++++++++++++++++++++++++----------------- R/input_checks.R | 33 ++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 23 deletions(-) diff --git a/R/design.R b/R/design.R index 58b546d..117a0bf 100644 --- a/R/design.R +++ b/R/design.R @@ -38,9 +38,15 @@ #' design includes average predicted probabilities for each alternative in each #' choice set given the sample from the prior preference distribution. #' Defaults to `FALSE`. -#' @param method Which method to use for obtaining a Bayesian D-efficient -#' design, `"CEA"` or `"Modfed"`? If `priors` are specified, it defaults to -#' `"CEA"`, otherwise it defaults to `NULL`. See `?idefix::CEA` and +#' @param method Which design method to use? Defaults to `"random"` where choice +#' sets are created by randomly selecting from the full set of `profiles`. The +#' `"orthogonal"` method first finds an orthogonal array from `profiles` and +#' then randomly selects from it. For Bayesian D-efficient designs, use `"CEA"` +#' or `"Modfed"` along with specified `priors`. If priors are specified with no +#' specified `method`, `"CEA"` will be used. If `method` is set to `"CEA"` or +#' but without `priors` specified, a prior of all `0`s is used. If using a +#' restricted set of `profiles`, only the `"Modfed"` method can be used as +#' `"CEA"` requires unrestricted `profiles`. See `?idefix::CEA` and #' `?idefix::Modfed` for more details. #' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned #' object will be a list containing the design and the DB-error score. @@ -121,18 +127,12 @@ cbc_design <- function( priors = NULL, prior_no_choice = NULL, probs = FALSE, - method = NULL, + method = "random", keep_db_error = FALSE, max_iter = 50, parallel = TRUE ) { - if (!is.null(priors)) { - if (is.null(method)) { - # Set default method to 'CEA' if priors are specified and - # user didn't specify a method. - method <- 'CEA' - } - } + method <- check_design_method(method, priors) check_inputs_design( profiles, n_resp, @@ -152,16 +152,25 @@ cbc_design <- function( parallel ) profiles <- as.data.frame(profiles) # tibbles break things - if (is.null(priors)) { - design <- make_design_rand(profiles, n_resp, n_alts, n_q, no_choice, label) + if (method == 'random') { + design <- make_design_random( + profiles, n_resp, n_alts, n_q, no_choice, label + ) } else if (!is.null(label)) { warning( - "The use of the 'label' argument is currently only compatible with ", - "randomized designs, so the provided 'priors' are being ignored.\n" + 'The use of the "label" argument is currently only compatible with ', + 'random designs, so the "method" argument is being ignored and a ', + 'random design is being used\n' + ) + design <- make_design_random( + profiles, n_resp, n_alts, n_q, no_choice, label + ) + } else if (method == 'orthogonal') { + design <- make_design_orthogonal( + profiles, n_resp, n_alts, n_q, no_choice, label ) - design <- make_design_rand(profiles, n_resp, n_alts, n_q, no_choice, label) } else { - design <- make_design_deff( + design <- make_design_bayesian( profiles, n_resp, n_alts, n_q, n_blocks, n_draws, no_choice, n_start, label, priors, prior_no_choice, probs, method, keep_db_error, max_iter, parallel @@ -172,9 +181,11 @@ cbc_design <- function( return(design) } -# Randomized Design ---- +# Random Design ---- -make_design_rand <- function(profiles, n_resp, n_alts, n_q, no_choice, label) { +make_design_random <- function( + profiles, n_resp, n_alts, n_q, no_choice, label +) { if (is.null(label)) { design <- get_design_rand(profiles, n_resp, n_alts, n_q) } else { @@ -346,9 +357,17 @@ reorder_cols <- function(design) { return(design) } +# Orthogonal Design ---- + +make_design_orthogonal <- function( + profiles, n_resp, n_alts, n_q, no_choice, label +) { + +} + # Bayesian D-efficient Design ---- -make_design_deff <- function( +make_design_bayesian <- function( profiles, n_resp, n_alts, n_q, n_blocks, n_draws, no_choice, n_start, label, priors, prior_no_choice, probs, method, keep_db_error, max_iter, parallel @@ -405,7 +424,7 @@ make_design_deff <- function( method <- "Modfed" warning( 'The "CEA" algorithm requires the use of an unrestricted set of ', - 'profiles, so "Modfed" is being used instead.' + 'profiles, so "Modfed" is being used instead.\n' ) } diff --git a/R/input_checks.R b/R/input_checks.R index d50e44e..18a610f 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -23,6 +23,22 @@ check_inputs_restrict <- function(profiles) { } } +check_design_method <- function(method, priors) { + if (!is.null(priors)) { + if (! method %in% c('CEA', 'Modfed')) { + # Set method to 'CEA' if priors are specified and + # user didn't specify an appropriate method. + warning( + 'Since "priors" are specified, the "method" must be either "CEA" ', + 'or "Modfed". The specified "method" is being ignored and set to ', + '"CEA"\n' + ) + method <- 'CEA' + } + } + return(method) +} + check_inputs_design <- function( profiles, n_resp, @@ -58,13 +74,26 @@ check_inputs_design <- function( } + # Check that an appropriate method is used + + if (! method %in% c('random', 'orthogonal', 'CEA', 'Modfed')) { + stop( + 'The "method" argument must be set to "random", "orthogonal", ', + '"Modfed", or "CEA"' + ) + } + # Check that priors are appropriate if specified if (!is.null(priors)) { # Check that user specified an appropriate method - if ((method != "CEA") & (method != "Modfed")) { - stop('The method argument must be either "Modfed" or "CEA"') + # This should already be handled + if (! method %in% c('CEA', 'Modfed')) { + stop( + 'Since "priors" are specified, the "method" argument must ', + 'be either "Modfed" or "CEA"' + ) } # Check that prior names aren't missing From e37d89dba534e61b06b8d1b96d319dafc8804e90 Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 10:34:49 -0400 Subject: [PATCH 03/17] orthogonal arrays now in the code --- R/design.R | 45 +++++++++++++++++++++++++++++---------------- R/input_checks.R | 16 +++++++++++++--- R/tools.R | 4 ++++ 3 files changed, 46 insertions(+), 19 deletions(-) diff --git a/R/design.R b/R/design.R index 117a0bf..1b518c1 100644 --- a/R/design.R +++ b/R/design.R @@ -71,7 +71,8 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design +#' # Make a randomized survey design from all possible profiles +#' # (This is the default setting where method = 'random') #' design_rand <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents @@ -79,7 +80,17 @@ #' n_q = 6 # Number of questions per respondent #' ) #' -#' # Make a randomized survey design with a "no choice" option +#' # Make a survey design from an orthogonal array of profiles +#' design_rand <- cbc_design( +#' profiles = profiles, +#' n_resp = 300, # Number of respondents +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6 # Number of questions per respondent +#' method = 'orthogonal' +#' ) +#' +#' # Make a randomized survey design from all possible profiles +#' # with a "no choice" option #' design_rand_nochoice <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents @@ -88,8 +99,8 @@ #' no_choice = TRUE #' ) #' -#' # Make a randomized labeled survey design with each "type" appearing in -#' # each choice question +#' # Make a randomized survey design from all possible profiles +#' # with each level of the "type" attribute appearing as an alternative #' design_rand_labeled <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents @@ -153,16 +164,7 @@ cbc_design <- function( ) profiles <- as.data.frame(profiles) # tibbles break things if (method == 'random') { - design <- make_design_random( - profiles, n_resp, n_alts, n_q, no_choice, label - ) - } else if (!is.null(label)) { - warning( - 'The use of the "label" argument is currently only compatible with ', - 'random designs, so the "method" argument is being ignored and a ', - 'random design is being used\n' - ) - design <- make_design_random( + design <- get_randomized_design( profiles, n_resp, n_alts, n_q, no_choice, label ) } else if (method == 'orthogonal') { @@ -183,7 +185,7 @@ cbc_design <- function( # Random Design ---- -make_design_random <- function( +get_randomized_design <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { if (is.null(label)) { @@ -362,7 +364,18 @@ reorder_cols <- function(design) { make_design_orthogonal <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { - + oa <- as.data.frame(DoE.base::oa.design( + factor.names = get_profile_list(profiles)) + ) + if (nrow(oa) == nrow(profiles)) { + message("No orthogonal array found, using full factorial") + } else { + message("Using orthogonal array with ", nrow(oa), " rows") + } + type_ids <- get_type_ids(profiles) + oa <- join_profiles(oa, profiles, type_ids) + design <- get_randomized_design(oa, n_resp, n_alts, n_q, no_choice, label) + return(design) } # Bayesian D-efficient Design ---- diff --git a/R/input_checks.R b/R/input_checks.R index 18a610f..0e84600 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -25,7 +25,7 @@ check_inputs_restrict <- function(profiles) { check_design_method <- function(method, priors) { if (!is.null(priors)) { - if (! method %in% c('CEA', 'Modfed')) { + if (! method_is_bayesian(method)) { # Set method to 'CEA' if priors are specified and # user didn't specify an appropriate method. warning( @@ -74,6 +74,15 @@ check_inputs_design <- function( } + # The labeled design isn't yet supported for Bayesian D-efficient designs + + if (!is.null(label) & method_is_bayesian(method)) { + stop( + 'The use of the "label" argument is currently not compatible with ', + 'Bayesian D-efficient designs' + ) + } + # Check that an appropriate method is used if (! method %in% c('random', 'orthogonal', 'CEA', 'Modfed')) { @@ -89,10 +98,11 @@ check_inputs_design <- function( # Check that user specified an appropriate method # This should already be handled - if (! method %in% c('CEA', 'Modfed')) { + if (! method_is_bayesian(method)) { stop( 'Since "priors" are specified, the "method" argument must ', - 'be either "Modfed" or "CEA"' + 'be either "Modfed" or "CEA" to obtain a Bayesian ', + 'D-efficient design' ) } diff --git a/R/tools.R b/R/tools.R index 1e69262..a1ffd17 100644 --- a/R/tools.R +++ b/R/tools.R @@ -10,3 +10,7 @@ "Please cite the package in your publications, see:\ncitation(\"cbcTools\")" ) } + +method_is_bayesian <- function(method) { + return(method %in% c('CEA', 'Modfed')) +} From c1e2da21a9521fb2ba0de681b263ec74306f732e Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 10:44:27 -0400 Subject: [PATCH 04/17] Update cbc_design.Rd --- man/cbc_design.Rd | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index e3fad94..d69617f 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -17,7 +17,7 @@ cbc_design( priors = NULL, prior_no_choice = NULL, probs = FALSE, - method = NULL, + method = "random", keep_db_error = FALSE, max_iter = 50, parallel = TRUE @@ -68,9 +68,15 @@ design includes average predicted probabilities for each alternative in each choice set given the sample from the prior preference distribution. Defaults to \code{FALSE}.} -\item{method}{Which method to use for obtaining a Bayesian D-efficient -design, \code{"CEA"} or \code{"Modfed"}? If \code{priors} are specified, it defaults to -\code{"CEA"}, otherwise it defaults to \code{NULL}. See \code{?idefix::CEA} and +\item{method}{Which design method to use? Defaults to \code{"random"} where choice +sets are created by randomly selecting from the full set of \code{profiles}. The +\code{"orthogonal"} method first finds an orthogonal array from \code{profiles} and +then randomly selects from it. For Bayesian D-efficient designs, use \code{"CEA"} +or \code{"Modfed"} along with specified \code{priors}. If priors are specified with no +specified \code{method}, \code{"CEA"} will be used. If \code{method} is set to \code{"CEA"} or +but without \code{priors} specified, a prior of all \code{0}s is used. If using a +restricted set of \code{profiles}, only the \code{"Modfed"} method can be used as +\code{"CEA"} requires unrestricted \code{profiles}. See \code{?idefix::CEA} and \code{?idefix::Modfed} for more details.} \item{keep_db_error}{If \code{TRUE}, for Bayesian D-efficient designs the returned @@ -106,7 +112,8 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design +# Make a randomized survey design from all possible profiles +# (This is the default setting where method = 'random') design_rand <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents @@ -114,7 +121,17 @@ design_rand <- cbc_design( n_q = 6 # Number of questions per respondent ) -# Make a randomized survey design with a "no choice" option +# Make a survey design from an orthogonal array of profiles +design_rand <- cbc_design( + profiles = profiles, + n_resp = 300, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6 # Number of questions per respondent + method = 'orthogonal' +) + +# Make a randomized survey design from all possible profiles +# with a "no choice" option design_rand_nochoice <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents @@ -123,8 +140,8 @@ design_rand_nochoice <- cbc_design( no_choice = TRUE ) -# Make a randomized labeled survey design with each "type" appearing in -# each choice question +# Make a randomized survey design from all possible profiles +# with each level of the "type" attribute appearing as an alternative design_rand_labeled <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents From 227408a543b4c214cb1319d90fb599968b409f42 Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 11:09:56 -0400 Subject: [PATCH 05/17] removed dplyr dependency and added DoE.base --- DESCRIPTION | 2 +- R/design.R | 2 +- R/input_checks.R | 71 ++++++++++++++++++++++++------------------------ R/profiles.R | 20 ++++++++++++-- 4 files changed, 56 insertions(+), 39 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7111f7a..080df85 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Suggests: testthat, tibble Imports: - dplyr, + DoE.base, fastDummies, ggplot2, idefix, diff --git a/R/design.R b/R/design.R index 1b518c1..5fe3efe 100644 --- a/R/design.R +++ b/R/design.R @@ -85,7 +85,7 @@ #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question -#' n_q = 6 # Number of questions per respondent +#' n_q = 6, # Number of questions per respondent #' method = 'orthogonal' #' ) #' diff --git a/R/input_checks.R b/R/input_checks.R index 0e84600..cc185fb 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -3,7 +3,10 @@ check_inputs_profiles <- function(levels) { check_vector <- !is.vector(levels[[i]]) check_name <- is.null(names(levels)[i]) if (check_vector | check_name) { - stop("Each item in ... must be a named vector where the names are attributes and the values in the vector are levels of that attribute.") + stop( + 'Each item in "..." must be a named vector where the names are ', + 'attributes and the values in the vector are levels of that attribute' + ) } } } @@ -11,14 +14,14 @@ check_inputs_profiles <- function(levels) { check_inputs_restrict <- function(profiles) { # Check if profiles is a data frame if (!is.data.frame(profiles)) { - stop("The 'profiles' argument must be a data frame.") + stop('The "profiles" argument must be a data frame.') } # Check if profiles has been created by the cbc_profiles function if (!"profileID" %in% colnames(profiles)) { stop( - "The 'profiles' data frame must be created using the 'cbc_profiles' function ", - "and contain the 'profileID' variable." + 'The "profiles" data frame must be created using the "cbc_profiles()"', + 'function and contain the "profileID" variable.' ) } } @@ -64,23 +67,21 @@ check_inputs_design <- function( # If using a Bayesian D-efficient design with a no choice option, user must # specify a value for prior_no_choice if (no_choice) { - if (!is.null(priors) & is.null(prior_no_choice)) { - stop( - "If 'no_choice = TRUE', you must specify the prior utility ", - 'value for the "no choice" option using prior_no_choice' - ) - - } - + if (!is.null(priors) & is.null(prior_no_choice)) { + stop( + 'If "no_choice = TRUE", you must specify the prior utility ', + 'value for the "no choice" option using prior_no_choice' + ) + } } # The labeled design isn't yet supported for Bayesian D-efficient designs if (!is.null(label) & method_is_bayesian(method)) { - stop( - 'The use of the "label" argument is currently not compatible with ', - 'Bayesian D-efficient designs' - ) + stop( + 'The use of the "label" argument is currently not compatible with ', + 'Bayesian D-efficient designs' + ) } # Check that an appropriate method is used @@ -112,7 +113,7 @@ check_inputs_design <- function( missing <- setdiff(names(profile_lvls), prior_names) if (length(missing) > 0) { stop( - "'priors' is missing the following variables: \n\n", + '"priors" is missing the following variables: \n\n', paste(missing, collapse = "\n") ) } @@ -123,17 +124,17 @@ check_inputs_design <- function( n_lvls <- length(unique(profile_lvls[,id])) - 1 if (length(priors[[id]]) != n_lvls) { stop( - "Invalid number of values provided in 'priors' for the '", - prior_names[id], "' attribute. Please provide ", n_lvls, - " values" + 'Invalid number of values provided in "priors" for the "', + prior_names[id], '" attribute. Please provide ', n_lvls, + ' values' ) } } for (id in which(type_ids$continuous)) { if (length(priors[[id]]) != 1) { stop( - "Invalid number of values provided in 'priors' for the '", - prior_names[id], "' attribute. Please provide 1 value" + 'Invalid number of values provided in "priors" for the "', + prior_names[id], '" attribute. Please provide 1 value' ) } } @@ -142,12 +143,12 @@ check_inputs_design <- function( # Check that the number of alternatives per observation is larger than # the number of unique profiles if (n_alts > nrow(profiles)) { - stop( - "The number of alternatives per observation, specified by n_alts, ", - "is larger than the number of unique profiles. Either decrease ", - "n_alts to be less than ", nrow(profiles), " or add more ", - "attributes / levels to increase the number of profiles." - ) + stop( + 'The number of alternatives per observation, specified by "n_alts", ', + "is larger than the number of unique profiles. Either decrease ", + '"n_alts" to be less than ', nrow(profiles), " or add more ", + "attributes / levels to increase the number of profiles." + ) } # Check that number of questions per respondents is larger than the @@ -164,13 +165,13 @@ check_inputs_design <- function( ncomb <- choose(n, k) # More robust # ncomb <- factorial(n) / (factorial(k)*(factorial(n-k))) if (n_q > ncomb) { - stop( - "The number of questions per respondent, specified by n_q, ", - "is larger than the number of unique sets of choice sets. ", - "You can correct this by decreasing n_q to be less than ", - ncomb, ", decreasing n_alts, or add more attributes / levels ", - "to increase the number of choice set combinations." - ) + stop( + 'The number of questions per respondent, specified by "n_q", ', + "is larger than the number of unique sets of choice sets. ", + 'You can correct this by decreasing "n_q" to be less than ', + ncomb, ', decreasing "n_alts", or add more attributes / levels ', + "to increase the number of choice set combinations." + ) } } } diff --git a/R/profiles.R b/R/profiles.R index 81ab306..3f3a4dc 100644 --- a/R/profiles.R +++ b/R/profiles.R @@ -59,12 +59,28 @@ cbc_profiles <- function(...) { #' type == "Honeycrisp" & freshness == "Poor", #' type == "Fuji" & freshness == "Excellent" #' ) + cbc_restrict <- function(profiles, ...) { check_inputs_restrict(profiles) + + # drop_ids <- unique(unlist(lapply( + # rlang::enquos(...), + # function(x) { + # dplyr::filter(profiles, !!x) |> dplyr::pull(.data$profileID) + # } + # ))) + + # Came up with a different approach to avoid the {dplyr} dependency + drop_ids <- unique(unlist(lapply( - rlang::enquos(...), - function(x) dplyr::filter(profiles, !!x) |> dplyr::pull(.data$profileID) + rlang::enexprs(...), + function(x) { + subset_ids <- subset(profiles, eval(x), select = c("profileID")) + as.character(subset_ids$profileID) + } ))) + drop_ids <- which(profiles$profileID %in% drop_ids) + profiles <- profiles[-drop_ids,] profiles <- add_profile_ids(profiles) return(profiles) From f4bd49ce03d521023acf6157a1714f6bfcac0c5b Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 11:17:56 -0400 Subject: [PATCH 06/17] Update cbc_design.Rd --- man/cbc_design.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index d69617f..03b0ada 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -126,7 +126,7 @@ design_rand <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_q = 6, # Number of questions per respondent method = 'orthogonal' ) From c831e798d2d54196bae2da4c2bb16261a3eb244c Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 11:25:04 -0400 Subject: [PATCH 07/17] update messages from orthogonal approach --- R/design.R | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/R/design.R b/R/design.R index 5fe3efe..66758b3 100644 --- a/R/design.R +++ b/R/design.R @@ -81,7 +81,7 @@ #' ) #' #' # Make a survey design from an orthogonal array of profiles -#' design_rand <- cbc_design( +#' design_ortho <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -364,13 +364,18 @@ reorder_cols <- function(design) { make_design_orthogonal <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { - oa <- as.data.frame(DoE.base::oa.design( - factor.names = get_profile_list(profiles)) - ) + oa <- suppressMessages(as.data.frame( + DoE.base::oa.design( + factor.names = get_profile_list(profiles) + ) + )) if (nrow(oa) == nrow(profiles)) { - message("No orthogonal array found, using full factorial") + message("No orthogonal array found; using full factorial for design") } else { - message("Using orthogonal array with ", nrow(oa), " rows") + message( + "Orthogonal array found; using ", nrow(oa), " out of ", + nrow(profiles), " profiles for design" + ) } type_ids <- get_type_ids(profiles) oa <- join_profiles(oa, profiles, type_ids) From 4752d805f4ac44d7f3e079b594cf5dc4b24143eb Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 11:51:20 -0400 Subject: [PATCH 08/17] use 0 prior if no prior specified for CEA or Modfed method --- R/design.R | 59 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/R/design.R b/R/design.R index 66758b3..3a84d4f 100644 --- a/R/design.R +++ b/R/design.R @@ -11,9 +11,9 @@ #' @param n_resp Number of survey respondents. #' @param n_alts Number of alternatives per choice question. #' @param n_q Number of questions per respondent. -#' @param n_blocks Number of blocks used in Bayesian D-efficient design. -#' Max allowable is one block per respondent, defaults to `1`, meaning every -#' respondent sees the same set of choice questions. +#' @param n_blocks Number of blocks used in non-random designs (Orthogonal or +#' Bayesian D-efficient designs). Max allowable is one block per respondent. +#' Defaults to `1`, meaning every respondent sees the same choice set. #' @param n_draws Number of draws used in simulating the prior distribution #' used in Bayesian D-efficient designs. Defaults to `50`. #' @param n_start A numeric value indicating the number of random start designs @@ -26,12 +26,11 @@ #' @param label The name of the variable to use in a "labeled" design #' (also called an "alternative-specific design") such that each set of #' alternatives contains one of each of the levels in the `label` attribute. -#' Currently only compatible with randomized designs. If used, the `n_alts` -#' argument will be ignored as its value is defined by the unique number of -#' levels in the `label` variable. Defaults to `NULL`. +#' Currently not compatible with Bayesian D-efficient designs. If used, +#' the `n_alts` argument will be ignored as its value is defined by the unique +#' number of levels in the `label` variable. Defaults to `NULL`. #' @param priors A list of one or more assumed prior parameters used to -#' generate a Bayesian D-efficient design. If `NULL` (the default), a -#' randomized design will be generated. +#' generate a Bayesian D-efficient design. Defaults to `NULL` #' @param prior_no_choice Prior utility value for the "no choice" alternative. #' Only required if `no_choice = TRUE`. Defaults to `NULL`. #' @param probs If `TRUE`, for Bayesian D-efficient designs the resulting @@ -42,11 +41,10 @@ #' sets are created by randomly selecting from the full set of `profiles`. The #' `"orthogonal"` method first finds an orthogonal array from `profiles` and #' then randomly selects from it. For Bayesian D-efficient designs, use `"CEA"` -#' or `"Modfed"` along with specified `priors`. If priors are specified with no -#' specified `method`, `"CEA"` will be used. If `method` is set to `"CEA"` or -#' but without `priors` specified, a prior of all `0`s is used. If using a -#' restricted set of `profiles`, only the `"Modfed"` method can be used as -#' `"CEA"` requires unrestricted `profiles`. See `?idefix::CEA` and +#' or `"Modfed"` along with specified `priors`. If `method` is set to `"CEA"` +#' or `"Modfed"` but without `priors` specified, a prior of all `0`s is used. +#' If using a restricted set of `profiles`, only the `"Modfed"` method can be +#' used as `"CEA"` requires unrestricted `profiles`. See `?idefix::CEA` and #' `?idefix::Modfed` for more details. #' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned #' object will be a list containing the design and the DB-error score. @@ -390,25 +388,13 @@ make_design_bayesian <- function( label, priors, prior_no_choice, probs, method, keep_db_error, max_iter, parallel ) { - # Set up initial parameters for creating design - - # Make sure order of priors matches order of attributes in profiles - profile_lvls <- profiles[, 2:ncol(profiles)] - varnames <- names(profile_lvls) - priors <- priors[varnames] - - # Set up priors - mu <- unlist(priors) - if (no_choice) { - mu <- c(prior_no_choice, mu) - } - # Set up levels and coding - lvl.names <- unname(get_profile_list(profiles)) + profile_list <- get_profile_list(profiles) + type_ids <- get_type_ids(profiles) + lvl.names <- unname(profile_list) lvls <- unname(unlist(lapply(lvl.names, function(x) length(x)))) coding <- rep("C", length(lvls)) c.lvls <- NULL - type_ids <- get_type_ids(profiles) if (any(type_ids$continuous)) { c.lvls <- lvl.names[type_ids$continuous] } @@ -424,6 +410,23 @@ make_design_bayesian <- function( alt_cte <- c(alt_cte, 1) no_choice_alt <- n_alts } + + # Make sure order of priors matches order of attributes in profiles + profile_lvls <- profiles[, 2:ncol(profiles)] + varnames <- names(profile_lvls) + if (is.null(priors)) { + # No priors specified, so use all 0s + warning( + 'Since the ', method, ' method is used but no priors were ', + 'specified, a zero prior will be used (all coefficients set to 0)' + ) + priors <- lapply(profile_list, function(x) rep(0, length(x) - 1)) + priors[type_ids$continuous] <- 0 + } + mu <- unlist(priors[varnames]) + if (no_choice) { + mu <- c(prior_no_choice, mu) + } sigma <- diag(length(mu)) par_draws <- MASS::mvrnorm(n = n_draws, mu = mu, Sigma = sigma) n_alt_cte <- sum(alt_cte) From c04b4efc23ecc2649502333110c2386e29b6a3d2 Mon Sep 17 00:00:00 2001 From: John Helveston Date: Thu, 29 Jun 2023 17:14:14 -0400 Subject: [PATCH 09/17] Update design.R --- R/design.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/design.R b/R/design.R index 3a84d4f..dce6184 100644 --- a/R/design.R +++ b/R/design.R @@ -11,9 +11,9 @@ #' @param n_resp Number of survey respondents. #' @param n_alts Number of alternatives per choice question. #' @param n_q Number of questions per respondent. -#' @param n_blocks Number of blocks used in non-random designs (Orthogonal or -#' Bayesian D-efficient designs). Max allowable is one block per respondent. -#' Defaults to `1`, meaning every respondent sees the same choice set. +#' @param n_blocks Number of blocks used in Orthogonal or Bayesian D-efficient +#' designs. Max allowable is one block per respondent. Defaults to `1`, meaning +#' every respondent sees the same choice set. #' @param n_draws Number of draws used in simulating the prior distribution #' used in Bayesian D-efficient designs. Defaults to `50`. #' @param n_start A numeric value indicating the number of random start designs From ea94aed16f5c9a9106f5677168a4371a2b4bcb7e Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 07:58:38 -0400 Subject: [PATCH 10/17] update examples and documentation for new design methods --- DESCRIPTION | 2 +- R/choices.R | 7 +-- R/design.R | 125 +++++++++++++++++++++++++++++++---------------- R/input_checks.R | 4 +- R/inspect.R | 7 +-- R/power.R | 7 +-- 6 files changed, 97 insertions(+), 55 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 080df85..e4e7b21 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,7 @@ Authors@R: c( role = c("cre", "aut", "cph"), email = "john.helveston@gmail.com", comment = c(ORCID = "0000-0002-2657-9191"))) -Description: Design and evaluate choice-based conjoint survey experiments in R. Generate survey designs, including randomized designs and Bayesian D-efficient designs as well as designs with "no choice" options and labeled designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct power analyses on a survey design by estimating the same model multiple times using different subsets of the data to simulate different sample sizes. Choice simulation and model estimation are handled using the 'logitr' package, and Bayesian D-efficient designs are obtained using the 'idefix' package. For more details see Helveston (2023) and Traets et al (2020) . +Description: Design and evaluate choice-based conjoint survey experiments in R. Generate a variety of survey designs, including full factorial designs, orthogonal designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct power analyses on a survey design by estimating the same model multiple times using different subsets of the data to simulate different sample sizes. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Choice simulation and model estimation are handled using the 'logitr' package (Helveston, 2023) . License: MIT + file LICENSE Encoding: UTF-8 LazyData: true diff --git a/R/choices.R b/R/choices.R index afcb9b0..96ef5d0 100644 --- a/R/choices.R +++ b/R/choices.R @@ -27,12 +27,13 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design +#' # Make a survey design from all possible profiles +#' # (This is the default setting where method = 'full' for "full factorial") #' design <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6 # Number of questions per respondent +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6 # Number of questions per respondent #' ) #' #' # Simulate random choices diff --git a/R/design.R b/R/design.R index dce6184..a554752 100644 --- a/R/design.R +++ b/R/design.R @@ -1,11 +1,12 @@ -#' Make a random or Bayesian D-efficient choice-based conjoint survey design +#' Make a choice-based conjoint survey design #' #' This function creates a data frame containing a choice-based conjoint survey -#' design where each row is an alternative. Designs can be either a -#' randomized or Bayesian D-efficient, in which case an implementation of the -#' CEA or Modfed Federov algorithm is used via the {idefix} package +#' design where each row is an alternative. Generate a variety of survey +#' designs, including full factorial designs, orthogonal designs, and +#' Bayesian D-efficient designs as well as designs with "no choice" options +#' and "labeled" (also known as "alternative specific") designs. #' -#' @keywords logitr mnl mxl mixed logit design +#' @keywords experiment design mnl mxl mixed logit logitr idefix DoE.base #' @param profiles A data frame in which each row is a possible profile. #' This can be generated using the `cbc_profiles()` function. #' @param n_resp Number of survey respondents. @@ -29,6 +30,9 @@ #' Currently not compatible with Bayesian D-efficient designs. If used, #' the `n_alts` argument will be ignored as its value is defined by the unique #' number of levels in the `label` variable. Defaults to `NULL`. +#' @param method Choose the design method to use: `"full"`, `"orthogonal"`, +#' `"CEA"` or `"Modfed"`. Defaults to `"full"`. See details below for complete +#' description of each method. #' @param priors A list of one or more assumed prior parameters used to #' generate a Bayesian D-efficient design. Defaults to `NULL` #' @param prior_no_choice Prior utility value for the "no choice" alternative. @@ -37,15 +41,6 @@ #' design includes average predicted probabilities for each alternative in each #' choice set given the sample from the prior preference distribution. #' Defaults to `FALSE`. -#' @param method Which design method to use? Defaults to `"random"` where choice -#' sets are created by randomly selecting from the full set of `profiles`. The -#' `"orthogonal"` method first finds an orthogonal array from `profiles` and -#' then randomly selects from it. For Bayesian D-efficient designs, use `"CEA"` -#' or `"Modfed"` along with specified `priors`. If `method` is set to `"CEA"` -#' or `"Modfed"` but without `priors` specified, a prior of all `0`s is used. -#' If using a restricted set of `profiles`, only the `"Modfed"` method can be -#' used as `"CEA"` requires unrestricted `profiles`. See `?idefix::CEA` and -#' `?idefix::Modfed` for more details. #' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned #' object will be a list containing the design and the DB-error score. #' Defaults to `FALSE`. @@ -53,7 +48,40 @@ #' iterations when searching for a Bayesian D-efficient design. The default is #' 50. #' @param parallel Logical value indicating whether computations should be done -#' over multiple cores. The default is `TRUE`. +#' over multiple cores. The default is `FALSE`. +#' @details +#' The `method` argument determines the design method used. Options are: +#' +#' - `"full"` +#' - `"orthogonal"` +#' - `"CEA"` +#' - `"Modfed"` +#' +#' The `"full"` method uses a "full factorial" design where choice sets are +#' created by randomly selecting from the full set of `profiles`. Blocking can +#' used with these designs where blocks are created from subsets of the full +#' factorial design. For more information about blocking with full factorial +#' designs, see `?DoE.base::fac.design` as well as the JSS article on the +#' {DoE.base} package (Grömping, 2018) . +#' +#' The `"orthogonal"` method first finds an orthogonal array from the full +#' set of `profiles` (if possible), then randomly selects from it to create +#' choice sets. For some designs an orthogonal array can't be found, in which +#' case a full factorial design is used. This approach is also sometimes called +#' a "main effects" design since orthogonal arrays focus the information on the +#' main effects at the expense of information about interaction effects. For +#' more information about orthogonal designs, see `?DoE.base::oa.design` as +#' well as the JSS article on the {DoE.base} package +#' (Grömping, 2018) . +#' +#' For Bayesian D-efficient designs, use `"CEA"` or `"Modfed"` along with +#' specified `priors`. If `method` is set to `"CEA"` or `"Modfed"` but without +#' `priors` specified, a prior of all `0`s will be used and a warning message +#' stating this will be shown. If you are using a restricted set of `profiles`, +#' only the `"Modfed"` method can be used as `"CEA"` requires unrestricted +#' `profiles`. For more details on Bayesian D-efficient designs, see +#' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the +#' {idefix} package (Traets et al, 2020) . #' @return A data frame containing a choice-based conjoint survey design where #' each row is an alternative. #' @export @@ -69,9 +97,9 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design from all possible profiles -#' # (This is the default setting where method = 'random') -#' design_rand <- cbc_design( +#' # Make a survey design from all possible profiles +#' # (This is the default setting where method = 'full' for "full factorial") +#' design_full <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -87,9 +115,9 @@ #' method = 'orthogonal' #' ) #' -#' # Make a randomized survey design from all possible profiles +#' # Make a survey design from all possible profiles #' # with a "no choice" option -#' design_rand_nochoice <- cbc_design( +#' design_full_nochoice <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -97,9 +125,9 @@ #' no_choice = TRUE #' ) #' -#' # Make a randomized survey design from all possible profiles +#' # Make a survey design from all possible profiles #' # with each level of the "type" attribute appearing as an alternative -#' design_rand_labeled <- cbc_design( +#' design_full_labeled <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -108,13 +136,13 @@ #' ) #' #' # Make a Bayesian D-efficient design with a prior model specified -#' # Note that by default parallel = TRUE. +#' # Note that by speed can be improved by setting parallel = TRUE #' design_deff <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6, # Number of questions per respondent -#' n_start = 1, +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6, # Number of questions per respondent +#' n_start = 1, # Defauls to 5, set to 1 here for a quick example #' priors = list( #' price = -0.1, #' type = c(0.1, 0.2), @@ -130,16 +158,16 @@ cbc_design <- function( n_q, n_blocks = 1, n_draws = 50, - no_choice = FALSE, n_start = 5, + no_choice = FALSE, label = NULL, + method = "full", priors = NULL, prior_no_choice = NULL, probs = FALSE, - method = "random", keep_db_error = FALSE, max_iter = 50, - parallel = TRUE + parallel = FALSE ) { method <- check_design_method(method, priors) check_inputs_design( @@ -149,20 +177,20 @@ cbc_design <- function( n_q, n_blocks, n_draws, - no_choice, n_start, + no_choice, label, + method, priors, prior_no_choice, probs, - method, keep_db_error, max_iter, parallel ) profiles <- as.data.frame(profiles) # tibbles break things - if (method == 'random') { - design <- get_randomized_design( + if (method == 'full') { + design <- make_design_full( profiles, n_resp, n_alts, n_q, no_choice, label ) } else if (method == 'orthogonal') { @@ -171,8 +199,8 @@ cbc_design <- function( ) } else { design <- make_design_bayesian( - profiles, n_resp, n_alts, n_q, n_blocks, n_draws, no_choice, n_start, - label, priors, prior_no_choice, probs, method, keep_db_error, max_iter, + profiles, n_resp, n_alts, n_q, n_blocks, n_draws, n_start, no_choice, + label, method, priors, prior_no_choice, probs, keep_db_error, max_iter, parallel ) } @@ -181,7 +209,9 @@ cbc_design <- function( return(design) } -# Random Design ---- +# Randomize the design ---- + +# Sample from profiles to create randomized choice sets get_randomized_design <- function( profiles, n_resp, n_alts, n_q, no_choice, label @@ -357,6 +387,17 @@ reorder_cols <- function(design) { return(design) } +# Full Factorial Design ---- + +make_design_full <- function( + profiles, n_resp, n_alts, n_q, no_choice, label +) { + design <- get_randomized_design( + profiles, n_resp, n_alts, n_q, no_choice, label + ) + return(design) +} + # Orthogonal Design ---- make_design_orthogonal <- function( @@ -384,8 +425,8 @@ make_design_orthogonal <- function( # Bayesian D-efficient Design ---- make_design_bayesian <- function( - profiles, n_resp, n_alts, n_q, n_blocks, n_draws, no_choice, n_start, - label, priors, prior_no_choice, probs, method, keep_db_error, max_iter, + profiles, n_resp, n_alts, n_q, n_blocks, n_draws, n_start, no_choice, + label, method, priors, prior_no_choice, probs, keep_db_error, max_iter, parallel ) { # Set up levels and coding @@ -411,7 +452,7 @@ make_design_bayesian <- function( no_choice_alt <- n_alts } - # Make sure order of priors matches order of attributes in profiles + # Setup priors profile_lvls <- profiles[, 2:ncol(profiles)] varnames <- names(profile_lvls) if (is.null(priors)) { @@ -423,6 +464,7 @@ make_design_bayesian <- function( priors <- lapply(profile_list, function(x) rep(0, length(x) - 1)) priors[type_ids$continuous] <- 0 } + # Make sure order of priors matches order of attributes in profiles mu <- unlist(priors[varnames]) if (no_choice) { mu <- c(prior_no_choice, mu) @@ -437,9 +479,7 @@ make_design_bayesian <- function( } # Make the design - profiles_restricted <- nrow(expand.grid(lvl.names)) > nrow(profiles) - if (profiles_restricted & (method == "CEA")) { # "CEA" method only works with unrestricted profile set method <- "Modfed" @@ -448,7 +488,6 @@ make_design_bayesian <- function( 'profiles, so "Modfed" is being used instead.\n' ) } - if (method == "CEA") { D <- idefix::CEA( lvls = lvls, @@ -514,7 +553,7 @@ make_design_bayesian <- function( design <- add_metadata(design, n_resp, n_alts, n_q) design <- reorder_cols(design) - # Print error + # Print DB error message( "Bayesian D-efficient design found with DB-error of ", round(D$error, 5) diff --git a/R/input_checks.R b/R/input_checks.R index cc185fb..2605a55 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -49,13 +49,13 @@ check_inputs_design <- function( n_q, n_blocks, n_draws, - no_choice, n_start, + no_choice, label, + method, priors, prior_no_choice, probs, - method, keep_db_error, max_iter, parallel diff --git a/R/inspect.R b/R/inspect.R index 4982187..7cc4af5 100644 --- a/R/inspect.R +++ b/R/inspect.R @@ -20,12 +20,13 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design +#' # Make a survey design from all possible profiles +#' # (This is the default setting where method = 'full' for "full factorial") #' design <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6 # Number of questions per respondent +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6 # Number of questions per respondent #' ) #' #' # Inspect the design balance diff --git a/R/power.R b/R/power.R index 79fbc5b..ed3d212 100644 --- a/R/power.R +++ b/R/power.R @@ -60,12 +60,13 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design +#' # Make a survey design from all possible profiles +#' # (This is the default setting where method = 'full' for "full factorial") #' design <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6 # Number of questions per respondent +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6 # Number of questions per respondent #' ) #' #' # Simulate random choices From aa96c203029db8a104da8e8af5138ef43954c3c3 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 07:59:20 -0400 Subject: [PATCH 11/17] build man --- man/cbc_balance.Rd | 7 +-- man/cbc_choices.Rd | 7 +-- man/cbc_design.Rd | 117 ++++++++++++++++++++++++++++----------------- man/cbc_power.Rd | 7 +-- 4 files changed, 86 insertions(+), 52 deletions(-) diff --git a/man/cbc_balance.Rd b/man/cbc_balance.Rd index 898c5da..3c05504 100644 --- a/man/cbc_balance.Rd +++ b/man/cbc_balance.Rd @@ -29,12 +29,13 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design +# Make a survey design from all possible profiles +# (This is the default setting where method = 'full' for "full factorial") design <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_alts = 3, # Number of alternatives per question + n_q = 6 # Number of questions per respondent ) # Inspect the design balance diff --git a/man/cbc_choices.Rd b/man/cbc_choices.Rd index b0d3746..676b18a 100644 --- a/man/cbc_choices.Rd +++ b/man/cbc_choices.Rd @@ -39,12 +39,13 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design +# Make a survey design from all possible profiles +# (This is the default setting where method = 'full' for "full factorial") design <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_alts = 3, # Number of alternatives per question + n_q = 6 # Number of questions per respondent ) # Simulate random choices diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index 03b0ada..f3bbd08 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/design.R \name{cbc_design} \alias{cbc_design} -\title{Make a random or Bayesian D-efficient choice-based conjoint survey design} +\title{Make a choice-based conjoint survey design} \usage{ cbc_design( profiles, @@ -11,16 +11,16 @@ cbc_design( n_q, n_blocks = 1, n_draws = 50, - no_choice = FALSE, n_start = 5, + no_choice = FALSE, label = NULL, + method = "full", priors = NULL, prior_no_choice = NULL, probs = FALSE, - method = "random", keep_db_error = FALSE, max_iter = 50, - parallel = TRUE + parallel = FALSE ) } \arguments{ @@ -33,32 +33,35 @@ This can be generated using the \code{cbc_profiles()} function.} \item{n_q}{Number of questions per respondent.} -\item{n_blocks}{Number of blocks used in Bayesian D-efficient design. -Max allowable is one block per respondent, defaults to \code{1}, meaning every -respondent sees the same set of choice questions.} +\item{n_blocks}{Number of blocks used in Orthogonal or Bayesian D-efficient +designs. Max allowable is one block per respondent. Defaults to \code{1}, meaning +every respondent sees the same choice set.} \item{n_draws}{Number of draws used in simulating the prior distribution used in Bayesian D-efficient designs. Defaults to \code{50}.} -\item{no_choice}{Include a "no choice" option in the choice sets? Defaults -to \code{FALSE}. If \code{TRUE}, the total number of alternatives per question will be -one more than the provided \code{n_alts} argument.} - \item{n_start}{A numeric value indicating the number of random start designs to use in obtaining a Bayesian D-efficient design. The default is \code{5}. Increasing \code{n_start} can result in a more efficient design at the expense of increased computational time.} +\item{no_choice}{Include a "no choice" option in the choice sets? Defaults +to \code{FALSE}. If \code{TRUE}, the total number of alternatives per question will be +one more than the provided \code{n_alts} argument.} + \item{label}{The name of the variable to use in a "labeled" design (also called an "alternative-specific design") such that each set of alternatives contains one of each of the levels in the \code{label} attribute. -Currently only compatible with randomized designs. If used, the \code{n_alts} -argument will be ignored as its value is defined by the unique number of -levels in the \code{label} variable. Defaults to \code{NULL}.} +Currently not compatible with Bayesian D-efficient designs. If used, +the \code{n_alts} argument will be ignored as its value is defined by the unique +number of levels in the \code{label} variable. Defaults to \code{NULL}.} + +\item{method}{Choose the design method to use: \code{"full"}, \code{"orthogonal"}, +\code{"CEA"} or \code{"Modfed"}. Defaults to \code{"full"}. See details below for complete +description of each method.} \item{priors}{A list of one or more assumed prior parameters used to -generate a Bayesian D-efficient design. If \code{NULL} (the default), a -randomized design will be generated.} +generate a Bayesian D-efficient design. Defaults to \code{NULL}} \item{prior_no_choice}{Prior utility value for the "no choice" alternative. Only required if \code{no_choice = TRUE}. Defaults to \code{NULL}.} @@ -68,17 +71,6 @@ design includes average predicted probabilities for each alternative in each choice set given the sample from the prior preference distribution. Defaults to \code{FALSE}.} -\item{method}{Which design method to use? Defaults to \code{"random"} where choice -sets are created by randomly selecting from the full set of \code{profiles}. The -\code{"orthogonal"} method first finds an orthogonal array from \code{profiles} and -then randomly selects from it. For Bayesian D-efficient designs, use \code{"CEA"} -or \code{"Modfed"} along with specified \code{priors}. If priors are specified with no -specified \code{method}, \code{"CEA"} will be used. If \code{method} is set to \code{"CEA"} or -but without \code{priors} specified, a prior of all \code{0}s is used. If using a -restricted set of \code{profiles}, only the \code{"Modfed"} method can be used as -\code{"CEA"} requires unrestricted \code{profiles}. See \code{?idefix::CEA} and -\code{?idefix::Modfed} for more details.} - \item{keep_db_error}{If \code{TRUE}, for Bayesian D-efficient designs the returned object will be a list containing the design and the DB-error score. Defaults to \code{FALSE}.} @@ -88,7 +80,7 @@ iterations when searching for a Bayesian D-efficient design. The default is 50.} \item{parallel}{Logical value indicating whether computations should be done -over multiple cores. The default is \code{TRUE}.} +over multiple cores. The default is \code{FALSE}.} } \value{ A data frame containing a choice-based conjoint survey design where @@ -96,9 +88,45 @@ each row is an alternative. } \description{ This function creates a data frame containing a choice-based conjoint survey -design where each row is an alternative. Designs can be either a -randomized or Bayesian D-efficient, in which case an implementation of the -CEA or Modfed Federov algorithm is used via the {idefix} package +design where each row is an alternative. Generate a variety of survey +designs, including full factorial designs, orthogonal designs, and +Bayesian D-efficient designs as well as designs with "no choice" options +and "labeled" (also known as "alternative specific") designs. +} +\details{ +The \code{method} argument determines the design method used. Options are: +\itemize{ +\item \code{"full"} +\item \code{"orthogonal"} +\item \code{"CEA"} +\item \code{"Modfed"} +} + +The \code{"full"} method uses a "full factorial" design where choice sets are +created by randomly selecting from the full set of \code{profiles}. Blocking can +used with these designs where blocks are created from subsets of the full +factorial design. For more information about blocking with full factorial +designs, see \code{?DoE.base::fac.design} as well as the JSS article on the +{DoE.base} package (Grömping, 2018) \url{doi:10.18637/jss.v085.i05}. + +The \code{"orthogonal"} method first finds an orthogonal array from the full +set of \code{profiles} (if possible), then randomly selects from it to create +choice sets. For some designs an orthogonal array can't be found, in which +case a full factorial design is used. This approach is also sometimes called +a "main effects" design since orthogonal arrays focus the information on the +main effects at the expense of information about interaction effects. For +more information about orthogonal designs, see \code{?DoE.base::oa.design} as +well as the JSS article on the {DoE.base} package +(Grömping, 2018) \url{doi:10.18637/jss.v085.i05}. + +For Bayesian D-efficient designs, use \code{"CEA"} or \code{"Modfed"} along with +specified \code{priors}. If \code{method} is set to \code{"CEA"} or \code{"Modfed"} but without +\code{priors} specified, a prior of all \code{0}s will be used and a warning message +stating this will be shown. If you are using a restricted set of \code{profiles}, +only the \code{"Modfed"} method can be used as \code{"CEA"} requires unrestricted +\code{profiles}. For more details on Bayesian D-efficient designs, see +\code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the +{idefix} package (Traets et al, 2020) \url{doi:10.18637/jss.v096.i03}. } \examples{ library(cbcTools) @@ -112,9 +140,9 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design from all possible profiles -# (This is the default setting where method = 'random') -design_rand <- cbc_design( +# Make a survey design from all possible profiles +# (This is the default setting where method = 'full' for "full factorial") +design_full <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -122,7 +150,7 @@ design_rand <- cbc_design( ) # Make a survey design from an orthogonal array of profiles -design_rand <- cbc_design( +design_ortho <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -130,9 +158,9 @@ design_rand <- cbc_design( method = 'orthogonal' ) -# Make a randomized survey design from all possible profiles +# Make a survey design from all possible profiles # with a "no choice" option -design_rand_nochoice <- cbc_design( +design_full_nochoice <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -140,9 +168,9 @@ design_rand_nochoice <- cbc_design( no_choice = TRUE ) -# Make a randomized survey design from all possible profiles +# Make a survey design from all possible profiles # with each level of the "type" attribute appearing as an alternative -design_rand_labeled <- cbc_design( +design_full_labeled <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -151,13 +179,13 @@ design_rand_labeled <- cbc_design( ) # Make a Bayesian D-efficient design with a prior model specified -# Note that by default parallel = TRUE. +# Note that by speed can be improved by setting parallel = TRUE design_deff <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - n_start = 1, + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + n_start = 1, # Defauls to 5, set to 1 here for a quick example priors = list( price = -0.1, type = c(0.1, 0.2), @@ -167,7 +195,10 @@ design_deff <- cbc_design( parallel = FALSE ) } +\keyword{DoE.base} \keyword{design} +\keyword{experiment} +\keyword{idefix} \keyword{logit} \keyword{logitr} \keyword{mixed} diff --git a/man/cbc_power.Rd b/man/cbc_power.Rd index d44a3d8..c867983 100644 --- a/man/cbc_power.Rd +++ b/man/cbc_power.Rd @@ -97,12 +97,13 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design +# Make a survey design from all possible profiles +# (This is the default setting where method = 'full' for "full factorial") design <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_alts = 3, # Number of alternatives per question + n_q = 6 # Number of questions per respondent ) # Simulate random choices From 42f2ec2a73518a38f5efe46164e90953e46d206a Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 08:49:47 -0400 Subject: [PATCH 12/17] updated vignette with new methods and better citations --- R/design.R | 14 ++++-- R/input_checks.R | 4 +- man/cbc_design.Rd | 15 +++++-- vignettes/library.bib | 35 +++++++++++++++ vignettes/usage.Rmd | 102 ++++++++++++++++++++++++++++-------------- 5 files changed, 127 insertions(+), 43 deletions(-) create mode 100644 vignettes/library.bib diff --git a/R/design.R b/R/design.R index a554752..57ad7df 100644 --- a/R/design.R +++ b/R/design.R @@ -62,7 +62,7 @@ #' used with these designs where blocks are created from subsets of the full #' factorial design. For more information about blocking with full factorial #' designs, see `?DoE.base::fac.design` as well as the JSS article on the -#' {DoE.base} package (Grömping, 2018) . +#' {DoE.base} package (Grömping, 2018) \doi{10.18637/jss.v085.i05}. #' #' The `"orthogonal"` method first finds an orthogonal array from the full #' set of `profiles` (if possible), then randomly selects from it to create @@ -72,7 +72,7 @@ #' main effects at the expense of information about interaction effects. For #' more information about orthogonal designs, see `?DoE.base::oa.design` as #' well as the JSS article on the {DoE.base} package -#' (Grömping, 2018) . +#' (Grömping, 2018) \doi{10.18637/jss.v085.i05}. #' #' For Bayesian D-efficient designs, use `"CEA"` or `"Modfed"` along with #' specified `priors`. If `method` is set to `"CEA"` or `"Modfed"` but without @@ -81,7 +81,13 @@ #' only the `"Modfed"` method can be used as `"CEA"` requires unrestricted #' `profiles`. For more details on Bayesian D-efficient designs, see #' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the -#' {idefix} package (Traets et al, 2020) . +#' {idefix} package (Traets et al, 2020) \doi{10.18637/jss.v096.i03}. +#' @references +#' Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal +#' of Statistical Software, 85(5), 1–41. \doi{10.18637/jss.v085.i05}. +#' Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal +#' Designs for Discrete Choice Experiments in R: The idefix Package. Journal +#' of Statistical Software, 96(3), 1–41. \doi{10.18637/jss.v096.i03}. #' @return A data frame containing a choice-based conjoint survey design where #' each row is an alternative. #' @export @@ -137,7 +143,7 @@ #' #' # Make a Bayesian D-efficient design with a prior model specified #' # Note that by speed can be improved by setting parallel = TRUE -#' design_deff <- cbc_design( +#' design_bayesian <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents #' n_alts = 3, # Number of alternatives per question diff --git a/R/input_checks.R b/R/input_checks.R index 2605a55..ecdbeb8 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -86,9 +86,9 @@ check_inputs_design <- function( # Check that an appropriate method is used - if (! method %in% c('random', 'orthogonal', 'CEA', 'Modfed')) { + if (! method %in% c('full', 'orthogonal', 'CEA', 'Modfed')) { stop( - 'The "method" argument must be set to "random", "orthogonal", ', + 'The "method" argument must be set to "full", "orthogonal", ', '"Modfed", or "CEA"' ) } diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index f3bbd08..cdf00d5 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -107,7 +107,7 @@ created by randomly selecting from the full set of \code{profiles}. Blocking can used with these designs where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see \code{?DoE.base::fac.design} as well as the JSS article on the -{DoE.base} package (Grömping, 2018) \url{doi:10.18637/jss.v085.i05}. +{DoE.base} package (Grömping, 2018) \doi{10.18637/jss.v085.i05}. The \code{"orthogonal"} method first finds an orthogonal array from the full set of \code{profiles} (if possible), then randomly selects from it to create @@ -117,7 +117,7 @@ a "main effects" design since orthogonal arrays focus the information on the main effects at the expense of information about interaction effects. For more information about orthogonal designs, see \code{?DoE.base::oa.design} as well as the JSS article on the {DoE.base} package -(Grömping, 2018) \url{doi:10.18637/jss.v085.i05}. +(Grömping, 2018) \doi{10.18637/jss.v085.i05}. For Bayesian D-efficient designs, use \code{"CEA"} or \code{"Modfed"} along with specified \code{priors}. If \code{method} is set to \code{"CEA"} or \code{"Modfed"} but without @@ -126,7 +126,7 @@ stating this will be shown. If you are using a restricted set of \code{profiles} only the \code{"Modfed"} method can be used as \code{"CEA"} requires unrestricted \code{profiles}. For more details on Bayesian D-efficient designs, see \code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the -{idefix} package (Traets et al, 2020) \url{doi:10.18637/jss.v096.i03}. +{idefix} package (Traets et al, 2020) \doi{10.18637/jss.v096.i03}. } \examples{ library(cbcTools) @@ -180,7 +180,7 @@ design_full_labeled <- cbc_design( # Make a Bayesian D-efficient design with a prior model specified # Note that by speed can be improved by setting parallel = TRUE -design_deff <- cbc_design( +design_bayesian <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -195,6 +195,13 @@ design_deff <- cbc_design( parallel = FALSE ) } +\references{ +Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal +of Statistical Software, 85(5), 1–41. \doi{10.18637/jss.v085.i05}. +Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal +Designs for Discrete Choice Experiments in R: The idefix Package. Journal +of Statistical Software, 96(3), 1–41. \doi{10.18637/jss.v096.i03}. +} \keyword{DoE.base} \keyword{design} \keyword{experiment} diff --git a/vignettes/library.bib b/vignettes/library.bib new file mode 100644 index 0000000..c7564af --- /dev/null +++ b/vignettes/library.bib @@ -0,0 +1,35 @@ +@article{Grömping2018, + title={R Package DoE.base for Factorial Experiments}, + volume={85}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v085i05}, + doi={10.18637/jss.v085.i05}, + number={5}, + journal={Journal of Statistical Software}, + author={Grömping, Ulrike}, + year={2018}, + pages={1–41} +} + +@article{Traets2020, + title={Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package}, + volume={96}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v096i03}, + doi={10.18637/jss.v096.i03}, + number={3}, + journal={Journal of Statistical Software}, + author={Traets, Frits and Sanchez, Daniel Gil and Vandebroek, Martina}, + year={2020}, + pages={1–41} +} + +@article{Helveston2023, + title={logitr: Fast Estimation of Multinomial and Mixed Logit Models with Preference Space and Willingness-to-Pay Space Utility Parameterizations}, + volume={105}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v105i10}, + doi={10.18637/jss.v105.i10}, + number={10}, + journal={Journal of Statistical Software}, + author={Helveston, John Paul}, + year={2023}, + pages={1–37} +} \ No newline at end of file diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 9eaa394..e6ac399 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -5,6 +5,7 @@ vignette: > %\VignetteIndexEntry{Usage} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} +bibliography: "`r here::here('vignettes', 'library.bib')`" --- ```{r setup, include=FALSE, message=FALSE, warning=FALSE} @@ -35,7 +36,7 @@ The first step in designing an experiment is to define the attributes and levels library(cbcTools) profiles <- cbc_profiles( - price = seq(1, 4, 0.5), # $ per pound + price = seq(1, 5, 0.5), # $ per pound type = c('Fuji', 'Gala', 'Honeycrisp'), freshness = c('Poor', 'Average', 'Excellent') ) @@ -51,21 +52,21 @@ Depending on the context of your survey, you may wish to eliminate some profiles > **CAUTION: including restrictions in your designs can substantially reduce the statistical power of your design, so use them cautiously and avoid them if possible**. -If you do wish to restrict some attribute level combinations, you can do so using the `cbc_restrict()` function, which takes the full set of `profiles` along with any number of restricted pairs of attribute levels, defined as pairs of logical expressions separated by commas. In the example below, I include the following restrictions: +If you do wish to restrict some attribute level combinations, you can do so using the `cbc_restrict()` function, which takes the full set of `profiles` along with any number of restricted pairs of attribute levels, defined as pairs of logical expressions separated by commas. In the example below, I include the following restrictions (these are arbitrary and just for illustration purposes): - `"Gala"` apples will not be shown with the prices `1.5`, `2.5`, and `3.5`. - `"Honeycrisp"` apples will not be shown with prices less than `2`. -- `"Honeycrisp"` apples will not be shown with the `"Poor"` freshness. +- `"Honeycrisp"` apples will only be shown with `"Average"` freshness. - `"Fuji"` apples will not be shown with the `"Excellent"` freshness. -With these restrictions, there are now only 32 profiles compared to 63 without them: +With these restrictions, there are now only 39 profiles compared to 81 without them: ```{r} restricted_profiles <- cbc_restrict( profiles, type == "Gala" & price %in% c(1.5, 2.5, 3.5), type == "Honeycrisp" & price > 2, - type == "Honeycrisp" & freshness == "Poor", + type == "Honeycrisp" & freshness %in% c("Poor", "Excellent"), type == "Fuji" & freshness == "Excellent" ) @@ -76,44 +77,68 @@ restricted_profiles Once a set of profiles is obtained, a conjoint survey can then be generated using the `cbc_design()` function. A variety of survey designs can be generated, including: -- Random designs +- Full factorial designs +- Orthogonal (a.k.a. "main effects") designs - Labeled designs (a.k.a. "alternative-specific" designs) - Designs with a "no choice" option (a.k.a. "outside good") - Bayesian D-efficient designs -## Random designs +## Full factorial designs + +Full factorial designs are obtained by setting `method = 'full'` (the default setting for `method`). This method simply samples from the full set of `profiles`, ensuring that no two profiles are the same in any choice question. Blocking can used with these designs where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Grömping2018]. -The randomized design simply samples from the set of `profiles`, ensuring that no two profiles are the same in any choice question. The resulting `design` data frame includes the following columns: +The resulting `design` data frame includes the following columns: - `profileID`: Identifies the profile in `profiles`. - `respID`: Identifies each survey respondent. - `qID`: Identifies the choice question answered by the respondent. - `altID`:Identifies the alternative in any one choice observation. - `obsID`: Identifies each unique choice observation across all respondents. +- `blockID`: If blocking is used, identifies each unique block. ```{r} -design <- cbc_design( +design_full <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_q = 6, # Number of questions per respondent + method = 'full' # This is the default method +) + +dim(design_full) # View dimensions +head(design_full) # Preview first 6 rows +``` + +## Orthogonal designs + +Orthogonal designs are obtained by setting `method = 'orthogonal'`. This method first finds an orthogonal array from the full set of `profiles` (if possible), then randomly selects from it to create choice sets. For some designs an orthogonal array can't be found, in which case a full factorial design is used. This approach is also sometimes called a "main effects" design since orthogonal arrays focus the information on the main effects at the expense of information about interaction effects. For more information about orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article on the {DoE.base} package [@Grömping2018] + +```{r} +#| message: true + +design_ortho <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'orthogonal' ) -dim(design) # View dimensions -head(design) # Preview first 6 rows +dim(design_ortho) # View dimensions +head(design_ortho) # Preview first 6 rows ``` ## Labeled designs (a.k.a. "alternative-specific" designs) -You can also make a "labeled" design (also known as "alternative-specific" design) where the levels of one attribute is used as a label by setting the `label` argument to that attribute. This by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, so the `n_alts` argument is overridden. Here is an example labeled survey using the `type` attribute as the label: +You can also make a "labeled" design (also known as "alternative-specific" design) where the levels of one attribute are used as a label by setting the `label` argument to that attribute. This by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, so the `n_alts` argument is overridden. Here is an example of a labeled full factorial survey using the `type` attribute as the label: ```{r} design_labeled <- cbc_design( - profiles = profiles, - n_resp = 900, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - label = "type" # Set the "type" attribute as the label + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + label = "type" # Set the "type" attribute as the label ) dim(design_labeled) @@ -122,9 +147,9 @@ head(design_labeled) In the above example, you can see in the first six rows of the survey that the `type` attribute is always fixed to be the same order, ensuring that each level in the `type` attribute will always be shown in each choice question. -## Designs with a "no choice" option (a.k.a. "outside good") +## Designs with a "no choice" option (a.k.a. an "outside good") -You can include a "no choice" (also known as "outside good") option in your survey by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. +You can include a "no choice" (also known as an "outside good") option in your survey by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. ```{r} design_nochoice <- cbc_design( @@ -141,7 +166,7 @@ head(design_nochoice) ## Bayesian D-efficient designs -A Bayesian D-efficient design can be obtained by providing a list of prior parameters to define an expected prior utility model. These designs are optimized to minimize the D-error of the design given a prior model. The optimization is handled using the [{idefix} package](https://www.jstatsoft.org/article/view/v096i03). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. +Bayesian D-efficient designs are obtained by setting `method = 'CEA'` or `method = 'Modfed'` along with a specified list of `priors` (parameters that define a prior utility model). These designs are optimized to minimize the D-error of the design given a prior model. The optimization is handled using the [{idefix} package](https://cran.r-project.org/web/packages/idefix/index.html). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. If `method` is set to `"CEA"` or `"Modfed"` but without `priors` specified, a prior of all `0`s will be used and a warning message stating this will be shown. If you are using a restricted set of `profiles`, only the `"Modfed"` method can be used as `"CEA"` requires unrestricted `profiles`. For more details on Bayesian D-efficient designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the {idefix} package [@Traets2020]. In the example below, the prior model assumes the following parameters: @@ -150,7 +175,7 @@ In the example below, the prior model assumes the following parameters: - 2 categorical parameters for `freshness` (`"Average"` and `"Excellent"`) ```{r} -design_db_eff <- cbc_design( +design_bayesian <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -159,17 +184,18 @@ design_db_eff <- cbc_design( price = -0.1, type = c(0.1, 0.2), freshness = c(0.1, 0.2) - ) + ), + method = 'CEA' ) -dim(design_db_eff) -head(design_db_eff) +dim(design_bayesian) +head(design_bayesian) ``` Bayesian D-efficient designs that include a "no choice" option should set `no_choice = TRUE` and also define a prior for the "no choice" option using `prior_no_choice`, e.g.: ```{r} -design_db_eff_no_choice <- cbc_design( +design_bayesian_no_choice <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -180,11 +206,12 @@ design_db_eff_no_choice <- cbc_design( type = c(0.1, 0.2), freshness = c(0.1, 0.2) ), - prior_no_choice = -0.1 + prior_no_choice = -0.1, + method = 'CEA' ) -dim(design_db_eff_no_choice) -head(design_db_eff_no_choice) +dim(design_bayesian_no_choice) +head(design_bayesian_no_choice) ``` # Inspect survey designs @@ -194,6 +221,13 @@ The package includes some functions to quickly inspect some basic metrics of a d The `cbc_balance()` function prints out a summary of the individual and pairwise counts of each level of each attribute across all choice questions: ```{r} +design <- cbc_design( + profiles = profiles, + n_resp = 900, + n_alts = 3, + n_q = 6 +) + cbc_balance(design) ``` @@ -205,7 +239,7 @@ cbc_overlap(design) # Simulate choices -You can simulate choices for a given `design` using the `cbc_choices()` function. +You can simulate choices for a given `design` using the `cbc_choices()` function. Choices are simulated using the [{logitr} package](https://jhelvy.github.io/logitr/). For more information, see `?logitr::logitr` as well as the JSS article on the {logitr} package [@Helveston2023]. ## Random choices @@ -277,7 +311,7 @@ data <- cbc_choices( # Conduct a power analysis -The simulated choice data can be used to conduct a power analysis by estimating the same model multiple times with incrementally increasing sample sizes. As the sample size increases, the estimated coefficient standard errors will decrease (i.e. coefficient estimates become more precise). The `cbc_power()` function achieves this by partitioning the choice data into multiple sizes (defined by the `nbreaks` argument) and then estimating a user-defined choice model on each data subset. In the example below, 10 different sample sizes are used. All models are estimated using the [{logitr}](https://jhelvy.github.io/logitr/) package: +The simulated choice data can be used to conduct a power analysis by estimating the same model multiple times with incrementally increasing sample sizes. As the sample size increases, the estimated coefficient standard errors will decrease (i.e. coefficient estimates become more precise). The `cbc_power()` function achieves this by partitioning the choice data into multiple sizes (defined by the `nbreaks` argument) and then estimating a user-defined choice model on each data subset. In the example below, 10 different sample sizes are used. All models are estimated using the [{logitr} package](https://jhelvy.github.io/logitr/). For more information, see `?logitr::logitr` as well as the JSS article on the {logitr} package [@Helveston2023]. ```{r} power <- cbc_power( @@ -321,8 +355,10 @@ summary(models[[10]]) One of the convenient features of how the package is written is that the object generated in each step is used as the first argument to the function for the next step. Thus, just like in the overall program diagram, the functions can be piped together. For example, the "pipeline" below uses the Base R pipe operator (`|>`) to generate profiles, generate a design, simulate choices according to a prior utility model, conduct a power analysis, and then finally plot the results: -```{r, eval=FALSE} -design <- cbc_profiles( +```{r} +#| eval: false + +power_plot <- cbc_profiles( price = seq(1, 4, 0.5), # $ per pound type = c('Fuji', 'Gala', 'Honeycrisp'), freshness = c('Poor', 'Average', 'Excellent') From 1c7db376d421e5fb066da5f7576efe13f5d71023 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 11:21:45 -0400 Subject: [PATCH 13/17] support for blocked full factorial designs --- R/design.R | 61 +++++++++++++++++++++++++++++++++++++++++------- R/input_checks.R | 5 ++++ 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/R/design.R b/R/design.R index 57ad7df..9a7f90c 100644 --- a/R/design.R +++ b/R/design.R @@ -197,7 +197,7 @@ cbc_design <- function( profiles <- as.data.frame(profiles) # tibbles break things if (method == 'full') { design <- make_design_full( - profiles, n_resp, n_alts, n_q, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) } else if (method == 'orthogonal') { design <- make_design_orthogonal( @@ -396,11 +396,52 @@ reorder_cols <- function(design) { # Full Factorial Design ---- make_design_full <- function( - profiles, n_resp, n_alts, n_q, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) { - design <- get_randomized_design( - profiles, n_resp, n_alts, n_q, no_choice, label - ) + if (n_blocks > 1) { + design <- make_design_full_blocked( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label + ) + } else { + design <- get_randomized_design( + profiles, n_resp, n_alts, n_q, no_choice, label + ) + } + return(design) +} + +make_design_full_blocked <- function( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label +) { + # Make blocks + design <- suppressMessages(as.data.frame( + DoE.base::fac.design( + factor.names = get_profile_list(profiles), + blocks = n_blocks, + block.name = "blockID" + ) + )) + design$blockID <- as.numeric(as.character(design$blockID)) + design <- design[,c(names(profiles)[2:ncol(profiles)], "blockID")] + type_ids <- get_type_ids(profiles) + profiles <- join_profiles(design, profiles, type_ids) + + # Randomize design within each block + profiles <- split(profiles, profiles$blockID) + # Make sure number of respondents divides well into blocks + n_resp_list <- rep(n_resp / n_blocks, n_blocks) + if (! all(n_resp_list %% 1 == 0)) { + n_resp_list <- floor(n_resp_list) + n_resp_list[n_blocks] <- n_resp_list[n_blocks] + 1 + } + design <- list() + for (i in 1:n_blocks) { + design[[i]] <- get_randomized_design( + profiles[[i]], n_resp_list[i], n_alts, n_q, no_choice, label + ) + } + design <- do.call(rbind, design) + design <- add_metadata(design, n_resp, n_alts, n_q) return(design) } @@ -410,9 +451,9 @@ make_design_orthogonal <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { oa <- suppressMessages(as.data.frame( - DoE.base::oa.design( - factor.names = get_profile_list(profiles) - ) + DoE.base::oa.design( + factor.names = get_profile_list(profiles) + ) )) if (nrow(oa) == nrow(profiles)) { message("No orthogonal array found; using full factorial for design") @@ -641,6 +682,9 @@ join_profiles <- function(design, profiles, type_ids) { # Keep track of row order in design design$row_id <- seq(nrow(design)) + # Before joining profiles, ensure that all the data types are the same + # as in profiles, otherwise join won't work properly + # Convert numeric columns to actual numbers for (id in which(type_ids$continuous)) { design[,id] <- as.numeric(as.character(design[,id])) @@ -655,6 +699,7 @@ join_profiles <- function(design, profiles, type_ids) { varnames <- names(profiles[, 2:ncol(profiles)]) design <- merge(design, profiles, by = varnames, all.x = TRUE) design <- design[order(design$row_id),] + if ('blockID' %in% names(design)) { varnames <- c(varnames, 'blockID') } design <- design[c('profileID', varnames)] return(design) } diff --git a/R/input_checks.R b/R/input_checks.R index ecdbeb8..b9308db 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -60,6 +60,11 @@ check_inputs_design <- function( max_iter, parallel ) { + + if (n_blocks < 1) { + stop('n_blocks must be greater than or equal to 1') + } + if (n_blocks > n_resp) { stop("Maximum allowable number of blocks is one block per respondent") } From 4fdcb7944151cb87ff5565aafc3d446437830c66 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 11:44:17 -0400 Subject: [PATCH 14/17] updated paper references throughout --- R/choices.R | 7 ++++++- R/design.R | 15 +++++++-------- R/power.R | 6 +++++- man/cbc_choices.Rd | 8 +++++++- man/cbc_design.Rd | 15 +++++++-------- man/cbc_power.Rd | 7 ++++++- vignettes/usage.Rmd | 4 +++- 7 files changed, 41 insertions(+), 21 deletions(-) diff --git a/R/choices.R b/R/choices.R index 96ef5d0..77e8065 100644 --- a/R/choices.R +++ b/R/choices.R @@ -1,7 +1,9 @@ #' Simulate choices for a survey design #' #' Simulate choices for a survey design, either randomly or according to a -#' utility model defined by user-provided prior parameters. +#' utility model defined by user-provided prior parameters. All choices are +#' simulated using the 'logitr' package. For more details see the JSS article +#' on the 'logitr' package (Helveston, 2023). #' @keywords logitr mnl mxl mixed logit simulation #' #' @param design A data frame of a survey design. @@ -12,6 +14,9 @@ #' If `NULL` (the default), choices will be randomly assigned. #' @param n_draws The number of Halton draws to use for simulated choices #' for mixed logit models. Defaults to `100`. +#' @references +#' Helveston, J. P. (2023). logitr: Fast Estimation of Multinomial and Mixed Logit Models with Preference Space and Willingness-to-Pay Space Utility Parameterizations. Journal of Statistical Software, 105(10), 1–37, +#' \doi{10.18637/jss.v105.i10} #' @return Returns the `design` data frame with an additional `choice` column #' identifying the simulated choices. #' @export diff --git a/R/design.R b/R/design.R index 9a7f90c..ce18a08 100644 --- a/R/design.R +++ b/R/design.R @@ -62,7 +62,7 @@ #' used with these designs where blocks are created from subsets of the full #' factorial design. For more information about blocking with full factorial #' designs, see `?DoE.base::fac.design` as well as the JSS article on the -#' {DoE.base} package (Grömping, 2018) \doi{10.18637/jss.v085.i05}. +#' {DoE.base} package (Grömping, 2018). #' #' The `"orthogonal"` method first finds an orthogonal array from the full #' set of `profiles` (if possible), then randomly selects from it to create @@ -72,7 +72,7 @@ #' main effects at the expense of information about interaction effects. For #' more information about orthogonal designs, see `?DoE.base::oa.design` as #' well as the JSS article on the {DoE.base} package -#' (Grömping, 2018) \doi{10.18637/jss.v085.i05}. +#' (Grömping, 2018). #' #' For Bayesian D-efficient designs, use `"CEA"` or `"Modfed"` along with #' specified `priors`. If `method` is set to `"CEA"` or `"Modfed"` but without @@ -81,13 +81,12 @@ #' only the `"Modfed"` method can be used as `"CEA"` requires unrestricted #' `profiles`. For more details on Bayesian D-efficient designs, see #' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the -#' {idefix} package (Traets et al, 2020) \doi{10.18637/jss.v096.i03}. +#' {idefix} package (Traets et al, 2020). #' @references -#' Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal -#' of Statistical Software, 85(5), 1–41. \doi{10.18637/jss.v085.i05}. -#' Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal -#' Designs for Discrete Choice Experiments in R: The idefix Package. Journal -#' of Statistical Software, 96(3), 1–41. \doi{10.18637/jss.v096.i03}. +#' Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41, +#' \doi{10.18637/jss.v085.i05} +#' Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, +#' \doi{10.18637/jss.v096.i03} #' @return A data frame containing a choice-based conjoint survey design where #' each row is an alternative. #' @export diff --git a/R/power.R b/R/power.R index ed3d212..cce4887 100644 --- a/R/power.R +++ b/R/power.R @@ -7,7 +7,8 @@ #' level of statistical power on each coefficient. The number of models to #' estimate is set by the `nbreaks` argument, which breaks up the data into #' groups of increasing sample sizes. All models are estimated models using -#' the {logitr} package. For more details see Helveston (2023) \doi{10.18637/jss.v105.i10}. +#' the 'logitr' package. For more details see the JSS article on the 'logitr' +#' package (Helveston, 2023). #' @keywords logitr mnl mxl mixed logit sample size power #' #' @param data The data, formatted as a `data.frame` object. @@ -44,6 +45,9 @@ #' @param ... Other arguments that are passed to `logitr::logitr()` for model #' estimation. See the {logitr} documentation for details about other #' available arguments. +#' @references +#' Helveston, J. P. (2023). logitr: Fast Estimation of Multinomial and Mixed Logit Models with Preference Space and Willingness-to-Pay Space Utility Parameterizations. Journal of Statistical Software, 105(10), 1–37, +#' \doi{10.18637/jss.v105.i10} #' @return Returns a data frame of estimated model coefficients and standard #' errors for the same model estimated on subsets of the `data` with increasing #' sample sizes. diff --git a/man/cbc_choices.Rd b/man/cbc_choices.Rd index 676b18a..70190c2 100644 --- a/man/cbc_choices.Rd +++ b/man/cbc_choices.Rd @@ -25,7 +25,9 @@ identifying the simulated choices. } \description{ Simulate choices for a survey design, either randomly or according to a -utility model defined by user-provided prior parameters. +utility model defined by user-provided prior parameters. All choices are +simulated using the 'logitr' package. For more details see the JSS article +on the 'logitr' package (Helveston, 2023). } \examples{ library(cbcTools) @@ -88,6 +90,10 @@ data <- cbc_choices( ) ) } +\references{ +Helveston, J. P. (2023). logitr: Fast Estimation of Multinomial and Mixed Logit Models with Preference Space and Willingness-to-Pay Space Utility Parameterizations. Journal of Statistical Software, 105(10), 1–37, +\doi{10.18637/jss.v105.i10} +} \keyword{logit} \keyword{logitr} \keyword{mixed} diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index cdf00d5..8b1b48b 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -107,7 +107,7 @@ created by randomly selecting from the full set of \code{profiles}. Blocking can used with these designs where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see \code{?DoE.base::fac.design} as well as the JSS article on the -{DoE.base} package (Grömping, 2018) \doi{10.18637/jss.v085.i05}. +{DoE.base} package (Grömping, 2018). The \code{"orthogonal"} method first finds an orthogonal array from the full set of \code{profiles} (if possible), then randomly selects from it to create @@ -117,7 +117,7 @@ a "main effects" design since orthogonal arrays focus the information on the main effects at the expense of information about interaction effects. For more information about orthogonal designs, see \code{?DoE.base::oa.design} as well as the JSS article on the {DoE.base} package -(Grömping, 2018) \doi{10.18637/jss.v085.i05}. +(Grömping, 2018). For Bayesian D-efficient designs, use \code{"CEA"} or \code{"Modfed"} along with specified \code{priors}. If \code{method} is set to \code{"CEA"} or \code{"Modfed"} but without @@ -126,7 +126,7 @@ stating this will be shown. If you are using a restricted set of \code{profiles} only the \code{"Modfed"} method can be used as \code{"CEA"} requires unrestricted \code{profiles}. For more details on Bayesian D-efficient designs, see \code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the -{idefix} package (Traets et al, 2020) \doi{10.18637/jss.v096.i03}. +{idefix} package (Traets et al, 2020). } \examples{ library(cbcTools) @@ -196,11 +196,10 @@ design_bayesian <- cbc_design( ) } \references{ -Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal -of Statistical Software, 85(5), 1–41. \doi{10.18637/jss.v085.i05}. -Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal -Designs for Discrete Choice Experiments in R: The idefix Package. Journal -of Statistical Software, 96(3), 1–41. \doi{10.18637/jss.v096.i03}. +Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41, +\doi{10.18637/jss.v085.i05} +Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, +\doi{10.18637/jss.v096.i03} } \keyword{DoE.base} \keyword{design} diff --git a/man/cbc_power.Rd b/man/cbc_power.Rd index c867983..b3f5c86 100644 --- a/man/cbc_power.Rd +++ b/man/cbc_power.Rd @@ -83,7 +83,8 @@ is useful for determining the required sample size for obtaining a desired level of statistical power on each coefficient. The number of models to estimate is set by the \code{nbreaks} argument, which breaks up the data into groups of increasing sample sizes. All models are estimated models using -the {logitr} package. For more details see Helveston (2023) \doi{10.18637/jss.v105.i10}. +the 'logitr' package. For more details see the JSS article on the 'logitr' +package (Helveston, 2023). } \examples{ library(cbcTools) @@ -123,6 +124,10 @@ power <- cbc_power( n_cores = 2 ) } +\references{ +Helveston, J. P. (2023). logitr: Fast Estimation of Multinomial and Mixed Logit Models with Preference Space and Willingness-to-Pay Space Utility Parameterizations. Journal of Statistical Software, 105(10), 1–37, +\doi{10.18637/jss.v105.i10} +} \keyword{logit} \keyword{logitr} \keyword{mixed} diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index e6ac399..c9457b8 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -358,7 +358,7 @@ One of the convenient features of how the package is written is that the object ```{r} #| eval: false -power_plot <- cbc_profiles( +cbc_profiles( price = seq(1, 4, 0.5), # $ per pound type = c('Fuji', 'Gala', 'Honeycrisp'), freshness = c('Poor', 'Average', 'Excellent') @@ -388,3 +388,5 @@ plot() ```{r, ref.label='power', echo=FALSE} ``` + +# References From 1424f46b2094bc28a2a20f900b9c9e8f974c02e7 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 14:04:56 -0400 Subject: [PATCH 15/17] added `plot_compare_power()` --- NAMESPACE | 2 + R/methods.R | 7 +-- R/power.R | 80 +++++++++++++++++++++++++++++++++++ man/miscmethods.cbc_errors.Rd | 7 +-- man/plot_compare_power.Rd | 64 ++++++++++++++++++++++++++++ pkgdown/_pkgdown.yml | 1 + vignettes/usage.Rmd | 40 ++++++++++++++++++ 7 files changed, 195 insertions(+), 6 deletions(-) create mode 100644 man/plot_compare_power.Rd diff --git a/NAMESPACE b/NAMESPACE index e58bae9..11c2d24 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -9,11 +9,13 @@ export(cbc_overlap) export(cbc_power) export(cbc_profiles) export(cbc_restrict) +export(plot_compare_power) export(randLN) export(randN) importFrom(ggplot2,aes) importFrom(ggplot2,element_blank) importFrom(ggplot2,expand_limits) +importFrom(ggplot2,facet_wrap) importFrom(ggplot2,geom_hline) importFrom(ggplot2,geom_point) importFrom(ggplot2,ggplot) diff --git a/R/methods.R b/R/methods.R index 28f97dd..e09425c 100644 --- a/R/methods.R +++ b/R/methods.R @@ -51,12 +51,13 @@ print.cbc_models <- function ( #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a randomized survey design +#' # Make a survey design from all possible profiles +#' # (This is the default setting where method = 'full' for "full factorial") #' design <- cbc_design( #' profiles = profiles, #' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6 # Number of questions per respondent +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6 # Number of questions per respondent #' ) #' #' # Simulate random choices diff --git a/R/power.R b/R/power.R index cce4887..c49fad0 100644 --- a/R/power.R +++ b/R/power.R @@ -235,3 +235,83 @@ extract_errors <- function(models) { row.names(results) <- NULL return(results) } + +#' Plot a comparison of different design powers +#' +#' This function creates a ggplot2 object comparing the power curves of +#' different designs. Each design is color coded and each facet (sub plot) +#' is a model coefficient. +#' @param ... Any number of data frame containing power results obtained from +#' the `cbc_power()` function, separated by commas. +#' @return A plot comparing the power curves of different designs. +#' @importFrom ggplot2 ggplot aes geom_hline geom_point expand_limits theme_bw +#' theme element_blank labs facet_wrap +#' @importFrom rlang .data +#' @export +#' @examples +#' library(cbcTools) +#' +#' # Generate all possible profiles +#' profiles <- cbc_profiles( +#' price = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5), +#' type = c("Fuji", "Gala", "Honeycrisp"), +#' freshness = c('Poor', 'Average', 'Excellent') +#' ) +#' +#' # Make designs to compare: full factorial vs bayesian d-efficient +#' design_full <- cbc_design( +#' profiles = profiles, +#' n_resp = 300, n_alts = 3, n_q = 6 +#' ) +#' # Same priors will be used in bayesian design and simulated choices +#' priors <- list( +#' price = -0.1, +#' type = c(0.1, 0.2), +#' freshness = c(0.1, 0.2) +#' ) +#' design_bayesian <- cbc_design( +#' profiles = profiles, +#' n_resp = 300, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", +#' priors = priors, parallel = FALSE +#' ) +#' +#' # Obtain power for each design by simulating choices +#' power_full <- design_full |> +#' cbc_choices(obsID = "obsID", priors = priors) |> +#' cbc_power( +#' pars = c("price", "type", "freshness"), +#' outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 +#' ) +#' power_bayesian <- design_bayesian |> +#' cbc_choices(obsID = "obsID", priors = priors) |> +#' cbc_power( +#' pars = c("price", "type", "freshness"), +#' outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 +#' ) +#' +#' # Compare power of each design +#' plot_compare_power(power_bayesian, power_full) +plot_compare_power <- function(...) { + power <- list(...) + design_names <- unlist(lapply(as.list(match.call())[-1], deparse)) + names(power) <- design_names + for (i in 1:length(power)) { + power[[i]]$design <- names(power)[i] + } + power <- do.call(rbind, power) + ggplot2::ggplot(power) + + geom_hline(yintercept = 0.05, color = "red", linetype = 2) + + geom_point( + aes(x = .data$sampleSize, y = .data$se, color = .data$design), + size = 1.8 + ) + + facet_wrap(~.data$coef) + + expand_limits(y = 0) + + theme_bw(base_size = 14) + + theme(panel.grid.minor = element_blank()) + + labs( + color = "Design", + x = "Sample size", + y = "Standard error" + ) +} diff --git a/man/miscmethods.cbc_errors.Rd b/man/miscmethods.cbc_errors.Rd index be47eab..00858e0 100644 --- a/man/miscmethods.cbc_errors.Rd +++ b/man/miscmethods.cbc_errors.Rd @@ -31,12 +31,13 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a randomized survey design +# Make a survey design from all possible profiles +# (This is the default setting where method = 'full' for "full factorial") design <- cbc_design( profiles = profiles, n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6 # Number of questions per respondent + n_alts = 3, # Number of alternatives per question + n_q = 6 # Number of questions per respondent ) # Simulate random choices diff --git a/man/plot_compare_power.Rd b/man/plot_compare_power.Rd new file mode 100644 index 0000000..737cc74 --- /dev/null +++ b/man/plot_compare_power.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/power.R +\name{plot_compare_power} +\alias{plot_compare_power} +\title{Plot a comparison of different design powers} +\usage{ +plot_compare_power(...) +} +\arguments{ +\item{...}{Any number of data frame containing power results obtained from +the \code{cbc_power()} function, separated by commas.} +} +\value{ +A plot comparing the power curves of different designs. +} +\description{ +This function creates a ggplot2 object comparing the power curves of +different designs. Each design is color coded and each facet (sub plot) +is a model coefficient. +} +\examples{ +library(cbcTools) + +# Generate all possible profiles +profiles <- cbc_profiles( + price = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5), + type = c("Fuji", "Gala", "Honeycrisp"), + freshness = c('Poor', 'Average', 'Excellent') +) + +# Make designs to compare: full factorial vs bayesian d-efficient +design_full <- cbc_design( + profiles = profiles, + n_resp = 300, n_alts = 3, n_q = 6 +) +# Same priors will be used in bayesian design and simulated choices +priors <- list( + price = -0.1, + type = c(0.1, 0.2), + freshness = c(0.1, 0.2) +) +design_bayesian <- cbc_design( + profiles = profiles, + n_resp = 300, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", + priors = priors, parallel = FALSE +) + +# Obtain power for each design by simulating choices +power_full <- design_full |> +cbc_choices(obsID = "obsID", priors = priors) |> + cbc_power( + pars = c("price", "type", "freshness"), + outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + ) +power_bayesian <- design_bayesian |> + cbc_choices(obsID = "obsID", priors = priors) |> + cbc_power( + pars = c("price", "type", "freshness"), + outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + ) + +# Compare power of each design +plot_compare_power(power_bayesian, power_full) +} diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml index 1c4161a..e92352f 100644 --- a/pkgdown/_pkgdown.yml +++ b/pkgdown/_pkgdown.yml @@ -48,6 +48,7 @@ reference: desc: "Functions for conducting a power analysis." contents: - cbc_power + - plot_compare_power - title: "Methods" desc: "Various methods." contents: diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index c9457b8..75b2897 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -389,4 +389,44 @@ plot() ```{r, ref.label='power', echo=FALSE} ``` +## Comparing multiple designs + +When evaluating multiple designs, it can be helpful to visually compare their respective power curves. This can be done using the `plot_compare_power()` function. To use it, you have to first create different designs and then simulate the power of each design by simulating choices. Here is an example comparing a full factorial design against a Bayesian D-efficient design: + +```{r} +# Make designs to compare: full factorial vs bayesian d-efficient +design_full <- cbc_design( + profiles = profiles, + n_resp = 300, n_alts = 3, n_q = 6 +) +# Same priors will be used in bayesian design and simulated choices +priors <- list( + price = -0.1, + type = c(0.1, 0.2), + freshness = c(0.1, 0.2) +) +design_bayesian <- cbc_design( + profiles = profiles, + n_resp = 300, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", + priors = priors, parallel = FALSE +) + +# Obtain power for each design by simulating choices +power_full <- design_full |> +cbc_choices(obsID = "obsID", priors = priors) |> + cbc_power( + pars = c("price", "type", "freshness"), + outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + ) +power_bayesian <- design_bayesian |> + cbc_choices(obsID = "obsID", priors = priors) |> + cbc_power( + pars = c("price", "type", "freshness"), + outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + ) + +# Compare power of each design +plot_compare_power(power_bayesian, power_full) +``` + # References From 2daf0161a16357e90aea1fa42e0c452226a4f3fc Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 14:30:12 -0400 Subject: [PATCH 16/17] Bump to v0.4.0 --- CRAN-SUBMISSION | 4 ---- DESCRIPTION | 6 +++--- 2 files changed, 3 insertions(+), 7 deletions(-) delete mode 100644 CRAN-SUBMISSION diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION deleted file mode 100644 index 9ae0bec..0000000 --- a/CRAN-SUBMISSION +++ /dev/null @@ -1,4 +0,0 @@ -Version: 0.3.4 -Date: 2023-06-13 18:09:36 UTC -SHA: - dcb6aa72d97c124bb6194b5a5334c698d7062eb3 diff --git a/DESCRIPTION b/DESCRIPTION index e4e7b21..3cb3632 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cbcTools -Title: Design and Evaluate Choice-Based Conjoint Survey Experiments -Version: 0.3.4 +Title: A Simulation-Based Workflow to Design and Evaluate Choice-Based Conjoint Survey Experiments +Version: 0.4.0 Maintainer: John Helveston Authors@R: c( person(given = "John", @@ -8,7 +8,7 @@ Authors@R: c( role = c("cre", "aut", "cph"), email = "john.helveston@gmail.com", comment = c(ORCID = "0000-0002-2657-9191"))) -Description: Design and evaluate choice-based conjoint survey experiments in R. Generate a variety of survey designs, including full factorial designs, orthogonal designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct power analyses on a survey design by estimating the same model multiple times using different subsets of the data to simulate different sample sizes. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Choice simulation and model estimation are handled using the 'logitr' package (Helveston, 2023) . +Description: A simulation-based workflow to design and evaluate choice-based conjoint survey experiments. Generate a variety of survey designs, including full factorial designs, orthogonal designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct a power analysis for a given survey design by estimating the same model on different subsets of the data to simulate different sample sizes. Choice simulation and model estimation in power analyses are handled using the 'logitr' package (Helveston, 2023) . License: MIT + file LICENSE Encoding: UTF-8 LazyData: true From 6049d31ad71bde5e073eaf6c42e4f12c745265b4 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 30 Jun 2023 14:31:44 -0400 Subject: [PATCH 17/17] Create CRAN-SUBMISSION --- CRAN-SUBMISSION | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 CRAN-SUBMISSION diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..f1131c4 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 0.4.0 +Date: 2023-06-30 18:30:53 UTC +SHA: 2daf0161a16357e90aea1fa42e0c452226a4f3fc