From 4364084908c91e5fef78470c00a34c87b29106a1 Mon Sep 17 00:00:00 2001 From: John Helveston Date: Mon, 3 Jul 2023 11:48:01 -0400 Subject: [PATCH 01/43] prevent orthogonal designs from using `label` or restricted profiles --- R/design.R | 10 ++++++---- R/input_checks.R | 37 +++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/R/design.R b/R/design.R index ce18a08..21265b9 100644 --- a/R/design.R +++ b/R/design.R @@ -175,6 +175,8 @@ cbc_design <- function( parallel = FALSE ) { method <- check_design_method(method, priors) + profile_list <- get_profile_list(profiles) + profiles_restricted <- nrow(expand.grid(profile_list)) > nrow(profiles) check_inputs_design( profiles, n_resp, @@ -191,7 +193,8 @@ cbc_design <- function( probs, keep_db_error, max_iter, - parallel + parallel, + profiles_restricted ) profiles <- as.data.frame(profiles) # tibbles break things if (method == 'full') { @@ -206,7 +209,7 @@ cbc_design <- function( design <- make_design_bayesian( profiles, n_resp, n_alts, n_q, n_blocks, n_draws, n_start, no_choice, label, method, priors, prior_no_choice, probs, keep_db_error, max_iter, - parallel + parallel, profiles_restricted ) } # Reset row numbers @@ -473,7 +476,7 @@ make_design_orthogonal <- function( make_design_bayesian <- function( profiles, n_resp, n_alts, n_q, n_blocks, n_draws, n_start, no_choice, label, method, priors, prior_no_choice, probs, keep_db_error, max_iter, - parallel + parallel, profiles_restricted ) { # Set up levels and coding profile_list <- get_profile_list(profiles) @@ -525,7 +528,6 @@ make_design_bayesian <- function( } # Make the design - profiles_restricted <- nrow(expand.grid(lvl.names)) > nrow(profiles) if (profiles_restricted & (method == "CEA")) { # "CEA" method only works with unrestricted profile set method <- "Modfed" diff --git a/R/input_checks.R b/R/input_checks.R index b9308db..121a189 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -58,7 +58,8 @@ check_inputs_design <- function( probs, keep_db_error, max_iter, - parallel + parallel, + profiles_restricted ) { if (n_blocks < 1) { @@ -69,14 +70,24 @@ check_inputs_design <- function( stop("Maximum allowable number of blocks is one block per respondent") } - # If using a Bayesian D-efficient design with a no choice option, user must - # specify a value for prior_no_choice - if (no_choice) { - if (!is.null(priors) & is.null(prior_no_choice)) { + # Check that an appropriate method is used + + if (! method %in% c('full', 'orthogonal', 'CEA', 'Modfed')) { stop( - 'If "no_choice = TRUE", you must specify the prior utility ', - 'value for the "no choice" option using prior_no_choice' + 'The "method" argument must be set to "full", "orthogonal", ', + '"Modfed", or "CEA"' ) + } + + # Check that orthogonal design method does not use a label or a + # restricted set of profiles + + if (method == 'orthogonal') { + if (!is.null(label)) { + stop("Orthogonal designs cannot be labeled.") + } + if (profiles_restricted) { + stop("Orthogonal designs cannot use restricted profiles") } } @@ -89,13 +100,15 @@ check_inputs_design <- function( ) } - # Check that an appropriate method is used - - if (! method %in% c('full', 'orthogonal', 'CEA', 'Modfed')) { + # If using a Bayesian D-efficient design with a no choice option, user must + # specify a value for prior_no_choice + if (no_choice) { + if (!is.null(priors) & is.null(prior_no_choice)) { stop( - 'The "method" argument must be set to "full", "orthogonal", ', - '"Modfed", or "CEA"' + 'If "no_choice = TRUE", you must specify the prior utility ', + 'value for the "no choice" option using prior_no_choice' ) + } } # Check that priors are appropriate if specified From 5d03b73026f1c2b83dcfc85ea1eeb91f6cdef055 Mon Sep 17 00:00:00 2001 From: John Helveston Date: Mon, 3 Jul 2023 13:03:38 -0400 Subject: [PATCH 02/43] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index a60f316..6f8c68b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # cbcTools (development version) +- Added restrictions so that orthogonal designs cannot use the `label` argument or restricted profile sets (as either of these would result in a non-orthogonal design). + # cbcTools 0.4.0 - Adjustments made to the `method` argument in the `cbc_design()` function in preparation for potentially adding new design methods. From 9669a0d0c1f6e93429b590090239c51a289fdb41 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 3 Jul 2023 15:01:53 -0400 Subject: [PATCH 03/43] Update usage.Rmd --- vignettes/usage.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 75b2897..6e81368 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -85,7 +85,7 @@ Once a set of profiles is obtained, a conjoint survey can then be generated usin ## Full factorial designs -Full factorial designs are obtained by setting `method = 'full'` (the default setting for `method`). This method simply samples from the full set of `profiles`, ensuring that no two profiles are the same in any choice question. Blocking can used with these designs where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Grömping2018]. +Full factorial designs are obtained by setting `method = 'full'` (the default setting for `method`). This method simply samples from the full set of `profiles`, ensuring that no two profiles are the same in any choice question and that no two choice sets are the same for any one respondent. Blocking can be implemented where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Grömping2018]. The resulting `design` data frame includes the following columns: From fe495e606a45ad27a3f510bdb1899e27bde702a6 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 08:39:12 -0400 Subject: [PATCH 04/43] made function for repeating a design set into a design survey --- R/design.R | 43 +++++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 16 deletions(-) diff --git a/R/design.R b/R/design.R index 21265b9..af15975 100644 --- a/R/design.R +++ b/R/design.R @@ -225,9 +225,9 @@ get_randomized_design <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { if (is.null(label)) { - design <- get_design_rand(profiles, n_resp, n_alts, n_q) + design <- design_rand_sample(profiles, n_resp, n_alts, n_q) } else { - design <- get_design_rand_label(profiles, n_resp, n_alts, n_q, label) + design <- design_rand_sample_label(profiles, n_resp, n_alts, n_q, label) } if (no_choice) { design <- add_no_choice(design, n_alts) @@ -236,7 +236,7 @@ get_randomized_design <- function( return(design) } -get_design_rand <- function(profiles, n_resp, n_alts, n_q) { +design_rand_sample <- function(profiles, n_resp, n_alts, n_q) { design <- sample_profiles(profiles, size = n_resp * n_alts * n_q) design <- add_metadata(design, n_resp, n_alts, n_q) # Replace rows with duplicated profiles in each obsID or @@ -298,7 +298,7 @@ dup_obs_by_resp <- function(df) { df$profileID, df$obsID, FUN = function(x) sort(x) ) - # Convert the list of vectors to a data frame to check for duplicates + # Convert vector list to a data frame to check for duplicates dupe_df <- do.call(rbind, profiles_list) dup_ids <- which(duplicated(dupe_df)) if (length(dup_ids) > 0) { @@ -307,7 +307,7 @@ dup_obs_by_resp <- function(df) { return(NULL) } -get_design_rand_label <- function(profiles, n_resp, n_alts, n_q, label) { +design_rand_sample_label <- function(profiles, n_resp, n_alts, n_q, label) { n_levels <- length(unique(profiles[, label])) if (n_levels != n_alts) { warning( @@ -588,18 +588,8 @@ make_design_bayesian <- function( design$probs <- as.vector(t(D$probs)) } - # Add blockIDs - design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) - # Repeat design to match number of respondents - n_reps <- ceiling(n_resp / n_blocks) - design <- design[rep(seq_len(nrow(design)), n_reps), ] - row.names(design) <- NULL - design <- design[1:(n_resp*n_q*n_alts), ] - - # Add metadata - design <- add_metadata(design, n_resp, n_alts, n_q) - design <- reorder_cols(design) + design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) # Print DB error message( @@ -705,6 +695,27 @@ join_profiles <- function(design, profiles, type_ids) { return(design) } +repeat_design <- function(design, n_resp, n_alts, n_q, n_blocks) { + # Add blockIDs + design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) + + # Repeat design to match number of respondents + n_reps <- ceiling(n_resp / n_blocks) + design <- design[rep(seq_len(nrow(design)), n_reps), ] + row.names(design) <- NULL + design <- design[1:(n_resp*n_q*n_alts), ] + + # Add metadata + design <- add_metadata(design, n_resp, n_alts, n_q) + design <- reorder_cols(design) + return(design) +} + + + + + + add_no_choice_deff <- function(design, n_alts, varnames_discrete) { # First dummy code categorical variables design$obsID <- rep(seq(nrow(design) / n_alts), each = n_alts) From 2488a1d8cf3ee8d8aec49a904b108661ec651581 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 09:00:03 -0400 Subject: [PATCH 05/43] distinguishing between full and random designs --- R/design.R | 62 +++++++++++++++++++++++++++++++++--------------- R/input_checks.R | 6 ++--- 2 files changed, 46 insertions(+), 22 deletions(-) diff --git a/R/design.R b/R/design.R index af15975..9ed709f 100644 --- a/R/design.R +++ b/R/design.R @@ -30,9 +30,9 @@ #' Currently not compatible with Bayesian D-efficient designs. If used, #' the `n_alts` argument will be ignored as its value is defined by the unique #' number of levels in the `label` variable. Defaults to `NULL`. -#' @param method Choose the design method to use: `"full"`, `"orthogonal"`, -#' `"CEA"` or `"Modfed"`. Defaults to `"full"`. See details below for complete -#' description of each method. +#' @param method Choose the design method to use: `"random"`, `"full"`, +#' `"orthogonal"`, `"CEA"` or `"Modfed"`. Defaults to `"random"`. See details +#' below for complete description of each method. #' @param priors A list of one or more assumed prior parameters used to #' generate a Bayesian D-efficient design. Defaults to `NULL` #' @param prior_no_choice Prior utility value for the "no choice" alternative. @@ -52,27 +52,51 @@ #' @details #' The `method` argument determines the design method used. Options are: #' +#' - `"random"` #' - `"full"` #' - `"orthogonal"` #' - `"CEA"` #' - `"Modfed"` #' -#' The `"full"` method uses a "full factorial" design where choice sets are -#' created by randomly selecting from the full set of `profiles`. Blocking can -#' used with these designs where blocks are created from subsets of the full -#' factorial design. For more information about blocking with full factorial -#' designs, see `?DoE.base::fac.design` as well as the JSS article on the -#' {DoE.base} package (Grömping, 2018). +#' All methods ensure that the two following criteria are met: #' -#' The `"orthogonal"` method first finds an orthogonal array from the full -#' set of `profiles` (if possible), then randomly selects from it to create -#' choice sets. For some designs an orthogonal array can't be found, in which -#' case a full factorial design is used. This approach is also sometimes called -#' a "main effects" design since orthogonal arrays focus the information on the -#' main effects at the expense of information about interaction effects. For -#' more information about orthogonal designs, see `?DoE.base::oa.design` as -#' well as the JSS article on the {DoE.base} package -#' (Grömping, 2018). +#' 1. No two profiles are the same within any one choice set. +#' 2. No two choice sets are the same within any one respondent. +#' +#' A "no choice" option (created with the `no_choice` argument) can be added +#' using any design method. +#' +#' A "labeled" design (created with the `label` argument) can only be used +#' with `"random"` or `"full"` designs. +#' +#' Finally, restricted profile sets cannot be used with the `"orthogonal"` or +#' `"CEA"` methods. +#' +#' The `"random"` method (the default) creates a design where choice sets are +#' created by randomly sampling from the full set of `profiles`. This means +#' that few (if any) respondents will see the same sets of choice sets. This +#' method is less efficient than other approaches and may lead to a deficient +#' experiment in smaller sample sizes, though it guarantees equal ability to +#' estimate main and interaction effects. +#' +#' The `"full"` method for ("full factorial") creates a design where a fixed +#' set of choice sets is created by randomly sampling from the full set of +#' `profiles` and then repeated for each respondent in the survey. The number +#' of unique choice sets is determined by `n_q*n_blocks`. If blocking is used, +#' blocks are created as mutually exclusive subsets of `profiles`. For more +#' information about blocking with full factorial designs, see +#' `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} +#' package (Grömping, 2018). +#' +#' The `"orthogonal"` method creates a design where an orthogonal array from +#' the full set of `profiles` is first found (if possible) and then repeated +#' for each respondent in the survey. For some designs an orthogonal array +#' can't be found, in which case a full factorial design is used. This +#' approach is also sometimes called a "main effects" design since orthogonal +#' arrays focus the information on the main effects at the expense of +#' information about interaction effects. For more information about +#' orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article +#' on the {DoE.base} package (Grömping, 2018). #' #' For Bayesian D-efficient designs, use `"CEA"` or `"Modfed"` along with #' specified `priors`. If `method` is set to `"CEA"` or `"Modfed"` but without @@ -166,7 +190,7 @@ cbc_design <- function( n_start = 5, no_choice = FALSE, label = NULL, - method = "full", + method = "random", priors = NULL, prior_no_choice = NULL, probs = FALSE, diff --git a/R/input_checks.R b/R/input_checks.R index 121a189..b06d2c4 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -72,10 +72,10 @@ check_inputs_design <- function( # Check that an appropriate method is used - if (! method %in% c('full', 'orthogonal', 'CEA', 'Modfed')) { + if (! method %in% c('random', 'full', 'orthogonal', 'CEA', 'Modfed')) { stop( - 'The "method" argument must be set to "full", "orthogonal", ', - '"Modfed", or "CEA"' + 'The "method" argument must be set to "random", "full", ', + '"orthogonal", "Modfed", or "CEA"' ) } From dfd8f7159ad2b84c1cc09721211b204ee968b2ec Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 10:57:49 -0400 Subject: [PATCH 06/43] refactored how full blocked designs are created --- R/design.R | 185 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 105 insertions(+), 80 deletions(-) diff --git a/R/design.R b/R/design.R index 9ed709f..0db176a 100644 --- a/R/design.R +++ b/R/design.R @@ -63,14 +63,15 @@ #' 1. No two profiles are the same within any one choice set. #' 2. No two choice sets are the same within any one respondent. #' -#' A "no choice" option (created with the `no_choice` argument) can be added -#' using any design method. +#' The table below summarizes method compatibility with other design options. #' -#' A "labeled" design (created with the `label` argument) can only be used -#' with `"random"` or `"full"` designs. -#' -#' Finally, restricted profile sets cannot be used with the `"orthogonal"` or -#' `"CEA"` methods. +#' | Method | Include "no choice"? | Labeled designs? | Use restricted profiles? | Blocking? | +#' |----------------|----------------------|-------------------|--------------------------|-----------| +#' | `"random"` | ✅ | ✅ | ✅ | ❌ | +#' | `"full"` | ✅ | ✅ | ✅ | ✅ | +#' | `"orthogonal"` | ✅ | ❌ | ❌ | ✅ | +#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | +#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | #' #' The `"random"` method (the default) creates a design where choice sets are #' created by randomly sampling from the full set of `profiles`. This means @@ -81,29 +82,28 @@ #' #' The `"full"` method for ("full factorial") creates a design where a fixed #' set of choice sets is created by randomly sampling from the full set of -#' `profiles` and then repeated for each respondent in the survey. The number +#' `profiles` and then is repeated for each respondent in the survey. The number #' of unique choice sets is determined by `n_q*n_blocks`. If blocking is used, -#' blocks are created as mutually exclusive subsets of `profiles`. For more -#' information about blocking with full factorial designs, see -#' `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} -#' package (Grömping, 2018). +#' choice set blocks are created using mutually exclusive subsets of `profiles` +#' within each block. For more information about blocking with full factorial +#' designs, see `?DoE.base::fac.design` as well as the JSS article on the +#' {DoE.base} package (Grömping, 2018). #' #' The `"orthogonal"` method creates a design where an orthogonal array from -#' the full set of `profiles` is first found (if possible) and then repeated -#' for each respondent in the survey. For some designs an orthogonal array -#' can't be found, in which case a full factorial design is used. This -#' approach is also sometimes called a "main effects" design since orthogonal -#' arrays focus the information on the main effects at the expense of -#' information about interaction effects. For more information about -#' orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article -#' on the {DoE.base} package (Grömping, 2018). +#' the full set of `profiles` is found. Choice sets are then created by +#' randomly sampling from this orthogonal array and then is repeated for each +#' respondent in the survey. For cases where an orthogonal array cannot be +#' found, a full factorial design is used. This approach is also sometimes +#' called a "main effects" design since orthogonal arrays focus the information +#' on the main effects at the expense of information about interaction effects. +#' For more information about orthogonal designs, see `?DoE.base::oa.design` as +#' well as the JSS article on the {DoE.base} package (Grömping, 2018). #' -#' For Bayesian D-efficient designs, use `"CEA"` or `"Modfed"` along with -#' specified `priors`. If `method` is set to `"CEA"` or `"Modfed"` but without -#' `priors` specified, a prior of all `0`s will be used and a warning message -#' stating this will be shown. If you are using a restricted set of `profiles`, -#' only the `"Modfed"` method can be used as `"CEA"` requires unrestricted -#' `profiles`. For more details on Bayesian D-efficient designs, see +#' The `"CEA"` or `"Modfed"` methods create Bayesian D-efficient designs along +#' with specified `priors`. If `"CEA"` or `"Modfed"` is used without specifying +#' `priors`, a prior of all `0`s will be used and a warning message +#' stating this will be shown. Restricted sets of `profiles` can only be used +#' with `"Modfed"`. For more details on Bayesian D-efficient designs, see #' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the #' {idefix} package (Traets et al, 2020). #' @references @@ -199,8 +199,7 @@ cbc_design <- function( parallel = FALSE ) { method <- check_design_method(method, priors) - profile_list <- get_profile_list(profiles) - profiles_restricted <- nrow(expand.grid(profile_list)) > nrow(profiles) + profiles_restricted <- nrow(expand.grid(get_profile_list(profiles))) > nrow(profiles) check_inputs_design( profiles, n_resp, @@ -221,7 +220,11 @@ cbc_design <- function( profiles_restricted ) profiles <- as.data.frame(profiles) # tibbles break things - if (method == 'full') { + if (method == 'random') { + design <- make_design_random( + profiles, n_resp, n_alts, n_q, no_choice, label + ) + } else if (method == 'full') { design <- make_design_full( profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) @@ -241,11 +244,11 @@ cbc_design <- function( return(design) } -# Randomize the design ---- +# Random Design ---- # Sample from profiles to create randomized choice sets -get_randomized_design <- function( +make_design_random <- function( profiles, n_resp, n_alts, n_q, no_choice, label ) { if (is.null(label)) { @@ -257,6 +260,7 @@ get_randomized_design <- function( design <- add_no_choice(design, n_alts) } design <- reorder_cols(design) + row.names(design) <- NULL return(design) } @@ -286,13 +290,11 @@ sample_profiles <- function(profiles, size) { ) } -add_metadata <- function(design, n_resp, n_alts, n_q) { - n_rows_per_resp <- n_alts * n_q - design$respID <- rep(seq(n_resp), each = n_rows_per_resp) - design$qID <- rep(rep(seq(n_q), each = n_alts), n_resp) - design$altID <- rep(seq(n_alts), n_resp * n_q) - design$obsID <- rep(seq(n_resp * n_q), each = n_alts) - row.names(design) <- NULL +add_metadata <- function(design, n_resp, n_alts, n_q, n_blocks) { + design$respID <- rep(seq(n_resp), each = n_alts * n_q) + design$qID <- rep(rep(seq(n_q), each = n_alts), n_resp) + design$altID <- rep(seq(n_alts), n_resp * n_q) + design$obsID <- rep(seq(n_resp * n_q), each = n_alts) return(design) } @@ -413,7 +415,7 @@ get_col_types <- function(data) { } reorder_cols <- function(design) { - metaNames <- c("profileID", "respID", "qID", "altID", "obsID") + metaNames <- c("profileID", "respID", "qID", "altID", "obsID", "blockID") varNames <- setdiff(names(design), metaNames) design <- as.data.frame(design)[, c(metaNames, varNames)] return(design) @@ -425,19 +427,21 @@ make_design_full <- function( profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) { if (n_blocks > 1) { - design <- make_design_full_blocked( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label - ) + design <- make_design_full_blocked(profiles, n_resp, n_alts, n_q, n_blocks) } else { - design <- get_randomized_design( - profiles, n_resp, n_alts, n_q, no_choice, label - ) + design <- make_random_sets(profiles, n_alts, n_q) + design$blockID <- 1 + design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) + } + if (no_choice) { + design <- add_no_choice(design, n_alts) } + design <- reorder_cols(design) return(design) } make_design_full_blocked <- function( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks ) { # Make blocks design <- suppressMessages(as.data.frame( @@ -447,27 +451,68 @@ make_design_full_blocked <- function( block.name = "blockID" ) )) + # Make blockID a number, then join on profileIDs design$blockID <- as.numeric(as.character(design$blockID)) design <- design[,c(names(profiles)[2:ncol(profiles)], "blockID")] type_ids <- get_type_ids(profiles) profiles <- join_profiles(design, profiles, type_ids) - # Randomize design within each block + # Create random choice sets within each block + design <- design_rand_sets_blocked(profiles, n_resp, n_alts, n_q, n_blocks) + return(design) +} + +design_rand_sets_blocked <- function(profiles, n_resp, n_alts, n_q, n_blocks) { + # Make choice sets for each set of profileIDs profiles <- split(profiles, profiles$blockID) - # Make sure number of respondents divides well into blocks - n_resp_list <- rep(n_resp / n_blocks, n_blocks) - if (! all(n_resp_list %% 1 == 0)) { - n_resp_list <- floor(n_resp_list) - n_resp_list[n_blocks] <- n_resp_list[n_blocks] + 1 - } design <- list() for (i in 1:n_blocks) { - design[[i]] <- get_randomized_design( - profiles[[i]], n_resp_list[i], n_alts, n_q, no_choice, label - ) + profiles_temp <- profiles[[i]] + design[[i]] <- make_random_sets(profiles_temp, n_alts, nrow(profiles_temp)) + } + design <- do.call(rbind, design) + design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) + design <- add_metadata(design, n_resp, n_alts, n_q) + return(design) +} + +make_random_sets <- function(profiles, n_alts, n_q) { + design <- sample_random_sets(profiles, n_alts, n_q) + # Replace rows with duplicated profiles in each obsID or + # duplicated choice sets in each respID + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) + while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { + design <- sample_random_sets(profiles, n_alts, n_q) + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) } + return(design) +} + +sample_random_sets <- function(design, n_alts, n_q) { + # Make a randomized copy of the design for each alternative + design <- lapply(seq(n_alts), function(x) design[order(runif(n_q)),]) + design <- lapply(design, function(x) { + x$order <- seq(nrow(x)) + return(x) + }) design <- do.call(rbind, design) + design <- design[order(design$order),] + design <- add_metadata(design, n_resp = 1, n_alts, n_q) + design$order <- NULL + return(design) +} + +repeat_design <- function(design, n_resp, n_alts, n_q, n_blocks) { + # Repeat design to match number of respondents + n_reps <- ceiling(n_resp / n_blocks) + design <- design[rep(seq_len(nrow(design)), n_reps), ] + design <- design[1:(n_resp*n_q*n_alts), ] + + # Add metadata design <- add_metadata(design, n_resp, n_alts, n_q) + design <- reorder_cols(design) return(design) } @@ -489,9 +534,9 @@ make_design_orthogonal <- function( nrow(profiles), " profiles for design" ) } - type_ids <- get_type_ids(profiles) - oa <- join_profiles(oa, profiles, type_ids) - design <- get_randomized_design(oa, n_resp, n_alts, n_q, no_choice, label) + design <- make_random_sets(profiles, n_alts, n_q) + design$blockID <- 1 + design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) return(design) } @@ -613,6 +658,7 @@ make_design_bayesian <- function( } # Repeat design to match number of respondents + design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) # Print DB error @@ -719,27 +765,6 @@ join_profiles <- function(design, profiles, type_ids) { return(design) } -repeat_design <- function(design, n_resp, n_alts, n_q, n_blocks) { - # Add blockIDs - design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) - - # Repeat design to match number of respondents - n_reps <- ceiling(n_resp / n_blocks) - design <- design[rep(seq_len(nrow(design)), n_reps), ] - row.names(design) <- NULL - design <- design[1:(n_resp*n_q*n_alts), ] - - # Add metadata - design <- add_metadata(design, n_resp, n_alts, n_q) - design <- reorder_cols(design) - return(design) -} - - - - - - add_no_choice_deff <- function(design, n_alts, varnames_discrete) { # First dummy code categorical variables design$obsID <- rep(seq(nrow(design) / n_alts), each = n_alts) From 76a0191c33028f2e524e869cdbb580279276d585 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 11:06:20 -0400 Subject: [PATCH 07/43] updated how orthogonal designs are made to match that of full factorial --- R/design.R | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/R/design.R b/R/design.R index 0db176a..42bfe42 100644 --- a/R/design.R +++ b/R/design.R @@ -65,13 +65,13 @@ #' #' The table below summarizes method compatibility with other design options. #' -#' | Method | Include "no choice"? | Labeled designs? | Use restricted profiles? | Blocking? | -#' |----------------|----------------------|-------------------|--------------------------|-----------| -#' | `"random"` | ✅ | ✅ | ✅ | ❌ | -#' | `"full"` | ✅ | ✅ | ✅ | ✅ | -#' | `"orthogonal"` | ✅ | ❌ | ❌ | ✅ | -#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | -#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | +#' | Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? | +#' |----------------|----------------------|-------------------|----------------------|-----------| +#' | `"random"` | ✅ | ✅ | ✅ | ❌ | +#' | `"full"` | ✅ | ✅ | ✅ | ✅ | +#' | `"orthogonal"` | ✅ | ❌ | ❌ | ❌ | +#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | +#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | #' #' The `"random"` method (the default) creates a design where choice sets are #' created by randomly sampling from the full set of `profiles`. This means @@ -534,7 +534,9 @@ make_design_orthogonal <- function( nrow(profiles), " profiles for design" ) } - design <- make_random_sets(profiles, n_alts, n_q) + type_ids <- get_type_ids(profiles) + oa <- join_profiles(oa, profiles, type_ids) + design <- make_random_sets(oa, n_alts, n_q = nrow(oa)) design$blockID <- 1 design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) return(design) From e8ba98ad2e50aceaa592e4bf04371481ca62fb74 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 11:10:23 -0400 Subject: [PATCH 08/43] update documentation --- R/design.R | 41 ++++++++++------- man/cbc_design.Rd | 114 ++++++++++++++++++++++++++++++---------------- 2 files changed, 99 insertions(+), 56 deletions(-) diff --git a/R/design.R b/R/design.R index 42bfe42..9c83b48 100644 --- a/R/design.R +++ b/R/design.R @@ -126,52 +126,61 @@ #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' -#' # Make a survey design from all possible profiles -#' # (This is the default setting where method = 'full' for "full factorial") +#' # Make a survey by randomly sampling from all possible profiles +#' # (This is the default setting where method = 'random') #' design_full <- cbc_design( #' profiles = profiles, -#' n_resp = 300, # Number of respondents +#' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6 # Number of questions per respondent #' ) #' -#' # Make a survey design from an orthogonal array of profiles +#' # Make a survey using a full factorial design +#' design_full <- cbc_design( +#' profiles = profiles, +#' n_resp = 100, # Number of respondents +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6, # Number of questions per respondent +#' method = 'full' +#' ) +#' +#' # Make a survey from an orthogonal array of profiles #' design_ortho <- cbc_design( #' profiles = profiles, -#' n_resp = 300, # Number of respondents +#' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6, # Number of questions per respondent #' method = 'orthogonal' #' ) #' -#' # Make a survey design from all possible profiles -#' # with a "no choice" option +#' # Make a survey by randomly sampling from all possible profiles +#' # and include a "no choice" option #' design_full_nochoice <- cbc_design( #' profiles = profiles, -#' n_resp = 300, # Number of respondents +#' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6, # Number of questions per respondent #' no_choice = TRUE #' ) #' -#' # Make a survey design from all possible profiles +#' # Make a survey by randomly sampling from all possible profiles #' # with each level of the "type" attribute appearing as an alternative #' design_full_labeled <- cbc_design( -#' profiles = profiles, -#' n_resp = 300, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6, # Number of questions per respondent -#' label = "type" +#' profiles = profiles, +#' n_resp = 100, # Number of respondents +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6, # Number of questions per respondent +#' label = "type" #' ) #' #' # Make a Bayesian D-efficient design with a prior model specified #' # Note that by speed can be improved by setting parallel = TRUE #' design_bayesian <- cbc_design( #' profiles = profiles, -#' n_resp = 300, # Number of respondents +#' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6, # Number of questions per respondent -#' n_start = 1, # Defauls to 5, set to 1 here for a quick example +#' n_start = 1, # Defaults to 5, set to 1 here for a quick example #' priors = list( #' price = -0.1, #' type = c(0.1, 0.2), diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index 8b1b48b..bee6593 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -14,7 +14,7 @@ cbc_design( n_start = 5, no_choice = FALSE, label = NULL, - method = "full", + method = "random", priors = NULL, prior_no_choice = NULL, probs = FALSE, @@ -56,9 +56,9 @@ Currently not compatible with Bayesian D-efficient designs. If used, the \code{n_alts} argument will be ignored as its value is defined by the unique number of levels in the \code{label} variable. Defaults to \code{NULL}.} -\item{method}{Choose the design method to use: \code{"full"}, \code{"orthogonal"}, -\code{"CEA"} or \code{"Modfed"}. Defaults to \code{"full"}. See details below for complete -description of each method.} +\item{method}{Choose the design method to use: \code{"random"}, \code{"full"}, +\code{"orthogonal"}, \code{"CEA"} or \code{"Modfed"}. Defaults to \code{"random"}. See details +below for complete description of each method.} \item{priors}{A list of one or more assumed prior parameters used to generate a Bayesian D-efficient design. Defaults to \code{NULL}} @@ -96,35 +96,60 @@ and "labeled" (also known as "alternative specific") designs. \details{ The \code{method} argument determines the design method used. Options are: \itemize{ +\item \code{"random"} \item \code{"full"} \item \code{"orthogonal"} \item \code{"CEA"} \item \code{"Modfed"} } -The \code{"full"} method uses a "full factorial" design where choice sets are -created by randomly selecting from the full set of \code{profiles}. Blocking can -used with these designs where blocks are created from subsets of the full -factorial design. For more information about blocking with full factorial +All methods ensure that the two following criteria are met: +\enumerate{ +\item No two profiles are the same within any one choice set. +\item No two choice sets are the same within any one respondent. +} + +The table below summarizes method compatibility with other design options.\tabular{lllll}{ + Method \tab Include "no choice"? \tab Labeled designs? \tab Restricted profiles? \tab Blocking? \cr + \code{"random"} \tab ✅ \tab ✅ \tab ✅ \tab ❌ \cr + \code{"full"} \tab ✅ \tab ✅ \tab ✅ \tab ✅ \cr + \code{"orthogonal"} \tab ✅ \tab ❌ \tab ❌ \tab ❌ \cr + \code{"CEA"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr + \code{"Modfed"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr +} + + +The \code{"random"} method (the default) creates a design where choice sets are +created by randomly sampling from the full set of \code{profiles}. This means +that few (if any) respondents will see the same sets of choice sets. This +method is less efficient than other approaches and may lead to a deficient +experiment in smaller sample sizes, though it guarantees equal ability to +estimate main and interaction effects. + +The \code{"full"} method for ("full factorial") creates a design where a fixed +set of choice sets is created by randomly sampling from the full set of +\code{profiles} and then is repeated for each respondent in the survey. The number +of unique choice sets is determined by \code{n_q*n_blocks}. If blocking is used, +choice set blocks are created using mutually exclusive subsets of \code{profiles} +within each block. For more information about blocking with full factorial designs, see \code{?DoE.base::fac.design} as well as the JSS article on the {DoE.base} package (Grömping, 2018). -The \code{"orthogonal"} method first finds an orthogonal array from the full -set of \code{profiles} (if possible), then randomly selects from it to create -choice sets. For some designs an orthogonal array can't be found, in which -case a full factorial design is used. This approach is also sometimes called -a "main effects" design since orthogonal arrays focus the information on the -main effects at the expense of information about interaction effects. For -more information about orthogonal designs, see \code{?DoE.base::oa.design} as -well as the JSS article on the {DoE.base} package -(Grömping, 2018). - -For Bayesian D-efficient designs, use \code{"CEA"} or \code{"Modfed"} along with -specified \code{priors}. If \code{method} is set to \code{"CEA"} or \code{"Modfed"} but without -\code{priors} specified, a prior of all \code{0}s will be used and a warning message -stating this will be shown. If you are using a restricted set of \code{profiles}, -only the \code{"Modfed"} method can be used as \code{"CEA"} requires unrestricted -\code{profiles}. For more details on Bayesian D-efficient designs, see +The \code{"orthogonal"} method creates a design where an orthogonal array from +the full set of \code{profiles} is found. Choice sets are then created by +randomly sampling from this orthogonal array and then is repeated for each +respondent in the survey. For cases where an orthogonal array cannot be +found, a full factorial design is used. This approach is also sometimes +called a "main effects" design since orthogonal arrays focus the information +on the main effects at the expense of information about interaction effects. +For more information about orthogonal designs, see \code{?DoE.base::oa.design} as +well as the JSS article on the {DoE.base} package (Grömping, 2018). + +The \code{"CEA"} or \code{"Modfed"} methods create Bayesian D-efficient designs along +with specified \code{priors}. If \code{"CEA"} or \code{"Modfed"} is used without specifying +\code{priors}, a prior of all \code{0}s will be used and a warning message +stating this will be shown. Restricted sets of \code{profiles} can only be used +with \code{"Modfed"}. For more details on Bayesian D-efficient designs, see \code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the {idefix} package (Traets et al, 2020). } @@ -140,52 +165,61 @@ profiles <- cbc_profiles( freshness = c('Poor', 'Average', 'Excellent') ) -# Make a survey design from all possible profiles -# (This is the default setting where method = 'full' for "full factorial") +# Make a survey by randomly sampling from all possible profiles +# (This is the default setting where method = 'random') design_full <- cbc_design( profiles = profiles, - n_resp = 300, # Number of respondents + n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6 # Number of questions per respondent ) -# Make a survey design from an orthogonal array of profiles +# Make a survey using a full factorial design +design_full <- cbc_design( + profiles = profiles, + n_resp = 100, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'full' +) + +# Make a survey from an orthogonal array of profiles design_ortho <- cbc_design( profiles = profiles, - n_resp = 300, # Number of respondents + n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent method = 'orthogonal' ) -# Make a survey design from all possible profiles -# with a "no choice" option +# Make a survey by randomly sampling from all possible profiles +# and include a "no choice" option design_full_nochoice <- cbc_design( profiles = profiles, - n_resp = 300, # Number of respondents + n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent no_choice = TRUE ) -# Make a survey design from all possible profiles +# Make a survey by randomly sampling from all possible profiles # with each level of the "type" attribute appearing as an alternative design_full_labeled <- cbc_design( - profiles = profiles, - n_resp = 300, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - label = "type" + profiles = profiles, + n_resp = 100, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + label = "type" ) # Make a Bayesian D-efficient design with a prior model specified # Note that by speed can be improved by setting parallel = TRUE design_bayesian <- cbc_design( profiles = profiles, - n_resp = 300, # Number of respondents + n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - n_start = 1, # Defauls to 5, set to 1 here for a quick example + n_start = 1, # Defaults to 5, set to 1 here for a quick example priors = list( price = -0.1, type = c(0.1, 0.2), From b6c993957b957d67966f9357dc5555b75ed8079b Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 11:28:59 -0400 Subject: [PATCH 09/43] more documentation updates --- R/design.R | 58 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/R/design.R b/R/design.R index 9c83b48..95ec7fd 100644 --- a/R/design.R +++ b/R/design.R @@ -82,30 +82,36 @@ #' #' The `"full"` method for ("full factorial") creates a design where a fixed #' set of choice sets is created by randomly sampling from the full set of -#' `profiles` and then is repeated for each respondent in the survey. The number -#' of unique choice sets is determined by `n_q*n_blocks`. If blocking is used, -#' choice set blocks are created using mutually exclusive subsets of `profiles` -#' within each block. For more information about blocking with full factorial -#' designs, see `?DoE.base::fac.design` as well as the JSS article on the -#' {DoE.base} package (Grömping, 2018). +#' `profiles`, which is then repeated to meet the desired number of survey +#' respondents (`n_resp`). If blocking is used, choice set blocks are created using +#' mutually exclusive subsets of `profiles` within each block. This method is +#' less efficient than other approaches and may lead to a deficient experiment +#' in smaller sample sizes, though it guarantees equal ability to estimate main +#' and interaction effects. For more information about blocking with full +#' factorial designs, see `?DoE.base::fac.design` as well as the JSS article on +#' the {DoE.base} package (Grömping, 2018). #' #' The `"orthogonal"` method creates a design where an orthogonal array from -#' the full set of `profiles` is found. Choice sets are then created by -#' randomly sampling from this orthogonal array and then is repeated for each -#' respondent in the survey. For cases where an orthogonal array cannot be -#' found, a full factorial design is used. This approach is also sometimes -#' called a "main effects" design since orthogonal arrays focus the information -#' on the main effects at the expense of information about interaction effects. -#' For more information about orthogonal designs, see `?DoE.base::oa.design` as -#' well as the JSS article on the {DoE.base} package (Grömping, 2018). +#' the full set of `profiles` is found. A fixed set of choice sets is created by +#' randomly sampling from this orthogonal array, which is then repeated to meet +#' the desired number of survey respondents (`n_resp`). For cases where an +#' orthogonal array cannot be found, a full factorial design is used. This +#' approach is also sometimes called a "main effects" design since orthogonal +#' arrays focus the information on the main effects at the expense of +#' information about interaction effects. For more information about orthogonal +#' designs, see `?DoE.base::oa.design` as well as the JSS article on the +#' {DoE.base} package (Grömping, 2018). #' -#' The `"CEA"` or `"Modfed"` methods create Bayesian D-efficient designs along -#' with specified `priors`. If `"CEA"` or `"Modfed"` is used without specifying -#' `priors`, a prior of all `0`s will be used and a warning message -#' stating this will be shown. Restricted sets of `profiles` can only be used -#' with `"Modfed"`. For more details on Bayesian D-efficient designs, see -#' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the -#' {idefix} package (Traets et al, 2020). +#' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a +#' Bayesian D-efficient design. A fixed set of choice sets is first found and +#' then repeated to meet the desired number of survey respondents (`n_resp`). +#' The total number of unique choice sets is determined by `n_q*n_blocks`. +#' If `"CEA"` or `"Modfed"` is used without specifying `priors`, a prior of all +#' `0`s will be used and a warning message stating this will be shown. +#' Restricted sets of `profiles` can only be used with `"Modfed"`. For more +#' details on Bayesian D-efficient designs, see `?idefix::CEA` and +#' `?idefix::Modfed` as well as the JSS article on the {idefix} package +#' (Traets et al, 2020). #' @references #' Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41, #' \doi{10.18637/jss.v085.i05} @@ -268,6 +274,7 @@ make_design_random <- function( if (no_choice) { design <- add_no_choice(design, n_alts) } + design$blockID <- 1 design <- reorder_cols(design) row.names(design) <- NULL return(design) @@ -438,7 +445,7 @@ make_design_full <- function( if (n_blocks > 1) { design <- make_design_full_blocked(profiles, n_resp, n_alts, n_q, n_blocks) } else { - design <- make_random_sets(profiles, n_alts, n_q) + design <- make_random_sets(profiles, n_alts) design$blockID <- 1 design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) } @@ -477,7 +484,7 @@ design_rand_sets_blocked <- function(profiles, n_resp, n_alts, n_q, n_blocks) { design <- list() for (i in 1:n_blocks) { profiles_temp <- profiles[[i]] - design[[i]] <- make_random_sets(profiles_temp, n_alts, nrow(profiles_temp)) + design[[i]] <- make_random_sets(profiles_temp, n_alts) } design <- do.call(rbind, design) design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) @@ -485,7 +492,8 @@ design_rand_sets_blocked <- function(profiles, n_resp, n_alts, n_q, n_blocks) { return(design) } -make_random_sets <- function(profiles, n_alts, n_q) { +make_random_sets <- function(profiles, n_alts) { + n_q <- nrow(profiles) design <- sample_random_sets(profiles, n_alts, n_q) # Replace rows with duplicated profiles in each obsID or # duplicated choice sets in each respID @@ -545,7 +553,7 @@ make_design_orthogonal <- function( } type_ids <- get_type_ids(profiles) oa <- join_profiles(oa, profiles, type_ids) - design <- make_random_sets(oa, n_alts, n_q = nrow(oa)) + design <- make_random_sets(oa, n_alts) design$blockID <- 1 design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) return(design) From 941ffbb39c7cc6cf73dba4f2856e429e2e226314 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 17:04:11 -0400 Subject: [PATCH 10/43] Update cbc_design.Rd --- man/cbc_design.Rd | 50 ++++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index bee6593..5c3630b 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -128,30 +128,36 @@ estimate main and interaction effects. The \code{"full"} method for ("full factorial") creates a design where a fixed set of choice sets is created by randomly sampling from the full set of -\code{profiles} and then is repeated for each respondent in the survey. The number -of unique choice sets is determined by \code{n_q*n_blocks}. If blocking is used, -choice set blocks are created using mutually exclusive subsets of \code{profiles} -within each block. For more information about blocking with full factorial -designs, see \code{?DoE.base::fac.design} as well as the JSS article on the -{DoE.base} package (Grömping, 2018). +\code{profiles}, which is then repeated to meet the desired number of survey +respondents (\code{n_resp}). If blocking is used, choice set blocks are created using +mutually exclusive subsets of \code{profiles} within each block. This method is +less efficient than other approaches and may lead to a deficient experiment +in smaller sample sizes, though it guarantees equal ability to estimate main +and interaction effects. For more information about blocking with full +factorial designs, see \code{?DoE.base::fac.design} as well as the JSS article on +the {DoE.base} package (Grömping, 2018). The \code{"orthogonal"} method creates a design where an orthogonal array from -the full set of \code{profiles} is found. Choice sets are then created by -randomly sampling from this orthogonal array and then is repeated for each -respondent in the survey. For cases where an orthogonal array cannot be -found, a full factorial design is used. This approach is also sometimes -called a "main effects" design since orthogonal arrays focus the information -on the main effects at the expense of information about interaction effects. -For more information about orthogonal designs, see \code{?DoE.base::oa.design} as -well as the JSS article on the {DoE.base} package (Grömping, 2018). - -The \code{"CEA"} or \code{"Modfed"} methods create Bayesian D-efficient designs along -with specified \code{priors}. If \code{"CEA"} or \code{"Modfed"} is used without specifying -\code{priors}, a prior of all \code{0}s will be used and a warning message -stating this will be shown. Restricted sets of \code{profiles} can only be used -with \code{"Modfed"}. For more details on Bayesian D-efficient designs, see -\code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the -{idefix} package (Traets et al, 2020). +the full set of \code{profiles} is found. A fixed set of choice sets is created by +randomly sampling from this orthogonal array, which is then repeated to meet +the desired number of survey respondents (\code{n_resp}). For cases where an +orthogonal array cannot be found, a full factorial design is used. This +approach is also sometimes called a "main effects" design since orthogonal +arrays focus the information on the main effects at the expense of +information about interaction effects. For more information about orthogonal +designs, see \code{?DoE.base::oa.design} as well as the JSS article on the +{DoE.base} package (Grömping, 2018). + +The \code{"CEA"} and \code{"Modfed"} methods use the specified \code{priors} to create a +Bayesian D-efficient design. A fixed set of choice sets is first found and +then repeated to meet the desired number of survey respondents (\code{n_resp}). +The total number of unique choice sets is determined by \code{n_q*n_blocks}. +If \code{"CEA"} or \code{"Modfed"} is used without specifying \code{priors}, a prior of all +\code{0}s will be used and a warning message stating this will be shown. +Restricted sets of \code{profiles} can only be used with \code{"Modfed"}. For more +details on Bayesian D-efficient designs, see \code{?idefix::CEA} and +\code{?idefix::Modfed} as well as the JSS article on the {idefix} package +(Traets et al, 2020). } \examples{ library(cbcTools) From 9e13a02e33654c7ee75b933e529e947489953643 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 17:08:40 -0400 Subject: [PATCH 11/43] removed blockID for random and orthogonal designs --- R/design.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/design.R b/R/design.R index 95ec7fd..6f690c8 100644 --- a/R/design.R +++ b/R/design.R @@ -276,6 +276,7 @@ make_design_random <- function( } design$blockID <- 1 design <- reorder_cols(design) + design$blockID <- NULL row.names(design) <- NULL return(design) } @@ -453,6 +454,9 @@ make_design_full <- function( design <- add_no_choice(design, n_alts) } design <- reorder_cols(design) + if (n_blocks == 1) { + design$blockID <- NULL + } return(design) } @@ -555,7 +559,8 @@ make_design_orthogonal <- function( oa <- join_profiles(oa, profiles, type_ids) design <- make_random_sets(oa, n_alts) design$blockID <- 1 - design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) + design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks = 1) + design$blockID <- NULL return(design) } From 5fd0b6a3f9828619787683a24b190905dc4fdfc4 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 17:11:05 -0400 Subject: [PATCH 12/43] Update design.R --- R/design.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/design.R b/R/design.R index 6f690c8..1857bde 100644 --- a/R/design.R +++ b/R/design.R @@ -513,7 +513,7 @@ make_random_sets <- function(profiles, n_alts) { sample_random_sets <- function(design, n_alts, n_q) { # Make a randomized copy of the design for each alternative - design <- lapply(seq(n_alts), function(x) design[order(runif(n_q)),]) + design <- lapply(seq(n_alts), function(x) design[order(stats::runif(n_q)),]) design <- lapply(design, function(x) { x$order <- seq(nrow(x)) return(x) From f041a626d16e02aeda727f92ce294a094afe043f Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 5 Jul 2023 20:56:07 -0400 Subject: [PATCH 13/43] more updates to design documentation --- R/design.R | 187 +++++++++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 91 deletions(-) diff --git a/R/design.R b/R/design.R index 1857bde..499bb51 100644 --- a/R/design.R +++ b/R/design.R @@ -2,55 +2,55 @@ #' #' This function creates a data frame containing a choice-based conjoint survey #' design where each row is an alternative. Generate a variety of survey -#' designs, including full factorial designs, orthogonal designs, and -#' Bayesian D-efficient designs as well as designs with "no choice" options -#' and "labeled" (also known as "alternative specific") designs. +#' designs, including full factorial designs, orthogonal designs, and Bayesian +#' D-efficient designs as well as designs with "no choice" options and "labeled" +#' (also known as "alternative specific") designs. #' #' @keywords experiment design mnl mxl mixed logit logitr idefix DoE.base -#' @param profiles A data frame in which each row is a possible profile. -#' This can be generated using the `cbc_profiles()` function. +#' @param profiles A data frame in which each row is a possible profile. This +#' can be generated using the `cbc_profiles()` function. #' @param n_resp Number of survey respondents. #' @param n_alts Number of alternatives per choice question. #' @param n_q Number of questions per respondent. #' @param n_blocks Number of blocks used in Orthogonal or Bayesian D-efficient -#' designs. Max allowable is one block per respondent. Defaults to `1`, meaning -#' every respondent sees the same choice set. -#' @param n_draws Number of draws used in simulating the prior distribution -#' used in Bayesian D-efficient designs. Defaults to `50`. +#' designs. Max allowable is one block per respondent. Defaults to `1`, +#' meaning every respondent sees the same choice set. +#' @param n_draws Number of draws used in simulating the prior distribution used +#' in Bayesian D-efficient designs. Defaults to `50`. #' @param n_start A numeric value indicating the number of random start designs -#' to use in obtaining a Bayesian D-efficient design. The default is `5`. -#' Increasing `n_start` can result in a more efficient design at the expense -#' of increased computational time. -#' @param no_choice Include a "no choice" option in the choice sets? Defaults -#' to `FALSE`. If `TRUE`, the total number of alternatives per question will be -#' one more than the provided `n_alts` argument. -#' @param label The name of the variable to use in a "labeled" design -#' (also called an "alternative-specific design") such that each set of -#' alternatives contains one of each of the levels in the `label` attribute. -#' Currently not compatible with Bayesian D-efficient designs. If used, -#' the `n_alts` argument will be ignored as its value is defined by the unique -#' number of levels in the `label` variable. Defaults to `NULL`. +#' to use in obtaining a Bayesian D-efficient design. The default is `5`. +#' Increasing `n_start` can result in a more efficient design at the expense +#' of increased computational time. +#' @param no_choice Include a "no choice" option in the choice sets? Defaults to +#' `FALSE`. If `TRUE`, the total number of alternatives per question will be +#' one more than the provided `n_alts` argument. +#' @param label The name of the variable to use in a "labeled" design (also +#' called an "alternative-specific design") such that each set of alternatives +#' contains one of each of the levels in the `label` attribute. Currently not +#' compatible with Bayesian D-efficient designs. If used, the `n_alts` +#' argument will be ignored as its value is defined by the unique number of +#' levels in the `label` variable. Defaults to `NULL`. #' @param method Choose the design method to use: `"random"`, `"full"`, -#' `"orthogonal"`, `"CEA"` or `"Modfed"`. Defaults to `"random"`. See details -#' below for complete description of each method. -#' @param priors A list of one or more assumed prior parameters used to -#' generate a Bayesian D-efficient design. Defaults to `NULL` +#' `"orthogonal"`, `"CEA"` or `"Modfed"`. Defaults to `"random"`. See details +#' below for complete description of each method. +#' @param priors A list of one or more assumed prior parameters used to generate +#' a Bayesian D-efficient design. Defaults to `NULL` #' @param prior_no_choice Prior utility value for the "no choice" alternative. -#' Only required if `no_choice = TRUE`. Defaults to `NULL`. -#' @param probs If `TRUE`, for Bayesian D-efficient designs the resulting -#' design includes average predicted probabilities for each alternative in each -#' choice set given the sample from the prior preference distribution. -#' Defaults to `FALSE`. +#' Only required if `no_choice = TRUE`. Defaults to `NULL`. +#' @param probs If `TRUE`, for Bayesian D-efficient designs the resulting design +#' includes average predicted probabilities for each alternative in each +#' choice set given the sample from the prior preference distribution. +#' Defaults to `FALSE`. #' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned -#' object will be a list containing the design and the DB-error score. -#' Defaults to `FALSE`. +#' object will be a list containing the design and the DB-error score. +#' Defaults to `FALSE`. #' @param max_iter A numeric value indicating the maximum number allowed -#' iterations when searching for a Bayesian D-efficient design. The default is -#' 50. +#' iterations when searching for a Bayesian D-efficient design. The default is +#' 50. #' @param parallel Logical value indicating whether computations should be done -#' over multiple cores. The default is `FALSE`. -#' @details -#' The `method` argument determines the design method used. Options are: +#' over multiple cores. The default is `FALSE`. +#' @details The `method` argument determines the design method used. Options +#' are: #' #' - `"random"` #' - `"full"` @@ -58,67 +58,69 @@ #' - `"CEA"` #' - `"Modfed"` #' -#' All methods ensure that the two following criteria are met: +#' All methods ensure that the two following criteria are met: #' -#' 1. No two profiles are the same within any one choice set. -#' 2. No two choice sets are the same within any one respondent. +#' 1. No two profiles are the same within any one choice set. 2. No two choice +#' sets are the same within any one respondent. #' -#' The table below summarizes method compatibility with other design options. +#' The table below summarizes method compatibility with other design options. #' -#' | Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? | -#' |----------------|----------------------|-------------------|----------------------|-----------| -#' | `"random"` | ✅ | ✅ | ✅ | ❌ | -#' | `"full"` | ✅ | ✅ | ✅ | ✅ | -#' | `"orthogonal"` | ✅ | ❌ | ❌ | ❌ | -#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | -#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | +#' | Method | Include "no choice"? | Labeled designs? | Restricted +#' profiles? | Blocking? | +#' |----------------|----------------------|-------------------|----------------------|-----------| +#' | `"random"` | ✅ | ✅ | ✅ | ❌ | +#' | `"full"` | ✅ | ✅ | ✅ | ✅ | +#' | `"orthogonal"` | ✅ | ❌ | ❌ | ❌ | +#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | +#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | #' -#' The `"random"` method (the default) creates a design where choice sets are -#' created by randomly sampling from the full set of `profiles`. This means -#' that few (if any) respondents will see the same sets of choice sets. This -#' method is less efficient than other approaches and may lead to a deficient -#' experiment in smaller sample sizes, though it guarantees equal ability to -#' estimate main and interaction effects. +#' The `"random"` method (the default) creates a design where choice sets are +#' created by randomly sampling from the full set of `profiles` with +#' replacement. This means that few (if any) respondents will see the same +#' sets of choice sets. This method is less efficient than other approaches +#' and may lead to a deficient experiment in smaller sample sizes, though it +#' guarantees equal ability to estimate main and interaction effects. #' -#' The `"full"` method for ("full factorial") creates a design where a fixed -#' set of choice sets is created by randomly sampling from the full set of -#' `profiles`, which is then repeated to meet the desired number of survey -#' respondents (`n_resp`). If blocking is used, choice set blocks are created using -#' mutually exclusive subsets of `profiles` within each block. This method is -#' less efficient than other approaches and may lead to a deficient experiment -#' in smaller sample sizes, though it guarantees equal ability to estimate main -#' and interaction effects. For more information about blocking with full -#' factorial designs, see `?DoE.base::fac.design` as well as the JSS article on -#' the {DoE.base} package (Grömping, 2018). +#' For all other methods, a fixed set of choice sets is first created +#' following a particular strategy, and then the choice sets are repeated to +#' meet the desired number of survey respondents (determined by `n_resp`). #' -#' The `"orthogonal"` method creates a design where an orthogonal array from -#' the full set of `profiles` is found. A fixed set of choice sets is created by -#' randomly sampling from this orthogonal array, which is then repeated to meet -#' the desired number of survey respondents (`n_resp`). For cases where an -#' orthogonal array cannot be found, a full factorial design is used. This -#' approach is also sometimes called a "main effects" design since orthogonal -#' arrays focus the information on the main effects at the expense of -#' information about interaction effects. For more information about orthogonal -#' designs, see `?DoE.base::oa.design` as well as the JSS article on the -#' {DoE.base} package (Grömping, 2018). +#' The `"full"` method for ("full factorial") creates a design where choice +#' sets are created by randomly sampling from the full set of `profiles` +#' without replacement. If blocking is used, choice set blocks are created +#' using mutually exclusive subsets of `profiles` within each block. This +#' method is less efficient than other approaches and may lead to a deficient +#' experiment in smaller sample sizes, though it guarantees equal ability to +#' estimate main and interaction effects. For more information about blocking +#' with full factorial designs, see `?DoE.base::fac.design` as well as the JSS +#' article on the {DoE.base} package (Grömping, 2018). #' -#' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a -#' Bayesian D-efficient design. A fixed set of choice sets is first found and -#' then repeated to meet the desired number of survey respondents (`n_resp`). -#' The total number of unique choice sets is determined by `n_q*n_blocks`. -#' If `"CEA"` or `"Modfed"` is used without specifying `priors`, a prior of all -#' `0`s will be used and a warning message stating this will be shown. -#' Restricted sets of `profiles` can only be used with `"Modfed"`. For more -#' details on Bayesian D-efficient designs, see `?idefix::CEA` and -#' `?idefix::Modfed` as well as the JSS article on the {idefix} package -#' (Traets et al, 2020). -#' @references -#' Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41, -#' \doi{10.18637/jss.v085.i05} -#' Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, -#' \doi{10.18637/jss.v096.i03} +#' The `"orthogonal"` method creates a design where an orthogonal array from +#' the full set of `profiles` is found and then choice sets are created by +#' randomly sampling from this orthogonal array without replacement. For cases +#' where an orthogonal array cannot be found, a full factorial design is used. +#' This approach is also sometimes called a "main effects" design since +#' orthogonal arrays focus the information on the main effects at the expense +#' of information about interaction effects. For more information about +#' orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article +#' on the {DoE.base} package (Grömping, 2018). +#' +#' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a +#' Bayesian D-efficient design for the choice sets, with the total number of +#' unique choice sets determined by `n_q*n_blocks`. If `"CEA"` or `"Modfed"` +#' is used without specifying `priors`, a prior of all `0`s will be used and a +#' warning message stating this will be shown. Restricted sets of `profiles` +#' can only be used with `"Modfed"`. For more details on Bayesian D-efficient +#' designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS +#' article on the {idefix} package (Traets et al, 2020). +#' @references Grömping, U. (2018). R Package DoE.base for Factorial +#' Experiments. Journal of Statistical Software, 85(5), 1–41, +#' \doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. +#' (2020). Generating Optimal Designs for Discrete Choice Experiments in R: +#' The idefix Package. Journal of Statistical Software, 96(3), 1–41, +#' \doi{10.18637/jss.v096.i03} #' @return A data frame containing a choice-based conjoint survey design where -#' each row is an alternative. +#' each row is an alternative. #' @export #' @examples #' library(cbcTools) @@ -261,7 +263,7 @@ cbc_design <- function( # Random Design ---- -# Sample from profiles to create randomized choice sets +# Sample from profiles with replacement to create randomized choice sets make_design_random <- function( profiles, n_resp, n_alts, n_q, no_choice, label @@ -440,6 +442,9 @@ reorder_cols <- function(design) { # Full Factorial Design ---- +# Arrange copies of the full set of profiles into choice sets by sampling +# without replacement + make_design_full <- function( profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) { From 919b7252c32d1bc4965e89e6e8d6e4514b55688f Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 08:31:44 -0400 Subject: [PATCH 14/43] major reorg of how choice sets are formed and repeated --- R/design.R | 574 +++++++++++++++++++++++++++-------------------------- 1 file changed, 296 insertions(+), 278 deletions(-) diff --git a/R/design.R b/R/design.R index 499bb51..c836f7c 100644 --- a/R/design.R +++ b/R/design.R @@ -65,54 +65,61 @@ #' #' The table below summarizes method compatibility with other design options. #' -#' | Method | Include "no choice"? | Labeled designs? | Restricted -#' profiles? | Blocking? | -#' |----------------|----------------------|-------------------|----------------------|-----------| -#' | `"random"` | ✅ | ✅ | ✅ | ❌ | -#' | `"full"` | ✅ | ✅ | ✅ | ✅ | -#' | `"orthogonal"` | ✅ | ❌ | ❌ | ❌ | -#' | `"CEA"` | ✅ | ❌ | ❌ | ✅ | -#' | `"Modfed"` | ✅ | ❌ | ✅ | ✅ | +#' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | +#' Blocking? +#' ---|---|---|---|--- +#' `"random"` | ✅ | ✅ | ✅ | ❌ `"full"` | ✅ | ✅ | ✅ | ✅ +#' `"orthogonal"` | ✅ | ❌ | ❌ | ✅ `"CEA"` | ✅ | ❌ | ❌ | ✅ `"Modfed"` | +#' ✅ | ❌ | ✅ | ✅ #' #' The `"random"` method (the default) creates a design where choice sets are -#' created by randomly sampling from the full set of `profiles` with -#' replacement. This means that few (if any) respondents will see the same +#' created by randomly sampling from the full set of `profiles` *with +#' *replacement. This means that few (if any) respondents will see the same #' sets of choice sets. This method is less efficient than other approaches #' and may lead to a deficient experiment in smaller sample sizes, though it #' guarantees equal ability to estimate main and interaction effects. #' #' For all other methods, a fixed set of choice sets is first created -#' following a particular strategy, and then the choice sets are repeated to -#' meet the desired number of survey respondents (determined by `n_resp`). +#' following a particular strategy, and then the choice sets are #' #' The `"full"` method for ("full factorial") creates a design where choice #' sets are created by randomly sampling from the full set of `profiles` -#' without replacement. If blocking is used, choice set blocks are created -#' using mutually exclusive subsets of `profiles` within each block. This -#' method is less efficient than other approaches and may lead to a deficient -#' experiment in smaller sample sizes, though it guarantees equal ability to -#' estimate main and interaction effects. For more information about blocking -#' with full factorial designs, see `?DoE.base::fac.design` as well as the JSS -#' article on the {DoE.base} package (Grömping, 2018). +#' *without replacement*. The choice sets are then repeated to meet the +#' desired number of survey respondents (determined by `n_resp`). If blocking +#' is used, choice set blocks are created using mutually exclusive subsets of +#' `profiles` within each block. This method produces a design with similar +#' performance with that of the `"random"` method, except the choice sets are +#' repeated and thus there will be many more opportunities for different +#' respondents to see the same choice sets. This method is less efficient than +#' other approaches and may lead to a deficient experiment in smaller sample +#' sizes, though it guarantees equal ability to estimate main and interaction +#' effects. For more information about blocking with full factorial designs, +#' see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} +#' package (Grömping, 2018). #' #' The `"orthogonal"` method creates a design where an orthogonal array from #' the full set of `profiles` is found and then choice sets are created by -#' randomly sampling from this orthogonal array without replacement. For cases -#' where an orthogonal array cannot be found, a full factorial design is used. -#' This approach is also sometimes called a "main effects" design since -#' orthogonal arrays focus the information on the main effects at the expense -#' of information about interaction effects. For more information about -#' orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article -#' on the {DoE.base} package (Grömping, 2018). +#' randomly sampling from this orthogonal array *without replacement*. The +#' choice sets are then repeated to meet the desired number of survey +#' respondents (determined by `n_resp`). If blocking is used, choice set +#' blocks are created using mutually exclusive subsets of the orthogonal array +#' within each block. For cases where an orthogonal array cannot be found, a +#' full factorial design is used. This approach is also sometimes called a +#' "main effects" design since orthogonal arrays focus the information on the +#' main effects at the expense of information about interaction effects. For +#' more information about orthogonal designs, see `?DoE.base::oa.design` as +#' well as the JSS article on the {DoE.base} package (Grömping, 2018). #' #' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a #' Bayesian D-efficient design for the choice sets, with the total number of -#' unique choice sets determined by `n_q*n_blocks`. If `"CEA"` or `"Modfed"` -#' is used without specifying `priors`, a prior of all `0`s will be used and a -#' warning message stating this will be shown. Restricted sets of `profiles` -#' can only be used with `"Modfed"`. For more details on Bayesian D-efficient -#' designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS -#' article on the {idefix} package (Traets et al, 2020). +#' unique choice sets determined by `n_q*n_blocks`. The choice sets are then +#' repeated to meet the desired number of survey respondents (determined by +#' `n_resp`). If `"CEA"` or `"Modfed"` is used without specifying `priors`, a +#' prior of all `0`s will be used and a warning message stating this will be +#' shown. Restricted sets of `profiles` can only be used with `"Modfed"`. For +#' more details on Bayesian D-efficient designs, see `?idefix::CEA` and +#' `?idefix::Modfed` as well as the JSS article on the {idefix} package +#' (Traets et al, 2020). #' @references Grömping, U. (2018). R Package DoE.base for Factorial #' Experiments. Journal of Statistical Software, 85(5), 1–41, #' \doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. @@ -136,7 +143,7 @@ #' #' # Make a survey by randomly sampling from all possible profiles #' # (This is the default setting where method = 'random') -#' design_full <- cbc_design( +#' design_random <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -153,7 +160,7 @@ #' ) #' #' # Make a survey from an orthogonal array of profiles -#' design_ortho <- cbc_design( +#' design_orthogonal <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -163,7 +170,7 @@ #' #' # Make a survey by randomly sampling from all possible profiles #' # and include a "no choice" option -#' design_full_nochoice <- cbc_design( +#' design_random_nochoice <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -173,7 +180,7 @@ #' #' # Make a survey by randomly sampling from all possible profiles #' # with each level of the "type" attribute appearing as an alternative -#' design_full_labeled <- cbc_design( +#' design_random_labeled <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question @@ -247,7 +254,7 @@ cbc_design <- function( ) } else if (method == 'orthogonal') { design <- make_design_orthogonal( - profiles, n_resp, n_alts, n_q, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) } else { design <- make_design_bayesian( @@ -256,60 +263,70 @@ cbc_design <- function( parallel, profiles_restricted ) } - # Reset row numbers + design <- reorder_cols(design) row.names(design) <- NULL return(design) } -# Random Design ---- - -# Sample from profiles with replacement to create randomized choice sets +# General helpers ---- -make_design_random <- function( - profiles, n_resp, n_alts, n_q, no_choice, label -) { - if (is.null(label)) { - design <- design_rand_sample(profiles, n_resp, n_alts, n_q) - } else { - design <- design_rand_sample_label(profiles, n_resp, n_alts, n_q, label) - } - if (no_choice) { - design <- add_no_choice(design, n_alts) +get_profile_list <- function(profiles) { + profile_lvls <- profiles[, 2:ncol(profiles)] + varnames <- names(profile_lvls) + type_ids <- get_type_ids(profiles) + profile_list <- list() + for (i in seq_len(ncol(profile_lvls))) { + if (type_ids$discrete[i]) { + profile_list[[i]] <- levels(profile_lvls[,i]) + } else { + profile_list[[i]] <- unique(profile_lvls[,i]) + } } - design$blockID <- 1 - design <- reorder_cols(design) - design$blockID <- NULL - row.names(design) <- NULL - return(design) + names(profile_list) <- varnames + return(profile_list) } -design_rand_sample <- function(profiles, n_resp, n_alts, n_q) { - design <- sample_profiles(profiles, size = n_resp * n_alts * n_q) - design <- add_metadata(design, n_resp, n_alts, n_q) - # Replace rows with duplicated profiles in each obsID or - # duplicated choice sets in each respID - dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp, n_q) - while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { - # cat('Number dupe rows by obs:', length(dup_rows_obs), '\n') - # cat('Number dupe rows by resp:', length(dup_rows_resp), '\n') - dup_rows <- unique(c(dup_rows_obs, dup_rows_resp)) - new_rows <- sample_profiles(profiles, size = length(dup_rows)) - design[dup_rows, 1:ncol(new_rows)] <- new_rows - # Recalculate duplicates - dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp, n_q) - } - return(design) +get_type_ids <- function(profiles) { + types <- get_col_types(profiles[, 2:ncol(profiles)]) + ids <- list() + ids$discrete <- types %in% c("factor", "character") + ids$continuous <- !ids$discrete + return(ids) } -sample_profiles <- function(profiles, size) { - return(profiles[sample( - x = seq_len(nrow(profiles)), size = size, replace = TRUE), ] - ) +join_profiles <- function(design, profiles) { + + # Replaces the generated design with rows from profiles, which ensures + # factor levels in profiles are maintained in design + + # Keep track of row order in design + design$row_id <- seq(nrow(design)) + + # Before joining profiles, ensure that all the data types are the same + # as in profiles, otherwise join won't work properly + + type_ids <- get_type_ids(profiles) + + # Convert numeric columns to actual numbers + for (id in which(type_ids$continuous)) { + design[,id] <- as.numeric(as.character(design[,id])) + } + + # Convert character types to factors and set same levels as profiles + for (id in which(type_ids$discrete)) { + design[,id] <- factor(design[,id], levels = levels(profiles[,id+1])) + } + + # Join on profileIDs, then reorder to retain design order + varnames <- names(profiles[, 2:ncol(profiles)]) + design <- merge(design, profiles, by = varnames, all.x = TRUE) + design <- design[order(design$row_id),] + if ('blockID' %in% names(design)) { varnames <- c(varnames, 'blockID') } + design <- design[c('profileID', varnames)] + return(design) } -add_metadata <- function(design, n_resp, n_alts, n_q, n_blocks) { +add_metadata <- function(design, n_resp, n_alts, n_q) { design$respID <- rep(seq(n_resp), each = n_alts * n_q) design$qID <- rep(rep(seq(n_q), each = n_alts), n_resp) design$altID <- rep(seq(n_alts), n_resp * n_q) @@ -352,60 +369,12 @@ dup_obs_by_resp <- function(df) { return(NULL) } -design_rand_sample_label <- function(profiles, n_resp, n_alts, n_q, label) { - n_levels <- length(unique(profiles[, label])) - if (n_levels != n_alts) { - warning( - "The supplied 'n_alts' argument is being ignored and set to ", n_levels, - " to match the number of unique levels in the ", label, - " variable.\n" - ) - # Over-ride user-provided n_alts as it is determined by the label - n_alts <- n_levels - } - # Randomize rows by label - labels <- split(profiles, profiles[label]) - design <- sample_profiles_by_group(labels, size = n_resp * n_q) - # Replace rows with duplicated profiles in each obsID or - # duplicated choice sets in each respID - design <- add_metadata(design, n_resp, n_alts, n_q) - dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp, n_q) - while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { - # cat('Number dupe rows by obs:', length(dup_rows_obs), '\n') - # cat('Number dupe rows by resp:', length(dup_rows_resp), '\n') - dup_rows <- unique(c(dup_rows_obs, dup_rows_resp)) - new_rows <- sample_profiles_by_group(labels, size = length(dup_rows) / n_alts) - design[dup_rows, 1:ncol(new_rows)] <- new_rows - # Recalculate duplicates - dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp, n_q) - } - return(design) -} - -sample_profiles_by_group <- function(labels, size) { - design <- lapply(labels, function(x) sample_profiles(x, size = size)) - design <- lapply(design, function(x) add_label_id(x)) - design <- do.call(rbind, design) - design <- design[order(design$labelID), ] - design$labelID <- NULL - return(design) -} - -add_label_id <- function(design) { - design$labelID <- seq(nrow(design)) - return(design) -} - add_no_choice <- function(design, n_alts) { # Must dummy code categorical variables to include an outside good design <- dummy_code(design) # Create outside good rows design_og <- design[which(design$altID == 1), ] - design_og[ - , !names(design_og) %in% c("respID", "qID", "altID", "obsID") - ] <- 0 + design_og[,!names(design_og) %in% c("respID", "qID", "altID", "obsID")] <- 0 design_og$altID <- n_alts + 1 design_og$no_choice <- 1 # Insert outside good rows into design @@ -427,145 +396,245 @@ dummy_code <- function(design) { get_col_types <- function(data) { types <- lapply(data, class) - test <- function(x) { - x[1] - } + test <- function(x) { x[1] } return(unlist(lapply(types, test))) } reorder_cols <- function(design) { - metaNames <- c("profileID", "respID", "qID", "altID", "obsID", "blockID") - varNames <- setdiff(names(design), metaNames) - design <- as.data.frame(design)[, c(metaNames, varNames)] - return(design) + metaNames <- c("profileID", "respID", "qID", "altID", "obsID", "blockID") + varNames <- setdiff(names(design), metaNames) + design <- as.data.frame(design)[, c(metaNames, varNames)] + return(design) } -# Full Factorial Design ---- +# Choice sets ---- -# Arrange copies of the full set of profiles into choice sets by sampling -# without replacement +make_random_sets <- function(profiles, n_alts) { + n_q <- nrow(profiles) + design <- sample_random_sets(profiles, n_alts, n_q) + # Replace rows with duplicated profiles in each obsID or + # duplicated choice sets in each respID + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) + while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { + design <- sample_random_sets(profiles, n_alts, n_q) + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) + } + return(design) +} -make_design_full <- function( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label -) { +make_random_sets_by_block <- function(profiles, n_alts, n_blocks) { + # Make choice sets for each set of profileIDs + profiles <- split(profiles, profiles$blockID) + choice_sets <- list() + for (i in 1:n_blocks) { + choice_sets[[i]] <- make_random_sets(profiles[[i]], n_alts) + } + choice_sets <- do.call(rbind, choice_sets) + return(choice_sets) +} + +sample_random_sets <- function(profiles, n_alts, n_q) { + # Make a randomized copy of the profiles for each alternative + sets <- lapply(seq(n_alts), function(x) profiles[order(stats::runif(n_q)),]) + sets <- lapply(sets, function(x) { + x$order <- seq(nrow(x)) + return(x) + }) + sets <- do.call(rbind, sets) + sets <- sets[order(sets$order),] + sets <- add_metadata(sets, n_resp = 1, n_alts, n_q) + sets$order <- NULL + return(sets) +} + +repeat_sets <- function(choice_sets, n_resp, n_alts, n_q, n_blocks) { + # Repeat choice sets to match number of respondents if (n_blocks > 1) { - design <- make_design_full_blocked(profiles, n_resp, n_alts, n_q, n_blocks) + choice_sets <- split(choice_sets, choice_sets$blockID) + n_resp_block <- ceiling(n_resp / n_blocks) + n_reps <- ceiling(n_resp_block / (nrow(choice_sets[[1]]) / n_alts / n_q)) + design <- list() + for (i in seq_len(n_blocks)) { + set <- choice_sets[[i]] + temp <- set[rep(seq_len(nrow(set)), n_reps), ] + design[[i]] <- temp[1:(n_resp_block*n_q*n_alts), ] + } + design <- do.call(rbind, design) } else { - design <- make_random_sets(profiles, n_alts) - design$blockID <- 1 - design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) - } - if (no_choice) { - design <- add_no_choice(design, n_alts) - } - design <- reorder_cols(design) - if (n_blocks == 1) { - design$blockID <- NULL + design <- choice_sets[rep(seq_len(nrow(choice_sets)), n_resp), ] } + design <- design[1:(n_resp*n_q*n_alts), ] + design <- add_metadata(design, n_resp, n_alts, n_q) return(design) } -make_design_full_blocked <- function( - profiles, n_resp, n_alts, n_q, n_blocks -) { - # Make blocks - design <- suppressMessages(as.data.frame( - DoE.base::fac.design( - factor.names = get_profile_list(profiles), - blocks = n_blocks, - block.name = "blockID" - ) - )) - # Make blockID a number, then join on profileIDs - design$blockID <- as.numeric(as.character(design$blockID)) - design <- design[,c(names(profiles)[2:ncol(profiles)], "blockID")] - type_ids <- get_type_ids(profiles) - profiles <- join_profiles(design, profiles, type_ids) +# Random Design ---- + +# Sample from profiles with replacement to create randomized choice sets - # Create random choice sets within each block - design <- design_rand_sets_blocked(profiles, n_resp, n_alts, n_q, n_blocks) +make_design_random <- function( + profiles, n_resp, n_alts, n_q, no_choice, label +) { + if (is.null(label)) { + design <- design_rand_sample(profiles, n_resp, n_alts, n_q) + } else { + design <- design_rand_sample_label(profiles, n_resp, n_alts, n_q, label) + } + if (no_choice) { + design <- add_no_choice(design, n_alts) + } return(design) } -design_rand_sets_blocked <- function(profiles, n_resp, n_alts, n_q, n_blocks) { - # Make choice sets for each set of profileIDs - profiles <- split(profiles, profiles$blockID) - design <- list() - for (i in 1:n_blocks) { - profiles_temp <- profiles[[i]] - design[[i]] <- make_random_sets(profiles_temp, n_alts) - } - design <- do.call(rbind, design) - design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) +design_rand_sample <- function(profiles, n_resp, n_alts, n_q) { + design <- sample_profiles(profiles, size = n_resp * n_alts * n_q) design <- add_metadata(design, n_resp, n_alts, n_q) + # Replace rows with duplicated profiles in each obsID or + # duplicated choice sets in each respID + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp, n_q) + while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { + # cat('Number dupe rows by obs:', length(dup_rows_obs), '\n') + # cat('Number dupe rows by resp:', length(dup_rows_resp), '\n') + dup_rows <- unique(c(dup_rows_obs, dup_rows_resp)) + new_rows <- sample_profiles(profiles, size = length(dup_rows)) + design[dup_rows, 1:ncol(new_rows)] <- new_rows + # Recalculate duplicates + dup_rows_obs <- get_dup_obs(design, n_alts) + dup_rows_resp <- get_dup_resp(design, n_resp, n_q) + } return(design) } -make_random_sets <- function(profiles, n_alts) { - n_q <- nrow(profiles) - design <- sample_random_sets(profiles, n_alts, n_q) +design_rand_sample_label <- function(profiles, n_resp, n_alts, n_q, label) { + n_levels <- length(unique(profiles[, label])) + if (n_levels != n_alts) { + warning( + "The supplied 'n_alts' argument is being ignored and set to ", n_levels, + " to match the number of unique levels in the ", label, + " variable.\n" + ) + # Over-ride user-provided n_alts as it is determined by the label + n_alts <- n_levels + } + # Randomize rows by label + labels <- split(profiles, profiles[label]) + design <- sample_profiles_by_group(labels, size = n_resp * n_q) # Replace rows with duplicated profiles in each obsID or # duplicated choice sets in each respID + design <- add_metadata(design, n_resp, n_alts, n_q) dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) + dup_rows_resp <- get_dup_resp(design, n_resp, n_q) while ((length(dup_rows_obs) > 0) | (length(dup_rows_resp) > 0)) { - design <- sample_random_sets(profiles, n_alts, n_q) + # cat('Number dupe rows by obs:', length(dup_rows_obs), '\n') + # cat('Number dupe rows by resp:', length(dup_rows_resp), '\n') + dup_rows <- unique(c(dup_rows_obs, dup_rows_resp)) + new_rows <- sample_profiles_by_group(labels, size = length(dup_rows) / n_alts) + design[dup_rows, 1:ncol(new_rows)] <- new_rows + # Recalculate duplicates dup_rows_obs <- get_dup_obs(design, n_alts) - dup_rows_resp <- get_dup_resp(design, n_resp = 1, n_q) + dup_rows_resp <- get_dup_resp(design, n_resp, n_q) } return(design) } -sample_random_sets <- function(design, n_alts, n_q) { - # Make a randomized copy of the design for each alternative - design <- lapply(seq(n_alts), function(x) design[order(stats::runif(n_q)),]) - design <- lapply(design, function(x) { - x$order <- seq(nrow(x)) - return(x) - }) +sample_profiles <- function(profiles, size) { + return(profiles[sample( + x = seq_len(nrow(profiles)), size = size, replace = TRUE), ] + ) +} + +sample_profiles_by_group <- function(labels, size) { + design <- lapply(labels, function(x) sample_profiles(x, size = size)) + design <- lapply(design, function(x) add_label_id(x)) design <- do.call(rbind, design) - design <- design[order(design$order),] - design <- add_metadata(design, n_resp = 1, n_alts, n_q) - design$order <- NULL + design <- design[order(design$labelID), ] + design$labelID <- NULL return(design) } -repeat_design <- function(design, n_resp, n_alts, n_q, n_blocks) { - # Repeat design to match number of respondents - n_reps <- ceiling(n_resp / n_blocks) - design <- design[rep(seq_len(nrow(design)), n_reps), ] - design <- design[1:(n_resp*n_q*n_alts), ] +add_label_id <- function(design) { + design$labelID <- seq(nrow(design)) + return(design) +} - # Add metadata - design <- add_metadata(design, n_resp, n_alts, n_q) - design <- reorder_cols(design) +# Full Factorial Design ---- + +# Arrange copies of the full set of profiles into choice sets by sampling +# without replacement + +make_design_full <- function( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label +) { + if (n_blocks > 1) { + # Make blocks + design <- suppressMessages(as.data.frame(DoE.base::fac.design( + factor.names = get_profile_list(profiles), + blocks = n_blocks, + block.name = "blockID" + ))) + # Make blockID a number, then join on profileIDs + design$blockID <- as.numeric(as.character(design$blockID)) + design <- design[,c(names(profiles)[2:ncol(profiles)], "blockID")] + profiles <- join_profiles(design, profiles) + # Create random choice sets within each block + choice_sets <- make_random_sets_by_block(profiles, n_alts, n_blocks) + } else { + choice_sets <- make_random_sets(profiles, n_alts) + } + design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) + if (no_choice) { + design <- add_no_choice(design, n_alts) + } return(design) } # Orthogonal Design ---- make_design_orthogonal <- function( - profiles, n_resp, n_alts, n_q, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) { - oa <- suppressMessages(as.data.frame( - DoE.base::oa.design( - factor.names = get_profile_list(profiles) - ) - )) + # First obtain the orthogonal array + oa <- suppressMessages(as.data.frame(DoE.base::oa.design( + factor.names = get_profile_list(profiles) + ))) if (nrow(oa) == nrow(profiles)) { - message("No orthogonal array found; using full factorial for design") + message("No orthogonal array found; using full factorial for design") } else { - message( - "Orthogonal array found; using ", nrow(oa), " out of ", - nrow(profiles), " profiles for design" + message( + "Orthogonal array found; using ", nrow(oa), " out of ", + nrow(profiles), " profiles for design" + ) + } + oa <- join_profiles(oa, profiles) + if (n_blocks > 1) { + q_per_resp <- nrow(oa) / n_blocks + if (q_per_resp %% 1 != 0) { + stop( + 'The number of blocks used cannot be evenly divided into the ', + 'orthogonal array. Use a different number for "n_blocks" that ', + 'is as factor of ', nrow(oa) + ) + } + if (q_per_resp < n_q) { + stop( + 'The orthogonal array cannot be divided into ', n_blocks, + ' blocks such that each respondent sees ', n_q, + ' questions. Either decrease "n_blocks" or increase "n_q".' ) + } + oa$blockID <- rep(seq(n_blocks), each = q_per_resp) + # Create random choice sets within each block + choice_sets <- make_random_sets_by_block(oa, n_alts, n_blocks) + } else { + choice_sets <- make_random_sets(oa, n_alts) + } + design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) + if (no_choice) { + design <- add_no_choice(design, n_alts) } - type_ids <- get_type_ids(profiles) - oa <- join_profiles(oa, profiles, type_ids) - design <- make_random_sets(oa, n_alts) - design$blockID <- 1 - design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks = 1) - design$blockID <- NULL return(design) } @@ -676,9 +745,11 @@ make_design_bayesian <- function( # Join on profileIDs to design design <- design_raw$design names(design) <- varnames - design <- join_profiles(design, profiles, type_ids) + design <- join_profiles(design, profiles) if (no_choice) { - design <- add_no_choice_deff(design, n_alts, varnames[type_ids$discrete]) + design <- add_no_choice_bayesian( + design, n_alts, varnames[type_ids$discrete] + ) } # Include probs? @@ -688,7 +759,7 @@ make_design_bayesian <- function( # Repeat design to match number of respondents design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) - design <- repeat_design(design, n_resp, n_alts, n_q, n_blocks) + design <- repeat_sets(design, n_resp, n_alts, n_q, n_blocks) # Print DB error message( @@ -704,30 +775,6 @@ make_design_bayesian <- function( return(design) } -get_type_ids <- function(profiles) { - types <- get_col_types(profiles[, 2:ncol(profiles)]) - ids <- list() - ids$discrete <- types %in% c("factor", "character") - ids$continuous <- !ids$discrete - return(ids) -} - -get_profile_list <- function(profiles) { - profile_lvls <- profiles[, 2:ncol(profiles)] - varnames <- names(profile_lvls) - type_ids <- get_type_ids(profiles) - profile_list <- list() - for (i in seq_len(ncol(profile_lvls))) { - if (type_ids$discrete[i]) { - profile_list[[i]] <- levels(profile_lvls[,i]) - } else { - profile_list[[i]] <- unique(profile_lvls[,i]) - } - } - names(profile_list) <- varnames - return(profile_list) -} - defineCandidateSet <- function( lvls, coding, c.lvls, profile_lvls, type_ids, profiles_restricted ) { @@ -765,40 +812,11 @@ defineCandidateSet <- function( return(cand_set_res) } -join_profiles <- function(design, profiles, type_ids) { - # Replaces the generated design with rows from profiles, which ensures - # factor levels in profiles are maintained in design - - # Keep track of row order in design - design$row_id <- seq(nrow(design)) - - # Before joining profiles, ensure that all the data types are the same - # as in profiles, otherwise join won't work properly - - # Convert numeric columns to actual numbers - for (id in which(type_ids$continuous)) { - design[,id] <- as.numeric(as.character(design[,id])) - } - - # Convert character types to factors and set same levels as profiles - for (id in which(type_ids$discrete)) { - design[,id] <- factor(design[,id], levels = levels(profiles[,id+1])) - } - - # Join on profileIDs, then reorder to retain design order - varnames <- names(profiles[, 2:ncol(profiles)]) - design <- merge(design, profiles, by = varnames, all.x = TRUE) - design <- design[order(design$row_id),] - if ('blockID' %in% names(design)) { varnames <- c(varnames, 'blockID') } - design <- design[c('profileID', varnames)] - return(design) -} - -add_no_choice_deff <- function(design, n_alts, varnames_discrete) { +add_no_choice_bayesian <- function(design, n_alts, varnames_discrete) { # First dummy code categorical variables design$obsID <- rep(seq(nrow(design) / n_alts), each = n_alts) design$altID <- rep(seq(n_alts), nrow(design) / n_alts) - design <- design[which(design$altID != 4), ] + design <- design[which(design$altID != (n_alts + 1)), ] design <- fastDummies::dummy_cols( design, select_columns = varnames_discrete, From ecbf4928176be0cc528f0fe62dc9c60373f21804 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 15:32:31 -0400 Subject: [PATCH 15/43] update how no choice is handled in bayesian designs --- R/design.R | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/R/design.R b/R/design.R index c836f7c..837d8b8 100644 --- a/R/design.R +++ b/R/design.R @@ -746,19 +746,18 @@ make_design_bayesian <- function( design <- design_raw$design names(design) <- varnames design <- join_profiles(design, profiles) + if (no_choice) { - design <- add_no_choice_bayesian( - design, n_alts, varnames[type_ids$discrete] - ) + design <- add_no_choice_bayesian(design, n_alts, varnames[type_ids$discrete]) } - # Include probs? if (probs) { design$probs <- as.vector(t(D$probs)) } - # Repeat design to match number of respondents design$blockID <- rep(seq(n_blocks), each = n_alts*n_q) + + # Repeat design to match number of respondents design <- repeat_sets(design, n_resp, n_alts, n_q, n_blocks) # Print DB error @@ -816,7 +815,7 @@ add_no_choice_bayesian <- function(design, n_alts, varnames_discrete) { # First dummy code categorical variables design$obsID <- rep(seq(nrow(design) / n_alts), each = n_alts) design$altID <- rep(seq(n_alts), nrow(design) / n_alts) - design <- design[which(design$altID != (n_alts + 1)), ] + design <- design[which(design$altID != n_alts), ] design <- fastDummies::dummy_cols( design, select_columns = varnames_discrete, @@ -829,7 +828,7 @@ add_no_choice_bayesian <- function(design, n_alts, varnames_discrete) { design_og$altID <- n_alts design_og$profileID <- 0 design_og[, - which(! names(design_og) %in% c('profileID', 'altID', 'obsID'))] <- 0 + which(! names(design_og) %in% c('profileID', 'altID', 'obsID'))] <- 0 design_og$no_choice <- 1 design <- rbind(design, design_og) design <- design[order(design$obsID, design$altID), ] From b77829733791f47bc6ec2f07bd7216f952f1f218 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 16:17:45 -0400 Subject: [PATCH 16/43] labeled full design done --- R/design.R | 82 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 64 insertions(+), 18 deletions(-) diff --git a/R/design.R b/R/design.R index 837d8b8..526c17d 100644 --- a/R/design.R +++ b/R/design.R @@ -296,12 +296,6 @@ get_type_ids <- function(profiles) { join_profiles <- function(design, profiles) { - # Replaces the generated design with rows from profiles, which ensures - # factor levels in profiles are maintained in design - - # Keep track of row order in design - design$row_id <- seq(nrow(design)) - # Before joining profiles, ensure that all the data types are the same # as in profiles, otherwise join won't work properly @@ -319,8 +313,7 @@ join_profiles <- function(design, profiles) { # Join on profileIDs, then reorder to retain design order varnames <- names(profiles[, 2:ncol(profiles)]) - design <- merge(design, profiles, by = varnames, all.x = TRUE) - design <- design[order(design$row_id),] + design <- merge(design, profiles, by = varnames, all.x = TRUE, sort = FALSE) if ('blockID' %in% names(design)) { varnames <- c(varnames, 'blockID') } design <- design[c('profileID', varnames)] return(design) @@ -509,16 +502,7 @@ design_rand_sample <- function(profiles, n_resp, n_alts, n_q) { } design_rand_sample_label <- function(profiles, n_resp, n_alts, n_q, label) { - n_levels <- length(unique(profiles[, label])) - if (n_levels != n_alts) { - warning( - "The supplied 'n_alts' argument is being ignored and set to ", n_levels, - " to match the number of unique levels in the ", label, - " variable.\n" - ) - # Over-ride user-provided n_alts as it is determined by the label - n_alts <- n_levels - } + n_alts <- override_label_alts(profiles, label, n_alts) # Randomize rows by label labels <- split(profiles, profiles[label]) design <- sample_profiles_by_group(labels, size = n_resp * n_q) @@ -560,6 +544,20 @@ add_label_id <- function(design) { return(design) } +override_label_alts <- function(profiles, label, n_alts) { + n_levels <- length(unique(profiles[, label])) + if (n_levels != n_alts) { + warning( + "The supplied 'n_alts' argument is being ignored and set to ", n_levels, + " to match the number of unique levels in the ", label, + " variable.\n" + ) + # Over-ride user-provided n_alts as it is determined by the label + n_alts <- n_levels + } + return(n_alts) +} + # Full Factorial Design ---- # Arrange copies of the full set of profiles into choice sets by sampling @@ -568,6 +566,11 @@ add_label_id <- function(design) { make_design_full <- function( profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label ) { + if (!is.null(label)) { + return(make_design_full_labeled( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label + )) + } if (n_blocks > 1) { # Make blocks design <- suppressMessages(as.data.frame(DoE.base::fac.design( @@ -591,6 +594,49 @@ make_design_full <- function( return(design) } +make_design_full_labeled <- function( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label +) { + n_alts <- override_label_alts(profiles, label, n_alts) + labels <- unique(profiles[,label]) + profiles_orig <- profiles + # Remove the label column from profiles + profiles[label] <- NULL + profiles$profileID <- NULL + profiles <- profiles[!duplicated(profiles),] + profiles$profileID <- seq(nrow(profiles)) + profiles <- cbind( + profiles[,ncol(profiles)], + profiles[,1:(ncol(profiles)-1)] + ) + names(profiles)[1] <- 'profileID' + if (n_blocks > 1) { + # Make blocks + design <- suppressMessages(as.data.frame(DoE.base::fac.design( + factor.names = get_profile_list(profiles), + blocks = n_blocks, + block.name = "blockID" + ))) + # Make blockID a number, then join on profileIDs + design$blockID <- as.numeric(as.character(design$blockID)) + design <- design[,c(names(profiles)[2:ncol(profiles)], "blockID")] + profiles <- join_profiles(design, profiles) + # Create random choice sets within each block + choice_sets <- make_random_sets_by_block(profiles, n_alts, n_blocks) + } else { + choice_sets <- make_random_sets(profiles, n_alts) + } + # Add label attribute and original profileIDs + choice_sets[label] <- rep(labels, nrow(choice_sets) / length(labels)) + choice_sets$profileID <- NULL + choice_sets <- merge(choice_sets, profiles_orig, all.x = TRUE, sort = FALSE) + design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) + if (no_choice) { + design <- add_no_choice(design, n_alts) + } + return(design) +} + # Orthogonal Design ---- make_design_orthogonal <- function( From 421e396418030b625326c91518cbe708b4ea9259 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 16:26:52 -0400 Subject: [PATCH 17/43] updated input checks for design --- R/design.R | 10 ++++++---- R/input_checks.R | 22 ++++++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/R/design.R b/R/design.R index 526c17d..d2f1b0c 100644 --- a/R/design.R +++ b/R/design.R @@ -116,10 +116,12 @@ #' repeated to meet the desired number of survey respondents (determined by #' `n_resp`). If `"CEA"` or `"Modfed"` is used without specifying `priors`, a #' prior of all `0`s will be used and a warning message stating this will be -#' shown. Restricted sets of `profiles` can only be used with `"Modfed"`. For -#' more details on Bayesian D-efficient designs, see `?idefix::CEA` and -#' `?idefix::Modfed` as well as the JSS article on the {idefix} package -#' (Traets et al, 2020). +#' shown. In the opposite case, if `priors` are specified but neither Bayesian +#' method is used, the `"CEA"` method will be used and a warning stating this +#' will be shown. Restricted sets of `profiles` can only be used with +#' `"Modfed"`. For more details on Bayesian D-efficient designs, see +#' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the +#' {idefix} package (Traets et al, 2020). #' @references Grömping, U. (2018). R Package DoE.base for Factorial #' Experiments. Journal of Statistical Software, 85(5), 1–41, #' \doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. diff --git a/R/input_checks.R b/R/input_checks.R index b06d2c4..fec045d 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -39,6 +39,14 @@ check_design_method <- function(method, priors) { method <- 'CEA' } } + # Check that an appropriate method is used + + if (! method %in% c('random', 'full', 'orthogonal', 'CEA', 'Modfed')) { + stop( + 'The "method" argument must be set to "random", "full", ', + '"orthogonal", "Modfed", or "CEA"' + ) + } return(method) } @@ -67,16 +75,18 @@ check_inputs_design <- function( } if (n_blocks > n_resp) { - stop("Maximum allowable number of blocks is one block per respondent") + stop("Maximum allowable number of blocks is one block per respondent") } - # Check that an appropriate method is used - - if (! method %in% c('random', 'full', 'orthogonal', 'CEA', 'Modfed')) { + if ((n_blocks > 1) & (method == 'random')) { + stop('The "random" method cannot use blocking.') + if ((method == 'full') & profiles_restricted) { stop( - 'The "method" argument must be set to "random", "full", ', - '"orthogonal", "Modfed", or "CEA"' + 'The "full" method cannot use restricted profiles when blocking ', + 'is used. Either set "n_blocks" to 1 or use an unrestricted ', + 'set of profiles' ) + } } # Check that orthogonal design method does not use a label or a From 3d5c77c0f2e6914a1622512cb77b661614ba5288 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 16:54:39 -0400 Subject: [PATCH 18/43] setting up D-efficient designs with AlgDesign::optFederov --- DESCRIPTION | 1 + R/design.R | 89 +++++++++++++++++++++++++++++++++++++++++++----- R/input_checks.R | 14 ++++++-- 3 files changed, 93 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cb9a96b..0477682 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,6 +21,7 @@ Suggests: testthat, tibble Imports: + AlgDesign, DoE.base, fastDummies, ggplot2, diff --git a/R/design.R b/R/design.R index d2f1b0c..e926135 100644 --- a/R/design.R +++ b/R/design.R @@ -31,8 +31,8 @@ #' argument will be ignored as its value is defined by the unique number of #' levels in the `label` variable. Defaults to `NULL`. #' @param method Choose the design method to use: `"random"`, `"full"`, -#' `"orthogonal"`, `"CEA"` or `"Modfed"`. Defaults to `"random"`. See details -#' below for complete description of each method. +#' `"orthogonal"`, `"deff"`, `"CEA"`, or `"Modfed"`. Defaults to `"random"`. +#' See details below for complete description of each method. #' @param priors A list of one or more assumed prior parameters used to generate #' a Bayesian D-efficient design. Defaults to `NULL` #' @param prior_no_choice Prior utility value for the "no choice" alternative. @@ -55,6 +55,7 @@ #' - `"random"` #' - `"full"` #' - `"orthogonal"` +#' - `"deff"` #' - `"CEA"` #' - `"Modfed"` #' @@ -110,6 +111,16 @@ #' more information about orthogonal designs, see `?DoE.base::oa.design` as #' well as the JSS article on the {DoE.base} package (Grömping, 2018). #' +#' The `"deff"` method creates a "D-Efficient" design where an array from +#' `profiles` is found that maximizes the D efficiency using the Federov +#' algorithm, with the total number of unique choice sets determined by +#' `n_q*n_blocks`. Choice sets are then created by randomly sampling from this +#' array *without replacement*. The choice sets are then repeated to meet the +#' desired number of survey respondents (determined by `n_resp`). If blocking +#' is used, choice set blocks are created from the d-efficient array. For more +#' information about the underlying algorithm for this method, see +#' `?AlgDesign::optFederov`. +#' #' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a #' Bayesian D-efficient design for the choice sets, with the total number of #' unique choice sets determined by `n_q*n_blocks`. The choice sets are then @@ -170,6 +181,15 @@ #' method = 'orthogonal' #' ) #' +#' # Make a D-efficient design +#' design_deff <- cbc_design( +#' profiles = profiles, +#' n_resp = 100, # Number of respondents +#' n_alts = 3, # Number of alternatives per question +#' n_q = 6, # Number of questions per respondent +#' method = 'deff' +#' ) +#' #' # Make a survey by randomly sampling from all possible profiles #' # and include a "no choice" option #' design_random_nochoice <- cbc_design( @@ -256,7 +276,11 @@ cbc_design <- function( ) } else if (method == 'orthogonal') { design <- make_design_orthogonal( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice + ) + } else if (method == 'deff') { + design <- make_design_deff( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice ) } else { design <- make_design_bayesian( @@ -607,11 +631,8 @@ make_design_full_labeled <- function( profiles$profileID <- NULL profiles <- profiles[!duplicated(profiles),] profiles$profileID <- seq(nrow(profiles)) - profiles <- cbind( - profiles[,ncol(profiles)], - profiles[,1:(ncol(profiles)-1)] - ) - names(profiles)[1] <- 'profileID' + profiles <- profiles[c( + 'profileID', names(profiles)[which(names(profiles) != 'profileID')])] if (n_blocks > 1) { # Make blocks design <- suppressMessages(as.data.frame(DoE.base::fac.design( @@ -642,7 +663,7 @@ make_design_full_labeled <- function( # Orthogonal Design ---- make_design_orthogonal <- function( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, label + profiles, n_resp, n_alts, n_q, n_blocks, no_choice ) { # First obtain the orthogonal array oa <- suppressMessages(as.data.frame(DoE.base::oa.design( @@ -681,11 +702,61 @@ make_design_orthogonal <- function( } design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) if (no_choice) { + warning( + 'Using a "no choice" option with orthogonal designs may damage the ', + 'orthogonality properties.' + ) design <- add_no_choice(design, n_alts) } return(design) } +# D-Efficient Design ---- + +make_design_deff <- function( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice +) { + # First obtain the d-efficient array + des <- AlgDesign::optFederov(~., profiles[,2:length(profiles)], n_q) + profiles <- merge(des$design, profiles, all.x = TRUE) + profiles <- profiles[c( + 'profileID', names(profiles)[which(names(profiles) != 'profileID')])] + + + + if (n_blocks > 1) { + q_per_resp <- nrow(oa) / n_blocks + if (q_per_resp %% 1 != 0) { + stop( + 'The number of blocks used cannot be evenly divided into the ', + 'orthogonal array. Use a different number for "n_blocks" that ', + 'is as factor of ', nrow(oa) + ) + } + if (q_per_resp < n_q) { + stop( + 'The orthogonal array cannot be divided into ', n_blocks, + ' blocks such that each respondent sees ', n_q, + ' questions. Either decrease "n_blocks" or increase "n_q".' + ) + } + oa$blockID <- rep(seq(n_blocks), each = q_per_resp) + # Create random choice sets within each block + choice_sets <- make_random_sets_by_block(oa, n_alts, n_blocks) + } else { + choice_sets <- make_random_sets(oa, n_alts) + } + design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) + if (no_choice) { + warning( + 'Using a "no choice" option with orthogonal designs may damage the ', + 'orthogonality properties.' + ) + design <- add_no_choice(design, n_alts) + } + return(design) +} + # Bayesian D-efficient Design ---- make_design_bayesian <- function( diff --git a/R/input_checks.R b/R/input_checks.R index fec045d..5e60569 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -41,10 +41,12 @@ check_design_method <- function(method, priors) { } # Check that an appropriate method is used - if (! method %in% c('random', 'full', 'orthogonal', 'CEA', 'Modfed')) { + if (! method %in% c( + 'random', 'full', 'orthogonal', 'deff', 'CEA', 'Modfed' + )) { stop( 'The "method" argument must be set to "random", "full", ', - '"orthogonal", "Modfed", or "CEA"' + '"orthogonal", "deff", "CEA", or "Modfed"' ) } return(method) @@ -101,6 +103,14 @@ check_inputs_design <- function( } } + # Check that the deff design method does not use a label + + if (method == 'deff') { + if (!is.null(label)) { + stop("D-efficient designs cannot be labeled.") + } + } + # The labeled design isn't yet supported for Bayesian D-efficient designs if (!is.null(label) & method_is_bayesian(method)) { From 0336f227cdb1bbfa6081cd41139a4939c3fdfe42 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 17:04:53 -0400 Subject: [PATCH 19/43] finished adding d-efficient designs --- R/design.R | 48 +++++++++++++++++++----------------------------- R/input_checks.R | 1 + 2 files changed, 20 insertions(+), 29 deletions(-) diff --git a/R/design.R b/R/design.R index e926135..205c0e0 100644 --- a/R/design.R +++ b/R/design.R @@ -40,6 +40,9 @@ #' @param probs If `TRUE`, for Bayesian D-efficient designs the resulting design #' includes average predicted probabilities for each alternative in each #' choice set given the sample from the prior preference distribution. +#' Defaults to `FALSE`.' +#' @param keep_deff If `TRUE`, for D-efficient designs the returned +#' object will be a list containing the design and the D-efficiency score. #' Defaults to `FALSE`. #' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned #' object will be a list containing the design and the DB-error score. @@ -240,6 +243,7 @@ cbc_design <- function( priors = NULL, prior_no_choice = NULL, probs = FALSE, + keep_deff = FALSE, keep_db_error = FALSE, max_iter = 50, parallel = FALSE @@ -260,6 +264,7 @@ cbc_design <- function( priors, prior_no_choice, probs, + keep_deff, keep_db_error, max_iter, parallel, @@ -280,7 +285,7 @@ cbc_design <- function( ) } else if (method == 'deff') { design <- make_design_deff( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_deff ) } else { design <- make_design_bayesian( @@ -714,46 +719,31 @@ make_design_orthogonal <- function( # D-Efficient Design ---- make_design_deff <- function( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_deff ) { # First obtain the d-efficient array - des <- AlgDesign::optFederov(~., profiles[,2:length(profiles)], n_q) + des <- AlgDesign::optFederov(~., profiles[,2:length(profiles)], n_q*n_blocks) + deff <- des$Ge profiles <- merge(des$design, profiles, all.x = TRUE) profiles <- profiles[c( 'profileID', names(profiles)[which(names(profiles) != 'profileID')])] - - - if (n_blocks > 1) { - q_per_resp <- nrow(oa) / n_blocks - if (q_per_resp %% 1 != 0) { - stop( - 'The number of blocks used cannot be evenly divided into the ', - 'orthogonal array. Use a different number for "n_blocks" that ', - 'is as factor of ', nrow(oa) - ) - } - if (q_per_resp < n_q) { - stop( - 'The orthogonal array cannot be divided into ', n_blocks, - ' blocks such that each respondent sees ', n_q, - ' questions. Either decrease "n_blocks" or increase "n_q".' - ) - } - oa$blockID <- rep(seq(n_blocks), each = q_per_resp) - # Create random choice sets within each block - choice_sets <- make_random_sets_by_block(oa, n_alts, n_blocks) + profiles$blockID <- rep(seq(n_blocks), each = nrow(profiles) / n_blocks) + choice_sets <- make_random_sets_by_block(profiles, n_alts, n_blocks) } else { - choice_sets <- make_random_sets(oa, n_alts) + choice_sets <- make_random_sets(profiles, n_alts) } design <- repeat_sets(choice_sets, n_resp, n_alts, n_q, n_blocks) if (no_choice) { - warning( - 'Using a "no choice" option with orthogonal designs may damage the ', - 'orthogonality properties.' - ) design <- add_no_choice(design, n_alts) } + # Print D-efficiency + message("D-efficient design found with D-efficiency of ", round(deff, 5)) + + # Return list containing the design and DB error if keep_deff = TRUE + if (keep_deff) { + return(list(design = design, deff = deff)) + } return(design) } diff --git a/R/input_checks.R b/R/input_checks.R index 5e60569..b86ecd0 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -66,6 +66,7 @@ check_inputs_design <- function( priors, prior_no_choice, probs, + keep_deff, keep_db_error, max_iter, parallel, From 1d1c4106f3bba0145e2efa287cf0c0d8873c3407 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Thu, 6 Jul 2023 17:09:35 -0400 Subject: [PATCH 20/43] update description --- DESCRIPTION | 4 ++-- R/design.R | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0477682..df35fe1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: cbcTools -Title: A Simulation-Based Workflow to Design and Evaluate Choice-Based Conjoint Survey Experiments +Title: Choice-Based Conjoint Experiment Design Generation and Power Evaluation in R Version: 0.4.0 Maintainer: John Helveston Authors@R: c( @@ -8,7 +8,7 @@ Authors@R: c( role = c("cre", "aut", "cph"), email = "john.helveston@gmail.com", comment = c(ORCID = "0000-0002-2657-9191"))) -Description: A simulation-based workflow to design and evaluate choice-based conjoint survey experiments. Generate a variety of survey designs, including full factorial designs, orthogonal designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct a power analysis for a given survey design by estimating the same model on different subsets of the data to simulate different sample sizes. Choice simulation and model estimation in power analyses are handled using the 'logitr' package (Helveston, 2023) . +Description: Design and evaluate choice-based conjoint survey experiments. Generate a variety of survey designs, including random designs, full factorial designs, orthogonal designs, D-efficient designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct a power analysis for a given survey design by estimating the same model on different subsets of the data to simulate different sample sizes. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . D-efficient designs are obtained using the 'AlgDesign' package (Wheeler, 2022) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Choice simulation and model estimation in power analyses are handled using the 'logitr' package (Helveston, 2023) . License: MIT + file LICENSE Encoding: UTF-8 LazyData: true diff --git a/R/design.R b/R/design.R index 205c0e0..0faf6a1 100644 --- a/R/design.R +++ b/R/design.R @@ -69,12 +69,14 @@ #' #' The table below summarizes method compatibility with other design options. #' -#' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | -#' Blocking? +#' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? #' ---|---|---|---|--- -#' `"random"` | ✅ | ✅ | ✅ | ❌ `"full"` | ✅ | ✅ | ✅ | ✅ -#' `"orthogonal"` | ✅ | ❌ | ❌ | ✅ `"CEA"` | ✅ | ❌ | ❌ | ✅ `"Modfed"` | -#' ✅ | ❌ | ✅ | ✅ +#' `"random"` | ✅ | ✅ | ✅ | ❌ +#' `"full"` | ✅ | ✅ | ✅ | ✅ +#' `"orthogonal"` | ✅ | ❌ | ❌ | ✅ +#' `"deff"` | ✅ | ❌ | ✅ | ✅ +#' `"CEA"` | ✅ | ❌ | ❌ | ✅ +#' `"Modfed"` | ✅ | ❌ | ✅ | ✅ #' #' The `"random"` method (the default) creates a design where choice sets are #' created by randomly sampling from the full set of `profiles` *with From 55addb032f274e1bdd6c8a3ac260974cb322363a Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 07:12:45 -0400 Subject: [PATCH 21/43] documentation updates and input checks --- R/design.R | 28 ++++--- R/input_checks.R | 88 +++++++++----------- man/cbc_design.Rd | 189 ++++++++++++++++++++++++++---------------- vignettes/library.bib | 34 +++++++- vignettes/usage.Rmd | 185 +++++++++++++++++++++++++++-------------- 5 files changed, 330 insertions(+), 194 deletions(-) diff --git a/R/design.R b/R/design.R index 0faf6a1..3194a4b 100644 --- a/R/design.R +++ b/R/design.R @@ -64,10 +64,13 @@ #' #' All methods ensure that the two following criteria are met: #' -#' 1. No two profiles are the same within any one choice set. 2. No two choice -#' sets are the same within any one respondent. +#' 1. No two profiles are the same within any one choice set. +#' 2. No two choice sets are the same within any one respondent. #' -#' The table below summarizes method compatibility with other design options. +#' The table below summarizes method compatibility with other design options, +#' including the ability to include a "no choice" option, the creation of a +#' "labeled" design (also called a "alternative-specific" design), the use +#' of restricted profile, and the use of blocking. #' #' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? #' ---|---|---|---|--- @@ -85,9 +88,6 @@ #' and may lead to a deficient experiment in smaller sample sizes, though it #' guarantees equal ability to estimate main and interaction effects. #' -#' For all other methods, a fixed set of choice sets is first created -#' following a particular strategy, and then the choice sets are -#' #' The `"full"` method for ("full factorial") creates a design where choice #' sets are created by randomly sampling from the full set of `profiles` #' *without replacement*. The choice sets are then repeated to meet the @@ -116,13 +116,13 @@ #' more information about orthogonal designs, see `?DoE.base::oa.design` as #' well as the JSS article on the {DoE.base} package (Grömping, 2018). #' -#' The `"deff"` method creates a "D-Efficient" design where an array from +#' The `"deff"` method creates a "D-efficient" design where an array from #' `profiles` is found that maximizes the D efficiency using the Federov #' algorithm, with the total number of unique choice sets determined by #' `n_q*n_blocks`. Choice sets are then created by randomly sampling from this #' array *without replacement*. The choice sets are then repeated to meet the #' desired number of survey respondents (determined by `n_resp`). If blocking -#' is used, choice set blocks are created from the d-efficient array. For more +#' is used, choice set blocks are created from the D-efficient array. For more #' information about the underlying algorithm for this method, see #' `?AlgDesign::optFederov`. #' @@ -144,8 +144,16 @@ #' (2020). Generating Optimal Designs for Discrete Choice Experiments in R: #' The idefix Package. Journal of Statistical Software, 96(3), 1–41, #' \doi{10.18637/jss.v096.i03} -#' @return A data frame containing a choice-based conjoint survey design where -#' each row is an alternative. +#' @return The returned `design` data frame contains a choice-based conjoint +#' survey design where each row is an alternative. It includes the following +#' columns: +#' +#' - `profileID`: Identifies the profile in `profiles`. +#' - `respID`: Identifies each survey respondent. +#' - `qID`: Identifies the choice question answered by the respondent. +#' - `altID`:Identifies the alternative in any one choice observation. +#' - `obsID`: Identifies each unique choice observation across all respondents. +#' - `blockID`: If blocking is used, identifies each unique block. #' @export #' @examples #' library(cbcTools) diff --git a/R/input_checks.R b/R/input_checks.R index b86ecd0..f101dc1 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -27,8 +27,22 @@ check_inputs_restrict <- function(profiles) { } check_design_method <- function(method, priors) { + + # Check that an appropriate method is used + + if (! method %in% c( + 'random', 'full', 'orthogonal', 'deff', 'CEA', 'Modfed' + )) { + stop( + 'The "method" argument must be set to "random", "full", ', + '"orthogonal", "deff", "CEA", or "Modfed"' + ) + } + + # Check that a Bayesian method is used if priors are used + if (!is.null(priors)) { - if (! method_is_bayesian(method)) { + if (!method_is_bayesian(method)) { # Set method to 'CEA' if priors are specified and # user didn't specify an appropriate method. warning( @@ -39,16 +53,7 @@ check_design_method <- function(method, priors) { method <- 'CEA' } } - # Check that an appropriate method is used - if (! method %in% c( - 'random', 'full', 'orthogonal', 'deff', 'CEA', 'Modfed' - )) { - stop( - 'The "method" argument must be set to "random", "full", ', - '"orthogonal", "deff", "CEA", or "Modfed"' - ) - } return(method) } @@ -73,6 +78,8 @@ check_inputs_design <- function( profiles_restricted ) { + # Checks on blocking + if (n_blocks < 1) { stop('n_blocks must be greater than or equal to 1') } @@ -92,42 +99,24 @@ check_inputs_design <- function( } } - # Check that orthogonal design method does not use a label or a - # restricted set of profiles - - if (method == 'orthogonal') { - if (!is.null(label)) { - stop("Orthogonal designs cannot be labeled.") - } - if (profiles_restricted) { - stop("Orthogonal designs cannot use restricted profiles") - } - } - - # Check that the deff design method does not use a label + # Checks on labeled designs - if (method == 'deff') { if (!is.null(label)) { - stop("D-efficient designs cannot be labeled.") + if (!method %in% c('random', 'full')) { + stop( + 'Labeled designs are currently only supported with the "random" or ', + '"full" method.' + ) + } } - } - # The labeled design isn't yet supported for Bayesian D-efficient designs + # Check on restricted profile sets - if (!is.null(label) & method_is_bayesian(method)) { - stop( - 'The use of the "label" argument is currently not compatible with ', - 'Bayesian D-efficient designs' - ) - } - - # If using a Bayesian D-efficient design with a no choice option, user must - # specify a value for prior_no_choice - if (no_choice) { - if (!is.null(priors) & is.null(prior_no_choice)) { + if (profiles_restricted) { + if (!method %in% c('random', 'full')) { stop( - 'If "no_choice = TRUE", you must specify the prior utility ', - 'value for the "no choice" option using prior_no_choice' + 'Restricted profile sets can only be used with the "random", "full" ', + '"deff", or "Modfed" methods' ) } } @@ -136,15 +125,16 @@ check_inputs_design <- function( if (!is.null(priors)) { - # Check that user specified an appropriate method - # This should already be handled - if (! method_is_bayesian(method)) { - stop( - 'Since "priors" are specified, the "method" argument must ', - 'be either "Modfed" or "CEA" to obtain a Bayesian ', - 'D-efficient design' - ) - } + # If using a Bayesian D-efficient design with a no choice option, + # user must specify a value for prior_no_choice + + if (no_choice & is.null(prior_no_choice)) { + stop( + 'If "no_choice = TRUE" with the "CEA" or "Modfed" method, you must ', + 'specify the prior utility for the "no choice" option using ', + '"prior_no_choice"' + ) + } # Check that prior names aren't missing prior_names <- names(priors) diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index 5c3630b..4dbbcb5 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -18,14 +18,15 @@ cbc_design( priors = NULL, prior_no_choice = NULL, probs = FALSE, + keep_deff = FALSE, keep_db_error = FALSE, max_iter = 50, parallel = FALSE ) } \arguments{ -\item{profiles}{A data frame in which each row is a possible profile. -This can be generated using the \code{cbc_profiles()} function.} +\item{profiles}{A data frame in which each row is a possible profile. This +can be generated using the \code{cbc_profiles()} function.} \item{n_resp}{Number of survey respondents.} @@ -34,41 +35,45 @@ This can be generated using the \code{cbc_profiles()} function.} \item{n_q}{Number of questions per respondent.} \item{n_blocks}{Number of blocks used in Orthogonal or Bayesian D-efficient -designs. Max allowable is one block per respondent. Defaults to \code{1}, meaning -every respondent sees the same choice set.} +designs. Max allowable is one block per respondent. Defaults to \code{1}, +meaning every respondent sees the same choice set.} -\item{n_draws}{Number of draws used in simulating the prior distribution -used in Bayesian D-efficient designs. Defaults to \code{50}.} +\item{n_draws}{Number of draws used in simulating the prior distribution used +in Bayesian D-efficient designs. Defaults to \code{50}.} \item{n_start}{A numeric value indicating the number of random start designs to use in obtaining a Bayesian D-efficient design. The default is \code{5}. Increasing \code{n_start} can result in a more efficient design at the expense of increased computational time.} -\item{no_choice}{Include a "no choice" option in the choice sets? Defaults -to \code{FALSE}. If \code{TRUE}, the total number of alternatives per question will be +\item{no_choice}{Include a "no choice" option in the choice sets? Defaults to +\code{FALSE}. If \code{TRUE}, the total number of alternatives per question will be one more than the provided \code{n_alts} argument.} -\item{label}{The name of the variable to use in a "labeled" design -(also called an "alternative-specific design") such that each set of -alternatives contains one of each of the levels in the \code{label} attribute. -Currently not compatible with Bayesian D-efficient designs. If used, -the \code{n_alts} argument will be ignored as its value is defined by the unique -number of levels in the \code{label} variable. Defaults to \code{NULL}.} +\item{label}{The name of the variable to use in a "labeled" design (also +called an "alternative-specific design") such that each set of alternatives +contains one of each of the levels in the \code{label} attribute. Currently not +compatible with Bayesian D-efficient designs. If used, the \code{n_alts} +argument will be ignored as its value is defined by the unique number of +levels in the \code{label} variable. Defaults to \code{NULL}.} \item{method}{Choose the design method to use: \code{"random"}, \code{"full"}, -\code{"orthogonal"}, \code{"CEA"} or \code{"Modfed"}. Defaults to \code{"random"}. See details -below for complete description of each method.} +\code{"orthogonal"}, \code{"deff"}, \code{"CEA"}, or \code{"Modfed"}. Defaults to \code{"random"}. +See details below for complete description of each method.} -\item{priors}{A list of one or more assumed prior parameters used to -generate a Bayesian D-efficient design. Defaults to \code{NULL}} +\item{priors}{A list of one or more assumed prior parameters used to generate +a Bayesian D-efficient design. Defaults to \code{NULL}} \item{prior_no_choice}{Prior utility value for the "no choice" alternative. Only required if \code{no_choice = TRUE}. Defaults to \code{NULL}.} -\item{probs}{If \code{TRUE}, for Bayesian D-efficient designs the resulting -design includes average predicted probabilities for each alternative in each +\item{probs}{If \code{TRUE}, for Bayesian D-efficient designs the resulting design +includes average predicted probabilities for each alternative in each choice set given the sample from the prior preference distribution. +Defaults to \code{FALSE}.'} + +\item{keep_deff}{If \code{TRUE}, for D-efficient designs the returned +object will be a list containing the design and the D-efficiency score. Defaults to \code{FALSE}.} \item{keep_db_error}{If \code{TRUE}, for Bayesian D-efficient designs the returned @@ -83,25 +88,35 @@ iterations when searching for a Bayesian D-efficient design. The default is over multiple cores. The default is \code{FALSE}.} } \value{ -A data frame containing a choice-based conjoint survey design where -each row is an alternative. +The returned \code{design} data frame contains a choice-based conjoint +survey design where each row is an alternative. It includes the following +columns: +\itemize{ +\item \code{profileID}: Identifies the profile in \code{profiles}. +\item \code{respID}: Identifies each survey respondent. +\item \code{qID}: Identifies the choice question answered by the respondent. +\item \code{altID}:Identifies the alternative in any one choice observation. +\item \code{obsID}: Identifies each unique choice observation across all respondents. +\item \code{blockID}: If blocking is used, identifies each unique block. +} } \description{ This function creates a data frame containing a choice-based conjoint survey design where each row is an alternative. Generate a variety of survey -designs, including full factorial designs, orthogonal designs, and -Bayesian D-efficient designs as well as designs with "no choice" options -and "labeled" (also known as "alternative specific") designs. +designs, including full factorial designs, orthogonal designs, and Bayesian +D-efficient designs as well as designs with "no choice" options and "labeled" +(also known as "alternative specific") designs. } \details{ -The \code{method} argument determines the design method used. Options are: +The \code{method} argument determines the design method used. Options +are: \itemize{ \item \code{"random"} \item \code{"full"} \item \code{"orthogonal"} +\item \code{"deff"} \item \code{"CEA"} \item \code{"Modfed"} -} All methods ensure that the two following criteria are met: \enumerate{ @@ -109,55 +124,78 @@ All methods ensure that the two following criteria are met: \item No two choice sets are the same within any one respondent. } -The table below summarizes method compatibility with other design options.\tabular{lllll}{ +The table below summarizes method compatibility with other design options, +including the ability to include a "no choice" option, the creation of a +"labeled" design (also called a "alternative-specific" design), the use +of restricted profile, and the use of blocking.\tabular{lllll}{ Method \tab Include "no choice"? \tab Labeled designs? \tab Restricted profiles? \tab Blocking? \cr \code{"random"} \tab ✅ \tab ✅ \tab ✅ \tab ❌ \cr \code{"full"} \tab ✅ \tab ✅ \tab ✅ \tab ✅ \cr - \code{"orthogonal"} \tab ✅ \tab ❌ \tab ❌ \tab ❌ \cr + \code{"orthogonal"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr + \code{"deff"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr \code{"CEA"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr \code{"Modfed"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr } The \code{"random"} method (the default) creates a design where choice sets are -created by randomly sampling from the full set of \code{profiles}. This means -that few (if any) respondents will see the same sets of choice sets. This -method is less efficient than other approaches and may lead to a deficient -experiment in smaller sample sizes, though it guarantees equal ability to -estimate main and interaction effects. - -The \code{"full"} method for ("full factorial") creates a design where a fixed -set of choice sets is created by randomly sampling from the full set of -\code{profiles}, which is then repeated to meet the desired number of survey -respondents (\code{n_resp}). If blocking is used, choice set blocks are created using -mutually exclusive subsets of \code{profiles} within each block. This method is -less efficient than other approaches and may lead to a deficient experiment -in smaller sample sizes, though it guarantees equal ability to estimate main -and interaction effects. For more information about blocking with full -factorial designs, see \code{?DoE.base::fac.design} as well as the JSS article on -the {DoE.base} package (Grömping, 2018). +created by randomly sampling from the full set of \code{profiles} *with +*replacement. This means that few (if any) respondents will see the same +sets of choice sets. This method is less efficient than other approaches +and may lead to a deficient experiment in smaller sample sizes, though it +guarantees equal ability to estimate main and interaction effects. + +The \code{"full"} method for ("full factorial") creates a design where choice +sets are created by randomly sampling from the full set of \code{profiles} +\emph{without replacement}. The choice sets are then repeated to meet the +desired number of survey respondents (determined by \code{n_resp}). If blocking +is used, choice set blocks are created using mutually exclusive subsets of +\code{profiles} within each block. This method produces a design with similar +performance with that of the \code{"random"} method, except the choice sets are +repeated and thus there will be many more opportunities for different +respondents to see the same choice sets. This method is less efficient than +other approaches and may lead to a deficient experiment in smaller sample +sizes, though it guarantees equal ability to estimate main and interaction +effects. For more information about blocking with full factorial designs, +see \code{?DoE.base::fac.design} as well as the JSS article on the {DoE.base} +package (Grömping, 2018). The \code{"orthogonal"} method creates a design where an orthogonal array from -the full set of \code{profiles} is found. A fixed set of choice sets is created by -randomly sampling from this orthogonal array, which is then repeated to meet -the desired number of survey respondents (\code{n_resp}). For cases where an -orthogonal array cannot be found, a full factorial design is used. This -approach is also sometimes called a "main effects" design since orthogonal -arrays focus the information on the main effects at the expense of -information about interaction effects. For more information about orthogonal -designs, see \code{?DoE.base::oa.design} as well as the JSS article on the -{DoE.base} package (Grömping, 2018). +the full set of \code{profiles} is found and then choice sets are created by +randomly sampling from this orthogonal array \emph{without replacement}. The +choice sets are then repeated to meet the desired number of survey +respondents (determined by \code{n_resp}). If blocking is used, choice set +blocks are created using mutually exclusive subsets of the orthogonal array +within each block. For cases where an orthogonal array cannot be found, a +full factorial design is used. This approach is also sometimes called a +"main effects" design since orthogonal arrays focus the information on the +main effects at the expense of information about interaction effects. For +more information about orthogonal designs, see \code{?DoE.base::oa.design} as +well as the JSS article on the {DoE.base} package (Grömping, 2018). + +The \code{"deff"} method creates a "D-efficient" design where an array from +\code{profiles} is found that maximizes the D efficiency using the Federov +algorithm, with the total number of unique choice sets determined by +\code{n_q*n_blocks}. Choice sets are then created by randomly sampling from this +array \emph{without replacement}. The choice sets are then repeated to meet the +desired number of survey respondents (determined by \code{n_resp}). If blocking +is used, choice set blocks are created from the D-efficient array. For more +information about the underlying algorithm for this method, see +\code{?AlgDesign::optFederov}. The \code{"CEA"} and \code{"Modfed"} methods use the specified \code{priors} to create a -Bayesian D-efficient design. A fixed set of choice sets is first found and -then repeated to meet the desired number of survey respondents (\code{n_resp}). -The total number of unique choice sets is determined by \code{n_q*n_blocks}. -If \code{"CEA"} or \code{"Modfed"} is used without specifying \code{priors}, a prior of all -\code{0}s will be used and a warning message stating this will be shown. -Restricted sets of \code{profiles} can only be used with \code{"Modfed"}. For more -details on Bayesian D-efficient designs, see \code{?idefix::CEA} and -\code{?idefix::Modfed} as well as the JSS article on the {idefix} package -(Traets et al, 2020). +Bayesian D-efficient design for the choice sets, with the total number of +unique choice sets determined by \code{n_q*n_blocks}. The choice sets are then +repeated to meet the desired number of survey respondents (determined by +\code{n_resp}). If \code{"CEA"} or \code{"Modfed"} is used without specifying \code{priors}, a +prior of all \code{0}s will be used and a warning message stating this will be +shown. In the opposite case, if \code{priors} are specified but neither Bayesian +method is used, the \code{"CEA"} method will be used and a warning stating this +will be shown. Restricted sets of \code{profiles} can only be used with +\code{"Modfed"}. For more details on Bayesian D-efficient designs, see +\code{?idefix::CEA} and \code{?idefix::Modfed} as well as the JSS article on the +{idefix} package (Traets et al, 2020). +} } \examples{ library(cbcTools) @@ -173,7 +211,7 @@ profiles <- cbc_profiles( # Make a survey by randomly sampling from all possible profiles # (This is the default setting where method = 'random') -design_full <- cbc_design( +design_random <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -190,7 +228,7 @@ design_full <- cbc_design( ) # Make a survey from an orthogonal array of profiles -design_ortho <- cbc_design( +design_orthogonal <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -198,9 +236,18 @@ design_ortho <- cbc_design( method = 'orthogonal' ) +# Make a D-efficient design +design_deff <- cbc_design( + profiles = profiles, + n_resp = 100, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'deff' +) + # Make a survey by randomly sampling from all possible profiles # and include a "no choice" option -design_full_nochoice <- cbc_design( +design_random_nochoice <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -210,7 +257,7 @@ design_full_nochoice <- cbc_design( # Make a survey by randomly sampling from all possible profiles # with each level of the "type" attribute appearing as an alternative -design_full_labeled <- cbc_design( +design_random_labeled <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -236,9 +283,11 @@ design_bayesian <- cbc_design( ) } \references{ -Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41, -\doi{10.18637/jss.v085.i05} -Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, +Grömping, U. (2018). R Package DoE.base for Factorial +Experiments. Journal of Statistical Software, 85(5), 1–41, +\doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. +(2020). Generating Optimal Designs for Discrete Choice Experiments in R: +The idefix Package. Journal of Statistical Software, 96(3), 1–41, \doi{10.18637/jss.v096.i03} } \keyword{DoE.base} diff --git a/vignettes/library.bib b/vignettes/library.bib index c7564af..b1545a7 100644 --- a/vignettes/library.bib +++ b/vignettes/library.bib @@ -1,4 +1,4 @@ -@article{Grömping2018, +@article{Gromping2018, title={R Package DoE.base for Factorial Experiments}, volume={85}, url={https://www.jstatsoft.org/index.php/jss/article/view/v085i05}, @@ -32,4 +32,34 @@ @article{Helveston2023 author={Helveston, John Paul}, year={2023}, pages={1–37} -} \ No newline at end of file +} + +@article{Wickham2014, + title={Tidy Data}, + volume={59}, + url={https://www.jstatsoft.org/index.php/jss/article/view/v059i10}, + doi={10.18637/jss.v059.i10}, + number={10}, + journal={Journal of Statistical Software}, + author={Wickham, Hadley}, + year={2014}, + pages={1–23} +} + +@article{Wickham2016, + title={Package ‘ggplot2’}, + author={Wickham, Hadley and Chang, Winston and Wickham, Maintainer Hadley}, + journal={Create elegant data visualisations using the grammar of graphics. Version}, + volume={2}, + number={1}, + pages={1--189}, + year={2016} +} + +@Manual{Wheeler2022, + title = {AlgDesign: Algorithmic Experimental Design}, + author = {Bob Wheeler}, + year = {2022}, + note = {R package version 1.2.1}, + url = {https://CRAN.R-project.org/package=AlgDesign}, +} diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 6e81368..d7f638f 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -75,19 +75,32 @@ restricted_profiles # Generate survey designs -Once a set of profiles is obtained, a conjoint survey can then be generated using the `cbc_design()` function. A variety of survey designs can be generated, including: +Once a set of profiles is obtained, a conjoint survey can then be generated using the `cbc_design()` function. The `method` argument determines the design method used. Options are: -- Full factorial designs -- Orthogonal (a.k.a. "main effects") designs -- Labeled designs (a.k.a. "alternative-specific" designs) -- Designs with a "no choice" option (a.k.a. "outside good") -- Bayesian D-efficient designs +- `"random"` +- `"full"` +- `"orthogonal"` +- `"deff"` +- `"CEA"` +- `"Modfed"` -## Full factorial designs +All methods ensure that the two following criteria are met: + +1. No two profiles are the same within any one choice set. +2. No two choice sets are the same within any one respondent. + +The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. -Full factorial designs are obtained by setting `method = 'full'` (the default setting for `method`). This method simply samples from the full set of `profiles`, ensuring that no two profiles are the same in any choice question and that no two choice sets are the same for any one respondent. Blocking can be implemented where blocks are created from subsets of the full factorial design. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Grömping2018]. +Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? +---|---|---|---|--- +`"random"` | ✅ | ✅ | ✅ | ❌ +`"full"` | ✅ | ✅ | ✅ | ✅ +`"orthogonal"` | ✅ | ❌ | ❌ | ✅ +`"deff"` | ✅ | ❌ | ✅ | ✅ +`"CEA"` | ✅ | ❌ | ❌ | ✅ +`"Modfed"` | ✅ | ❌ | ✅ | ✅ -The resulting `design` data frame includes the following columns: +The returned `design` data frame contains a choice-based conjoint survey design where each row is an alternative. It includes the following columns: - `profileID`: Identifies the profile in `profiles`. - `respID`: Identifies each survey respondent. @@ -96,44 +109,29 @@ The resulting `design` data frame includes the following columns: - `obsID`: Identifies each unique choice observation across all respondents. - `blockID`: If blocking is used, identifies each unique block. -```{r} -design_full <- cbc_design( - profiles = profiles, - n_resp = 900, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - method = 'full' # This is the default method -) +## Random designs -dim(design_full) # View dimensions -head(design_full) # Preview first 6 rows -``` - -## Orthogonal designs - -Orthogonal designs are obtained by setting `method = 'orthogonal'`. This method first finds an orthogonal array from the full set of `profiles` (if possible), then randomly selects from it to create choice sets. For some designs an orthogonal array can't be found, in which case a full factorial design is used. This approach is also sometimes called a "main effects" design since orthogonal arrays focus the information on the main effects at the expense of information about interaction effects. For more information about orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article on the {DoE.base} package [@Grömping2018] +The `"random"` method (the default) creates a design where choice sets are created by randomly sampling from the full set of `profiles` *with* replacement. This means that few (if any) respondents will see the same sets of choice sets. This method is less efficient than other approaches and may lead to a deficient experiment in smaller sample sizes, though it guarantees equal ability to estimate main and interaction effects. ```{r} -#| message: true - -design_ortho <- cbc_design( +design_random <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - method = 'orthogonal' + method = 'random' # This is the default method ) -dim(design_ortho) # View dimensions -head(design_ortho) # Preview first 6 rows +dim(design_random) +head(design_random) ``` ## Labeled designs (a.k.a. "alternative-specific" designs) -You can also make a "labeled" design (also known as "alternative-specific" design) where the levels of one attribute are used as a label by setting the `label` argument to that attribute. This by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, so the `n_alts` argument is overridden. Here is an example of a labeled full factorial survey using the `type` attribute as the label: +A "labeled" design (also known as "alternative-specific" design) is one where the levels of one attribute are used as a label. This can be created by setting the `label` argument to that attribute. As of now, only `"random"` and `"full"` methods support labeled designs. Since this by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, the `n_alts` argument is overridden. Here is an example of a labeled random design using the `type` attribute as the label: ```{r} -design_labeled <- cbc_design( +design_random_labeled <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question @@ -141,15 +139,15 @@ design_labeled <- cbc_design( label = "type" # Set the "type" attribute as the label ) -dim(design_labeled) -head(design_labeled) +dim(design_random_labeled) +head(design_random_labeled) ``` In the above example, you can see in the first six rows of the survey that the `type` attribute is always fixed to be the same order, ensuring that each level in the `type` attribute will always be shown in each choice question. ## Designs with a "no choice" option (a.k.a. an "outside good") -You can include a "no choice" (also known as an "outside good") option in your survey by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. +A "no choice" (also known as an "outside good") option can be included by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. ```{r} design_nochoice <- cbc_design( @@ -164,9 +162,64 @@ dim(design_nochoice) head(design_nochoice) ``` +## Full factorial designs + +The `"full"` method for ("full factorial") creates a design where choice sets are created by randomly sampling from the full set of `profiles` *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created using mutually exclusive subsets of `profiles` within each block. This method produces a design with similar performance with that of the `"random"` method, except the choice sets are repeated and thus there will be many more opportunities for different respondents to see the same choice sets. This method is less efficient than other approaches and may lead to a deficient experiment in smaller sample sizes, though it guarantees equal ability to estimate main and interaction effects. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Gromping2018]. + +```{r} +design_full <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'full' +) + +dim(design_full) +head(design_full) +``` + +## Orthogonal designs + +The `"orthogonal"` method creates a design where an orthogonal array from the full set of `profiles` is found and then choice sets are created by randomly sampling from this orthogonal array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created using mutually exclusive subsets of the orthogonal array within each block. For cases where an orthogonal array cannot be found, a full factorial design is used. This approach is also sometimes called a "main effects" design since orthogonal arrays focus the information on the main effects at the expense of information about interaction effects. For more information about orthogonal designs, see `?DoE.base::oa.design` as well as the JSS article on the {DoE.base} package [@Gromping2018]. + +```{r} +#| message: true + +design_orthogonal <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'orthogonal' +) + +dim(design_orthogonal) +head(design_orthogonal) +``` + +## D-efficient designs + +The `"deff"` method creates a "D-efficient" design where an array from `profiles` is found that maximizes the D-efficiency using the Federov algorithm, with the total number of unique choice sets determined by `n_q*n_blocks`. The optimization is handled using the [{AlgDesign} package](https://cran.r-project.org/package=AlgDesign). Choice sets are then created by randomly sampling from this array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created from the D-efficient array. For more information about the underlying algorithm for this method, see `?AlgDesign::optFederov` [@Wheeler2022]. + +```{r} +#| message: true + +design_deff <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + method = 'orthogonal' +) + +dim(design_deff) +head(design_deff) +``` + ## Bayesian D-efficient designs -Bayesian D-efficient designs are obtained by setting `method = 'CEA'` or `method = 'Modfed'` along with a specified list of `priors` (parameters that define a prior utility model). These designs are optimized to minimize the D-error of the design given a prior model. The optimization is handled using the [{idefix} package](https://cran.r-project.org/web/packages/idefix/index.html). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. If `method` is set to `"CEA"` or `"Modfed"` but without `priors` specified, a prior of all `0`s will be used and a warning message stating this will be shown. If you are using a restricted set of `profiles`, only the `"Modfed"` method can be used as `"CEA"` requires unrestricted `profiles`. For more details on Bayesian D-efficient designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the {idefix} package [@Traets2020]. +The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a Bayesian D-efficient design for the choice sets, with the total number of unique choice sets determined by `n_q*n_blocks`. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). These designs are optimized to minimize the D-error of the design given a prior model, which is handled using the [{idefix} package](https://cran.r-project.org/web/packages/idefix/index.html). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. If `"CEA"` or `"Modfed"` is used without specifying `priors`, a prior of all `0`s will be used and a warning message stating this will be shown. In the opposite case, if `priors` are specified but neither Bayesian method is used, the `"CEA"` method will be used and a warning stating this will be shown. Restricted sets of `profiles` can only be used with `"Modfed"`. For more details on Bayesian D-efficient designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the {idefix} package [@Traets2020]. In the example below, the prior model assumes the following parameters: @@ -192,7 +245,7 @@ dim(design_bayesian) head(design_bayesian) ``` -Bayesian D-efficient designs that include a "no choice" option should set `no_choice = TRUE` and also define a prior for the "no choice" option using `prior_no_choice`, e.g.: +Bayesian D-efficient designs that include a "no choice" option should set `no_choice = TRUE` and also define a prior for the "no choice" option using `prior_no_choice`: ```{r} design_bayesian_no_choice <- cbc_design( @@ -239,11 +292,7 @@ cbc_overlap(design) # Simulate choices -You can simulate choices for a given `design` using the `cbc_choices()` function. Choices are simulated using the [{logitr} package](https://jhelvy.github.io/logitr/). For more information, see `?logitr::logitr` as well as the JSS article on the {logitr} package [@Helveston2023]. - -## Random choices - -By default, random choices are simulated: +Choices for a given design can be simulated using the `cbc_choices()` function. This function requires a `design` argument (a design generated by `cbc_design()`), and an `obsID` argument, which is the column in `design` that identifies each unique choice observation. By default, random choices are simulated: ```{r} data <- cbc_choices( @@ -254,21 +303,23 @@ data <- cbc_choices( head(data) ``` -## Choices according to a prior +Choices can also be simulated according to an assumed prior model. For this option, choices are simulated using the `predict.logitr()` method from the [{logitr} package](https://jhelvy.github.io/logitr/) [@Helveston2023], which makes predictions according to multinomial or mixed logit models. -You can also pass a list of prior parameters to define a utility model that will be used to simulate choices. In the example below, the choices are simulated using a utility model with the following parameters: - -- 1 continuous parameter for `price` -- 2 categorical parameters for `type` (`'Gala'` and `'Honeycrisp'`) -- 2 categorical parameters for `freshness` (`"Average"` and `"Excellent"`) - -Note that for categorical variables (`type` and `freshness` in this example), the first level defined when using `cbc_profiles()` is set as the reference level. The example below defines the following utility model for simulating choices for each alternative _j_: +The example below demonstrates how to simulate choices according to a multinomial logit model with fixed parameters. Note that for categorical variables (`type` and `freshness` in this example), the first level defined when using `cbc_profiles()` is set as the reference level. In this example, the parameters define the following utility model for each alternative _j_: $$ -u_j = -0.1price_j + 0.1typeGala_j + 0.2typeHoneycrisp_j + 0.1freshnessAverage_j + 0.2freshnessExcellent_j + \varepsilon_j +u_{j} = + -0.1 p_j + + 0.1 t^\mathrm{Gala}_j + 0.2 t^\mathrm{Honeycrisp}_j + + 0.1 f^\mathrm{Average}_j + 0.2 f^\mathrm{Excellent}_j + + \varepsilon_{j} $$ -```{r, eval=FALSE} +where $p$ is price, $t$ is type, and $f$ is freshness. + +```{r} +#| eval: false + data <- cbc_choices( design = design, obsID = "obsID", @@ -280,14 +331,16 @@ data <- cbc_choices( ) ``` -If you wish to include a prior model with an interaction, you can do so inside the `priors` list. For example, here is the same example as above but with an interaction between `price` and `type` added: +The prior model used to simulate choices can also include interaction effects. For example, the example below is the same as the previous example but with an added interaction between `price` and `type`: + +```{r} +#| eval: false -```{r, eval=FALSE} data <- cbc_choices( design = design, obsID = "obsID", priors = list( - price = 0.1, + price = -0.1, type = c(0.1, 0.2), freshness = c(0.1, 0.2), `price*type` = c(0.1, 0.5) @@ -295,9 +348,11 @@ data <- cbc_choices( ) ``` -Finally, you can also simulate data for a mixed logit model where parameters follow a normal or log-normal distribution across the population. In the example below, the `randN()` function is used to specify the `type` attribute with 2 random normal parameters with a specified vector of means (`mean`) and standard deviations (`sd`) for each level of `type`. Log-normal parameters are specified using `randLN()`. +Finally, to simulate choices according to a mixed logit model where parameters follow a normal or log-normal distribution across the population, the `randN()` and `randLN()` functions can be used inside the `priors` list. The example below models the `type` attribute with two random normal parameters using a vector of means (`mean`) and standard deviations (`sd`) for each level of `type`: + +```{r} +#| eval: false -```{r, eval=FALSE} data <- cbc_choices( design = design, obsID = "obsID", @@ -309,9 +364,13 @@ data <- cbc_choices( ) ``` -# Conduct a power analysis +# Assess power + +The simulated choice data can be used to conduct a power analysis by estimating a model multiple times with incrementally increasing sample sizes. As the sample size increases, the estimated coefficient standard errors will decrease, enabling the experimenter to identify the sample size required to achieve a desired level of precision. + +The `cbc_power()` function achieves this by partitioning the choice data into multiple sizes (defined by the `nbreaks` argument) and then estimating a user-defined choice model on each data subset. Model parameters are defined as a vector of attribute names that refer to column names in the `data` object. All models are estimated using the [{logitr} package](https://jhelvy.github.io/logitr/), and any additional arguments for estimating models with the \pkg{logitr} package can be passed through the `cbc_power()` function. For example, to assess power for a mixed logit model, the `randPars` argument can be used. For more information, see `?logitr::logitr` as well as the JSS article on the {logitr} package [@Helveston2023]. -The simulated choice data can be used to conduct a power analysis by estimating the same model multiple times with incrementally increasing sample sizes. As the sample size increases, the estimated coefficient standard errors will decrease (i.e. coefficient estimates become more precise). The `cbc_power()` function achieves this by partitioning the choice data into multiple sizes (defined by the `nbreaks` argument) and then estimating a user-defined choice model on each data subset. In the example below, 10 different sample sizes are used. All models are estimated using the [{logitr} package](https://jhelvy.github.io/logitr/). For more information, see `?logitr::logitr` as well as the JSS article on the {logitr} package [@Helveston2023]. +In the example below, the simulated choice data is broken into 10 chunks with increasing sample sizes and a multinomial logit model is estimated on each chunk with parameters for `price`, `type`, and `freshness`. ```{r} power <- cbc_power( @@ -327,17 +386,17 @@ head(power) tail(power) ``` -The `power` data frame contains the coefficient estimates and standard errors for each sample size. You can quickly visualize the outcome to identify a required sample size for a desired level of parameter precision by using the `plot()` method: +The returned data frame contains the coefficient estimates and standard errors for each chunk of the data. In the example above, it is clear that the standard errors for a sample size of 900 are much lower than those for a sample size of just 90. This approach can be used to more precisely identify sample size requirements by increasing `nbreaks`. + +Visualizing the results of the power analysis can be particularly helpful for identifying sample size requirements. Since the `cbc_power()` function returns a data frame in a "tidy" (or "long") format [@Wickham2014], the results can be conveniently plotted using the popular {ggplot2} package [@Wickham2016]. A `plot.cbc_errors()` method is included in {cbcTools} to create a simple ggplot of the power curves. ```{r power} plot(power) ``` -If you want to examine any other aspects of the models other than the standard errors, you can set `return_models = TRUE` and `cbc_power()` will return a list of estimated models. The example below prints a summary of the last model in the list of models: +Researchers may be interested in aspects other than standard errors. By setting `return_models = TRUE`, the `cbc_power()` function will return a list of estimated models (one for each data chunk), which can then be used to examine other model objects. The example below prints a summary of the last model in the list of returned models from a power analysis. ```{r} -library(logitr) - models <- cbc_power( data = data, pars = c("price", "type", "freshness"), From 4bec9db1bdeeac0ffa598af4e8a72047ec056331 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 08:03:40 -0400 Subject: [PATCH 22/43] fix blockID issue in reordering columns --- R/design.R | 3 ++- R/input_checks.R | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/R/design.R b/R/design.R index 3194a4b..01f882c 100644 --- a/R/design.R +++ b/R/design.R @@ -435,7 +435,8 @@ get_col_types <- function(data) { } reorder_cols <- function(design) { - metaNames <- c("profileID", "respID", "qID", "altID", "obsID", "blockID") + metaNames <- c("profileID", "respID", "qID", "altID", "obsID") + if ('blockID' %in% names(design)) { metaNames <- c(metaNames, 'blockID') } varNames <- setdiff(names(design), metaNames) design <- as.data.frame(design)[, c(metaNames, varNames)] return(design) diff --git a/R/input_checks.R b/R/input_checks.R index f101dc1..33fadd7 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -89,7 +89,10 @@ check_inputs_design <- function( } if ((n_blocks > 1) & (method == 'random')) { - stop('The "random" method cannot use blocking.') + stop( + 'The "random" method cannot use blocking. Either change the design ', + 'method or set "n_blocks = 1"' + ) if ((method == 'full') & profiles_restricted) { stop( 'The "full" method cannot use restricted profiles when blocking ', From a1d9842d01392374f0c1b414bd6cdfccf5e2f16d Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 08:06:23 -0400 Subject: [PATCH 23/43] Update usage.Rmd --- vignettes/usage.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index d7f638f..5f55d93 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -410,7 +410,7 @@ models <- cbc_power( summary(models[[10]]) ``` -## Pipe it all together! +# Pipe it all together! One of the convenient features of how the package is written is that the object generated in each step is used as the first argument to the function for the next step. Thus, just like in the overall program diagram, the functions can be piped together. For example, the "pipeline" below uses the Base R pipe operator (`|>`) to generate profiles, generate a design, simulate choices according to a prior utility model, conduct a power analysis, and then finally plot the results: From 45f4122d509fe6309f3115871b93048c64224c8a Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 08:12:17 -0400 Subject: [PATCH 24/43] fixed references --- R/design.R | 14 ++++++++------ man/cbc_design.Rd | 12 +++++++----- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/R/design.R b/R/design.R index 01f882c..aa00026 100644 --- a/R/design.R +++ b/R/design.R @@ -138,12 +138,14 @@ #' `"Modfed"`. For more details on Bayesian D-efficient designs, see #' `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the #' {idefix} package (Traets et al, 2020). -#' @references Grömping, U. (2018). R Package DoE.base for Factorial -#' Experiments. Journal of Statistical Software, 85(5), 1–41, -#' \doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. -#' (2020). Generating Optimal Designs for Discrete Choice Experiments in R: -#' The idefix Package. Journal of Statistical Software, 96(3), 1–41, -#' \doi{10.18637/jss.v096.i03} +#' @references Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41 +#' \doi{10.18637/jss.v085.i05} +#' +#' Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, +#' \doi{10.18637/jss.v096.i03} +#' +#' Wheeler B (2022)._AlgDesign: Algorithmic Experimental Design. R package version 1.2.1, +#' \href{https://CRAN.R-project.org/package=AlgDesign}{https://CRAN.R-project.org/package=AlgDesign}. #' @return The returned `design` data frame contains a choice-based conjoint #' survey design where each row is an alternative. It includes the following #' columns: diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index 4dbbcb5..704f4a8 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -283,12 +283,14 @@ design_bayesian <- cbc_design( ) } \references{ -Grömping, U. (2018). R Package DoE.base for Factorial -Experiments. Journal of Statistical Software, 85(5), 1–41, -\doi{10.18637/jss.v085.i05} Traets, F., Sanchez, D. G., & Vandebroek, M. -(2020). Generating Optimal Designs for Discrete Choice Experiments in R: -The idefix Package. Journal of Statistical Software, 96(3), 1–41, +Grömping, U. (2018). R Package DoE.base for Factorial Experiments. Journal of Statistical Software, 85(5), 1–41 +\doi{10.18637/jss.v085.i05} + +Traets, F., Sanchez, D. G., & Vandebroek, M. (2020). Generating Optimal Designs for Discrete Choice Experiments in R: The idefix Package. Journal of Statistical Software, 96(3), 1–41, \doi{10.18637/jss.v096.i03} + +Wheeler B (2022)._AlgDesign: Algorithmic Experimental Design. R package version 1.2.1, +\href{https://CRAN.R-project.org/package=AlgDesign}{https://CRAN.R-project.org/package=AlgDesign}. } \keyword{DoE.base} \keyword{design} From 9977eafce7bdae6602ab600ee9cfa7d8a03926bb Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 08:35:47 -0400 Subject: [PATCH 25/43] Update usage.Rmd --- vignettes/usage.Rmd | 86 +++++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 39 deletions(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 5f55d93..73ec74a 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -75,7 +75,15 @@ restricted_profiles # Generate survey designs -Once a set of profiles is obtained, a conjoint survey can then be generated using the `cbc_design()` function. The `method` argument determines the design method used. Options are: +Once a set of profiles is obtained, a conjoint survey can then be generated using the `cbc_design()` function. The function takes several arguments that are common to all design methods: + +- `profiles`: A data frame of profiles (generated with `cbc_profiles()`) to use in the design (with or without restrictions). +- `n_resp`: The number of respondents in the survey. +- `n_alts`: The number of alternatives per question. +- `n_q`: The number of questions per respondent. +- `method`: The design strategy to use (defaults to `"random"`). + +The `method` argument determines the design method used. Options are: - `"random"` - `"full"` @@ -126,42 +134,6 @@ dim(design_random) head(design_random) ``` -## Labeled designs (a.k.a. "alternative-specific" designs) - -A "labeled" design (also known as "alternative-specific" design) is one where the levels of one attribute are used as a label. This can be created by setting the `label` argument to that attribute. As of now, only `"random"` and `"full"` methods support labeled designs. Since this by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, the `n_alts` argument is overridden. Here is an example of a labeled random design using the `type` attribute as the label: - -```{r} -design_random_labeled <- cbc_design( - profiles = profiles, - n_resp = 900, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - label = "type" # Set the "type" attribute as the label -) - -dim(design_random_labeled) -head(design_random_labeled) -``` - -In the above example, you can see in the first six rows of the survey that the `type` attribute is always fixed to be the same order, ensuring that each level in the `type` attribute will always be shown in each choice question. - -## Designs with a "no choice" option (a.k.a. an "outside good") - -A "no choice" (also known as an "outside good") option can be included by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. - -```{r} -design_nochoice <- cbc_design( - profiles = profiles, - n_resp = 900, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - no_choice = TRUE -) - -dim(design_nochoice) -head(design_nochoice) -``` - ## Full factorial designs The `"full"` method for ("full factorial") creates a design where choice sets are created by randomly sampling from the full set of `profiles` *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created using mutually exclusive subsets of `profiles` within each block. This method produces a design with similar performance with that of the `"random"` method, except the choice sets are repeated and thus there will be many more opportunities for different respondents to see the same choice sets. This method is less efficient than other approaches and may lead to a deficient experiment in smaller sample sizes, though it guarantees equal ability to estimate main and interaction effects. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Gromping2018]. @@ -210,7 +182,7 @@ design_deff <- cbc_design( n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - method = 'orthogonal' + method = 'deff' ) dim(design_deff) @@ -245,7 +217,43 @@ dim(design_bayesian) head(design_bayesian) ``` -Bayesian D-efficient designs that include a "no choice" option should set `no_choice = TRUE` and also define a prior for the "no choice" option using `prior_no_choice`: +## Labeled designs (a.k.a. "alternative-specific" designs) + +A "labeled" design (also known as "alternative-specific" design) is one where the levels of one attribute are used as a label. This can be created by setting the `label` argument to that attribute. As of now, only `"random"` and `"full"` methods support labeled designs. Since this by definition sets the number of alternatives in each question to the number of levels in the chosen attribute, the `n_alts` argument is overridden. Here is an example of a labeled random design using the `type` attribute as the label: + +```{r} +design_random_labeled <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + label = "type" # Set the "type" attribute as the label +) + +dim(design_random_labeled) +head(design_random_labeled) +``` + +In the above example, you can see in the first six rows of the survey that the `type` attribute is always fixed to be the same order, ensuring that each level in the `type` attribute will always be shown in each choice question. + +## Designs with a "no choice" option (a.k.a. an "outside good") + +A "no choice" (also known as an "outside good") option can be included by setting `no_choice = TRUE`. If included, all categorical attributes will be dummy-coded to appropriately dummy-code the "no choice" alternative. All design methods can have a "no choice" added. + +```{r} +design_nochoice <- cbc_design( + profiles = profiles, + n_resp = 900, # Number of respondents + n_alts = 3, # Number of alternatives per question + n_q = 6, # Number of questions per respondent + no_choice = TRUE +) + +dim(design_nochoice) +head(design_nochoice) +``` + +For Bayesian D-efficient designs that include a "no choice" option, a prior for the "no choice" option must also be provided using `prior_no_choice`: ```{r} design_bayesian_no_choice <- cbc_design( From 71b5740dfafe807aa64c4a117d0420bbbf966b14 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 15:26:31 -0400 Subject: [PATCH 26/43] `deff` -> `dopt` for D-optimal designs --- DESCRIPTION | 2 +- R/design.R | 68 ++++++++++++++++++++++----------------------- man/cbc_design.Rd | 44 ++++++++++++++--------------- vignettes/usage.Rmd | 20 ++++++------- 4 files changed, 67 insertions(+), 67 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index df35fe1..ad18ea9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,7 @@ Authors@R: c( role = c("cre", "aut", "cph"), email = "john.helveston@gmail.com", comment = c(ORCID = "0000-0002-2657-9191"))) -Description: Design and evaluate choice-based conjoint survey experiments. Generate a variety of survey designs, including random designs, full factorial designs, orthogonal designs, D-efficient designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct a power analysis for a given survey design by estimating the same model on different subsets of the data to simulate different sample sizes. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . D-efficient designs are obtained using the 'AlgDesign' package (Wheeler, 2022) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Choice simulation and model estimation in power analyses are handled using the 'logitr' package (Helveston, 2023) . +Description: Design and evaluate choice-based conjoint survey experiments. Generate a variety of survey designs, including random designs, full factorial designs, orthogonal designs, D-optimal designs, and Bayesian D-efficient designs as well as designs with "no choice" options and "labeled" (also known as "alternative specific") designs. Conveniently inspect the design balance and overlap, and simulate choice data for a survey design either randomly or according to a multinomial or mixed logit utility model defined by user-provided prior parameters. Conduct a power analysis for a given survey design by estimating the same model on different subsets of the data to simulate different sample sizes. Full factorial and orthogonal designs are obtained using the 'DoE.base' package (Grömping, 2018) . D-optimal designs are obtained using the 'AlgDesign' package (Wheeler, 2022) . Bayesian D-efficient designs are obtained using the 'idefix' package (Traets et al, 2020) . Choice simulation and model estimation in power analyses are handled using the 'logitr' package (Helveston, 2023) . License: MIT + file LICENSE Encoding: UTF-8 LazyData: true diff --git a/R/design.R b/R/design.R index aa00026..5ad6622 100644 --- a/R/design.R +++ b/R/design.R @@ -31,7 +31,7 @@ #' argument will be ignored as its value is defined by the unique number of #' levels in the `label` variable. Defaults to `NULL`. #' @param method Choose the design method to use: `"random"`, `"full"`, -#' `"orthogonal"`, `"deff"`, `"CEA"`, or `"Modfed"`. Defaults to `"random"`. +#' `"orthogonal"`, `"dopt"`, `"CEA"`, or `"Modfed"`. Defaults to `"random"`. #' See details below for complete description of each method. #' @param priors A list of one or more assumed prior parameters used to generate #' a Bayesian D-efficient design. Defaults to `NULL` @@ -41,12 +41,12 @@ #' includes average predicted probabilities for each alternative in each #' choice set given the sample from the prior preference distribution. #' Defaults to `FALSE`.' -#' @param keep_deff If `TRUE`, for D-efficient designs the returned -#' object will be a list containing the design and the D-efficiency score. -#' Defaults to `FALSE`. -#' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the returned -#' object will be a list containing the design and the DB-error score. -#' Defaults to `FALSE`. +#' @param keep_d_eff If `TRUE`, for D-optimal designs (`method = "dopt"`) the +#' returned object will be a list containing the design and the D-efficiency +#' score. Defaults to `FALSE`. +#' @param keep_db_error If `TRUE`, for Bayesian D-efficient designs the +#' returned object will be a list containing the design and the DB-error +#' score. Defaults to `FALSE`. #' @param max_iter A numeric value indicating the maximum number allowed #' iterations when searching for a Bayesian D-efficient design. The default is #' 50. @@ -58,7 +58,7 @@ #' - `"random"` #' - `"full"` #' - `"orthogonal"` -#' - `"deff"` +#' - `"dopt"` #' - `"CEA"` #' - `"Modfed"` #' @@ -77,7 +77,7 @@ #' `"random"` | ✅ | ✅ | ✅ | ❌ #' `"full"` | ✅ | ✅ | ✅ | ✅ #' `"orthogonal"` | ✅ | ❌ | ❌ | ✅ -#' `"deff"` | ✅ | ❌ | ✅ | ✅ +#' `"dopt"` | ✅ | ❌ | ✅ | ✅ #' `"CEA"` | ✅ | ❌ | ❌ | ✅ #' `"Modfed"` | ✅ | ❌ | ✅ | ✅ #' @@ -116,15 +116,15 @@ #' more information about orthogonal designs, see `?DoE.base::oa.design` as #' well as the JSS article on the {DoE.base} package (Grömping, 2018). #' -#' The `"deff"` method creates a "D-efficient" design where an array from -#' `profiles` is found that maximizes the D efficiency using the Federov -#' algorithm, with the total number of unique choice sets determined by -#' `n_q*n_blocks`. Choice sets are then created by randomly sampling from this -#' array *without replacement*. The choice sets are then repeated to meet the -#' desired number of survey respondents (determined by `n_resp`). If blocking -#' is used, choice set blocks are created from the D-efficient array. For more -#' information about the underlying algorithm for this method, see -#' `?AlgDesign::optFederov`. +#' The `"dopt"` method creates a "D-optimal" design where an array from +#' `profiles` is found that maximizes the D-efficiency of a linear model +#' using the Federov algorithm, with the total number of unique choice sets +#' determined by `n_q*n_blocks`. Choice sets are then created by randomly +#' sampling from this array *without replacement*. The choice sets are then +#' repeated to meet the desired number of survey respondents (determined by +#' `n_resp`). If blocking is used, choice set blocks are created from the +#' D-efficient array. For more information about the underlying algorithm +#' for this method, see `?AlgDesign::optFederov`. #' #' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a #' Bayesian D-efficient design for the choice sets, with the total number of @@ -196,13 +196,13 @@ #' method = 'orthogonal' #' ) #' -#' # Make a D-efficient design -#' design_deff <- cbc_design( +#' # Make a D-optimal design +#' design_dopt <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6, # Number of questions per respondent -#' method = 'deff' +#' method = 'dopt' #' ) #' #' # Make a survey by randomly sampling from all possible profiles @@ -255,7 +255,7 @@ cbc_design <- function( priors = NULL, prior_no_choice = NULL, probs = FALSE, - keep_deff = FALSE, + keep_d_eff = FALSE, keep_db_error = FALSE, max_iter = 50, parallel = FALSE @@ -276,7 +276,7 @@ cbc_design <- function( priors, prior_no_choice, probs, - keep_deff, + keep_d_eff, keep_db_error, max_iter, parallel, @@ -295,9 +295,9 @@ cbc_design <- function( design <- make_design_orthogonal( profiles, n_resp, n_alts, n_q, n_blocks, no_choice ) - } else if (method == 'deff') { - design <- make_design_deff( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_deff + } else if (method == 'dopt') { + design <- make_design_dopt( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_d_eff ) } else { design <- make_design_bayesian( @@ -729,14 +729,14 @@ make_design_orthogonal <- function( return(design) } -# D-Efficient Design ---- +# D-optimal Design ---- -make_design_deff <- function( - profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_deff +make_design_dopt <- function( + profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_d_eff ) { # First obtain the d-efficient array des <- AlgDesign::optFederov(~., profiles[,2:length(profiles)], n_q*n_blocks) - deff <- des$Ge + d_eff <- des$Ge profiles <- merge(des$design, profiles, all.x = TRUE) profiles <- profiles[c( 'profileID', names(profiles)[which(names(profiles) != 'profileID')])] @@ -751,11 +751,11 @@ make_design_deff <- function( design <- add_no_choice(design, n_alts) } # Print D-efficiency - message("D-efficient design found with D-efficiency of ", round(deff, 5)) + message("D-efficient design found with D-efficiency of ", round(d_eff, 5)) - # Return list containing the design and DB error if keep_deff = TRUE - if (keep_deff) { - return(list(design = design, deff = deff)) + # Return list containing the design and DB error if keep_d_eff = TRUE + if (keep_d_eff) { + return(list(design = design, d_eff = d_eff)) } return(design) } diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index 704f4a8..d444b7d 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -18,7 +18,7 @@ cbc_design( priors = NULL, prior_no_choice = NULL, probs = FALSE, - keep_deff = FALSE, + keep_d_eff = FALSE, keep_db_error = FALSE, max_iter = 50, parallel = FALSE @@ -58,7 +58,7 @@ argument will be ignored as its value is defined by the unique number of levels in the \code{label} variable. Defaults to \code{NULL}.} \item{method}{Choose the design method to use: \code{"random"}, \code{"full"}, -\code{"orthogonal"}, \code{"deff"}, \code{"CEA"}, or \code{"Modfed"}. Defaults to \code{"random"}. +\code{"orthogonal"}, \code{"dopt"}, \code{"CEA"}, or \code{"Modfed"}. Defaults to \code{"random"}. See details below for complete description of each method.} \item{priors}{A list of one or more assumed prior parameters used to generate @@ -72,13 +72,13 @@ includes average predicted probabilities for each alternative in each choice set given the sample from the prior preference distribution. Defaults to \code{FALSE}.'} -\item{keep_deff}{If \code{TRUE}, for D-efficient designs the returned -object will be a list containing the design and the D-efficiency score. -Defaults to \code{FALSE}.} +\item{keep_d_eff}{If \code{TRUE}, for D-optimal designs (\code{method = "dopt"}) the +returned object will be a list containing the design and the D-efficiency +score. Defaults to \code{FALSE}.} -\item{keep_db_error}{If \code{TRUE}, for Bayesian D-efficient designs the returned -object will be a list containing the design and the DB-error score. -Defaults to \code{FALSE}.} +\item{keep_db_error}{If \code{TRUE}, for Bayesian D-efficient designs the +returned object will be a list containing the design and the DB-error +score. Defaults to \code{FALSE}.} \item{max_iter}{A numeric value indicating the maximum number allowed iterations when searching for a Bayesian D-efficient design. The default is @@ -114,7 +114,7 @@ are: \item \code{"random"} \item \code{"full"} \item \code{"orthogonal"} -\item \code{"deff"} +\item \code{"dopt"} \item \code{"CEA"} \item \code{"Modfed"} @@ -132,7 +132,7 @@ of restricted profile, and the use of blocking.\tabular{lllll}{ \code{"random"} \tab ✅ \tab ✅ \tab ✅ \tab ❌ \cr \code{"full"} \tab ✅ \tab ✅ \tab ✅ \tab ✅ \cr \code{"orthogonal"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr - \code{"deff"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr + \code{"dopt"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr \code{"CEA"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr \code{"Modfed"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr } @@ -173,15 +173,15 @@ main effects at the expense of information about interaction effects. For more information about orthogonal designs, see \code{?DoE.base::oa.design} as well as the JSS article on the {DoE.base} package (Grömping, 2018). -The \code{"deff"} method creates a "D-efficient" design where an array from -\code{profiles} is found that maximizes the D efficiency using the Federov -algorithm, with the total number of unique choice sets determined by -\code{n_q*n_blocks}. Choice sets are then created by randomly sampling from this -array \emph{without replacement}. The choice sets are then repeated to meet the -desired number of survey respondents (determined by \code{n_resp}). If blocking -is used, choice set blocks are created from the D-efficient array. For more -information about the underlying algorithm for this method, see -\code{?AlgDesign::optFederov}. +The \code{"dopt"} method creates a "D-optimal" design where an array from +\code{profiles} is found that maximizes the D-efficiency of a linear model +using the Federov algorithm, with the total number of unique choice sets +determined by \code{n_q*n_blocks}. Choice sets are then created by randomly +sampling from this array \emph{without replacement}. The choice sets are then +repeated to meet the desired number of survey respondents (determined by +\code{n_resp}). If blocking is used, choice set blocks are created from the +D-efficient array. For more information about the underlying algorithm +for this method, see \code{?AlgDesign::optFederov}. The \code{"CEA"} and \code{"Modfed"} methods use the specified \code{priors} to create a Bayesian D-efficient design for the choice sets, with the total number of @@ -236,13 +236,13 @@ design_orthogonal <- cbc_design( method = 'orthogonal' ) -# Make a D-efficient design -design_deff <- cbc_design( +# Make a D-optimal design +design_dopt <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - method = 'deff' + method = 'dopt' ) # Make a survey by randomly sampling from all possible profiles diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 73ec74a..055f993 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -88,7 +88,7 @@ The `method` argument determines the design method used. Options are: - `"random"` - `"full"` - `"orthogonal"` -- `"deff"` +- `"dopt"` - `"CEA"` - `"Modfed"` @@ -104,7 +104,7 @@ Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocki `"random"` | ✅ | ✅ | ✅ | ❌ `"full"` | ✅ | ✅ | ✅ | ✅ `"orthogonal"` | ✅ | ❌ | ❌ | ✅ -`"deff"` | ✅ | ❌ | ✅ | ✅ +`"dopt"` | ✅ | ❌ | ✅ | ✅ `"CEA"` | ✅ | ❌ | ❌ | ✅ `"Modfed"` | ✅ | ❌ | ✅ | ✅ @@ -136,7 +136,7 @@ head(design_random) ## Full factorial designs -The `"full"` method for ("full factorial") creates a design where choice sets are created by randomly sampling from the full set of `profiles` *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created using mutually exclusive subsets of `profiles` within each block. This method produces a design with similar performance with that of the `"random"` method, except the choice sets are repeated and thus there will be many more opportunities for different respondents to see the same choice sets. This method is less efficient than other approaches and may lead to a deficient experiment in smaller sample sizes, though it guarantees equal ability to estimate main and interaction effects. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Gromping2018]. +The `"full"` method for ("full factorial") creates a design where choice sets are created by randomly sampling from the full set of `profiles` *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created using mutually exclusive subsets of `profiles` within each block. This method produces a design with similar performance with that of the `"random"` method, except the choice sets are repeated and thus there will be many more opportunities for different respondents to see the same choice sets. For more information about blocking with full factorial designs, see `?DoE.base::fac.design` as well as the JSS article on the {DoE.base} package [@Gromping2018]. ```{r} design_full <- cbc_design( @@ -170,23 +170,23 @@ dim(design_orthogonal) head(design_orthogonal) ``` -## D-efficient designs +## D-optimal designs -The `"deff"` method creates a "D-efficient" design where an array from `profiles` is found that maximizes the D-efficiency using the Federov algorithm, with the total number of unique choice sets determined by `n_q*n_blocks`. The optimization is handled using the [{AlgDesign} package](https://cran.r-project.org/package=AlgDesign). Choice sets are then created by randomly sampling from this array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created from the D-efficient array. For more information about the underlying algorithm for this method, see `?AlgDesign::optFederov` [@Wheeler2022]. +The `"dopt"` method creates a "D-optimal" design where an array from `profiles` is found that maximizes the D-efficiency of a linear model using the Federov algorithm, with the total number of unique choice sets determined by `n_q*n_blocks`. The optimization is handled using the [{AlgDesign} package](https://cran.r-project.org/package=AlgDesign). Choice sets are then created by randomly sampling from this array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created from the D-efficient array. For more information about the underlying algorithm for this method, see `?AlgDesign::optFederov` [@Wheeler2022]. ```{r} #| message: true -design_deff <- cbc_design( +design_dopt <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - method = 'deff' + method = 'dopt' ) -dim(design_deff) -head(design_deff) +dim(design_dopt) +head(design_dopt) ``` ## Bayesian D-efficient designs @@ -234,7 +234,7 @@ dim(design_random_labeled) head(design_random_labeled) ``` -In the above example, you can see in the first six rows of the survey that the `type` attribute is always fixed to be the same order, ensuring that each level in the `type` attribute will always be shown in each choice question. +In the above example, the `type` attribute is now fixed to be the same order for every choice question, ensuring that each level in the `type` attribute will always be shown in each choice question. ## Designs with a "no choice" option (a.k.a. an "outside good") From 15d69fa25027b02c8f8f66814306cc14dcd0e2f3 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 15:30:56 -0400 Subject: [PATCH 27/43] Update input_checks.R --- R/input_checks.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/input_checks.R b/R/input_checks.R index 33fadd7..27e518b 100644 --- a/R/input_checks.R +++ b/R/input_checks.R @@ -31,11 +31,11 @@ check_design_method <- function(method, priors) { # Check that an appropriate method is used if (! method %in% c( - 'random', 'full', 'orthogonal', 'deff', 'CEA', 'Modfed' + 'random', 'full', 'orthogonal', 'dopt', 'CEA', 'Modfed' )) { stop( 'The "method" argument must be set to "random", "full", ', - '"orthogonal", "deff", "CEA", or "Modfed"' + '"orthogonal", "dopt", "CEA", or "Modfed"' ) } @@ -71,7 +71,7 @@ check_inputs_design <- function( priors, prior_no_choice, probs, - keep_deff, + keep_d_eff, keep_db_error, max_iter, parallel, @@ -119,7 +119,7 @@ check_inputs_design <- function( if (!method %in% c('random', 'full')) { stop( 'Restricted profile sets can only be used with the "random", "full" ', - '"deff", or "Modfed" methods' + '"dopt", or "Modfed" methods' ) } } From 7f1af0a4a63d569945736696954d2ad808105345 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 15:38:28 -0400 Subject: [PATCH 28/43] more small fixes to language --- R/design.R | 6 +++--- man/cbc_design.Rd | 2 +- vignettes/usage.Rmd | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/R/design.R b/R/design.R index 5ad6622..b12d7fe 100644 --- a/R/design.R +++ b/R/design.R @@ -123,7 +123,7 @@ #' sampling from this array *without replacement*. The choice sets are then #' repeated to meet the desired number of survey respondents (determined by #' `n_resp`). If blocking is used, choice set blocks are created from the -#' D-efficient array. For more information about the underlying algorithm +#' D-optimal array. For more information about the underlying algorithm #' for this method, see `?AlgDesign::optFederov`. #' #' The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a @@ -734,7 +734,7 @@ make_design_orthogonal <- function( make_design_dopt <- function( profiles, n_resp, n_alts, n_q, n_blocks, no_choice, keep_d_eff ) { - # First obtain the d-efficient array + # First obtain the d-optimal array des <- AlgDesign::optFederov(~., profiles[,2:length(profiles)], n_q*n_blocks) d_eff <- des$Ge profiles <- merge(des$design, profiles, all.x = TRUE) @@ -751,7 +751,7 @@ make_design_dopt <- function( design <- add_no_choice(design, n_alts) } # Print D-efficiency - message("D-efficient design found with D-efficiency of ", round(d_eff, 5)) + message("D-optimal design found with D-efficiency of ", round(d_eff, 5)) # Return list containing the design and DB error if keep_d_eff = TRUE if (keep_d_eff) { diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index d444b7d..c4fd5ea 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -180,7 +180,7 @@ determined by \code{n_q*n_blocks}. Choice sets are then created by randomly sampling from this array \emph{without replacement}. The choice sets are then repeated to meet the desired number of survey respondents (determined by \code{n_resp}). If blocking is used, choice set blocks are created from the -D-efficient array. For more information about the underlying algorithm +D-optimal array. For more information about the underlying algorithm for this method, see \code{?AlgDesign::optFederov}. The \code{"CEA"} and \code{"Modfed"} methods use the specified \code{priors} to create a diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 055f993..6b4c5be 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -172,7 +172,7 @@ head(design_orthogonal) ## D-optimal designs -The `"dopt"` method creates a "D-optimal" design where an array from `profiles` is found that maximizes the D-efficiency of a linear model using the Federov algorithm, with the total number of unique choice sets determined by `n_q*n_blocks`. The optimization is handled using the [{AlgDesign} package](https://cran.r-project.org/package=AlgDesign). Choice sets are then created by randomly sampling from this array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created from the D-efficient array. For more information about the underlying algorithm for this method, see `?AlgDesign::optFederov` [@Wheeler2022]. +The `"dopt"` method creates a "D-optimal" design where an array from `profiles` is found that maximizes the D-efficiency of a linear model using the Federov algorithm, with the total number of unique choice sets determined by `n_q*n_blocks`. The optimization is handled using the [{AlgDesign} package](https://cran.r-project.org/package=AlgDesign). Choice sets are then created by randomly sampling from this array *without replacement*. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). If blocking is used, choice set blocks are created from the D-optimal array. For more information about the underlying algorithm for this method, see `?AlgDesign::optFederov` [@Wheeler2022]. ```{r} #| message: true From 1b2de3fbc33138bd16c68d2d3b2f55e8fc942427 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 15:54:03 -0400 Subject: [PATCH 29/43] version bump --- DESCRIPTION | 2 +- NEWS.md | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ad18ea9..5993a0d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: cbcTools Title: Choice-Based Conjoint Experiment Design Generation and Power Evaluation in R -Version: 0.4.0 +Version: 0.5.0 Maintainer: John Helveston Authors@R: c( person(given = "John", diff --git a/NEWS.md b/NEWS.md index 6f8c68b..3aefa11 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # cbcTools (development version) +# cbcTools 0.5.0 + +- Further revisions to the `method` argument in the `cbc_design()` function. +- Added the `"random"` and `"dopt"` methods. - Added restrictions so that orthogonal designs cannot use the `label` argument or restricted profile sets (as either of these would result in a non-orthogonal design). # cbcTools 0.4.0 From e807ad50788b3ce3819a8b62ecd6f1b088cfe252 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 16:14:12 -0400 Subject: [PATCH 30/43] added latex for table in pdf form --- vignettes/usage.Rmd | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 6b4c5be..5164d51 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -5,9 +5,15 @@ vignette: > %\VignetteIndexEntry{Usage} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} + \usepackage{xcolor} + \usepackage{bbding} bibliography: "`r here::here('vignettes', 'library.bib')`" +classoption: dvipsnames # For other colors in xcolors --- +\newcommand{\checkgreen}{\textcolor{Green}{\Checkmark}} +\newcommand{\xred}{\textcolor{red}{\XSolid}} + ```{r setup, include=FALSE, message=FALSE, warning=FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -99,6 +105,32 @@ All methods ensure that the two following criteria are met: The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. +```{r, eval=knitr::is_latex_output(), echo=FALSE} +knitr::asis_output(' +\begin{table}[ht] + \caption{Summary of addition options supported by each design method.} + \vspace{-20pt} + \begin{center} + \renewcommand{\arraystretch}{1.1} + \begin{tabular}{lllll} + \toprule[1.5pt] + Method & Include no choice & Labeled designs & Use restricted profiles & Blocking \\ + \midrule + \code{"random"} & \checkgreen & \checkgreen & \checkgreen & \xred \\ + \code{"full"} & \checkgreen & \checkgreen & \checkgreen & \checkgreen \\ + \code{"orthogonal"} & \checkgreen & \xred & \xred & \checkgreen \\ + \code{"dopt"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ + \code{"CEA"} & \checkgreen & \xred & \xred & \checkgreen \\ + \code{"Modfed"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ + \bottomrule[1pt] + \end{tabular} + \end{center} +\end{table} +') +``` + +```{r, eval=knitr::is_html_output(), echo=FALSE} +knitr::asis_output(' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? ---|---|---|---|--- `"random"` | ✅ | ✅ | ✅ | ❌ @@ -107,6 +139,8 @@ Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocki `"dopt"` | ✅ | ❌ | ✅ | ✅ `"CEA"` | ✅ | ❌ | ❌ | ✅ `"Modfed"` | ✅ | ❌ | ✅ | ✅ +') +``` The returned `design` data frame contains a choice-based conjoint survey design where each row is an alternative. It includes the following columns: From 122fe7acc3c3dc2a6d71eaacfff45a2ebe3768fa Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 16:20:22 -0400 Subject: [PATCH 31/43] got rid of unicode syms...just too hard to get working with pdf :( --- vignettes/usage.Rmd | 46 ++++++--------------------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 5164d51..4741837 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -5,15 +5,9 @@ vignette: > %\VignetteIndexEntry{Usage} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} - \usepackage{xcolor} - \usepackage{bbding} bibliography: "`r here::here('vignettes', 'library.bib')`" -classoption: dvipsnames # For other colors in xcolors --- -\newcommand{\checkgreen}{\textcolor{Green}{\Checkmark}} -\newcommand{\xred}{\textcolor{red}{\XSolid}} - ```{r setup, include=FALSE, message=FALSE, warning=FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -105,42 +99,14 @@ All methods ensure that the two following criteria are met: The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. -```{r, eval=knitr::is_latex_output(), echo=FALSE} -knitr::asis_output(' -\begin{table}[ht] - \caption{Summary of addition options supported by each design method.} - \vspace{-20pt} - \begin{center} - \renewcommand{\arraystretch}{1.1} - \begin{tabular}{lllll} - \toprule[1.5pt] - Method & Include no choice & Labeled designs & Use restricted profiles & Blocking \\ - \midrule - \code{"random"} & \checkgreen & \checkgreen & \checkgreen & \xred \\ - \code{"full"} & \checkgreen & \checkgreen & \checkgreen & \checkgreen \\ - \code{"orthogonal"} & \checkgreen & \xred & \xred & \checkgreen \\ - \code{"dopt"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ - \code{"CEA"} & \checkgreen & \xred & \xred & \checkgreen \\ - \code{"Modfed"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ - \bottomrule[1pt] - \end{tabular} - \end{center} -\end{table} -') -``` - -```{r, eval=knitr::is_html_output(), echo=FALSE} -knitr::asis_output(' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? ---|---|---|---|--- -`"random"` | ✅ | ✅ | ✅ | ❌ -`"full"` | ✅ | ✅ | ✅ | ✅ -`"orthogonal"` | ✅ | ❌ | ❌ | ✅ -`"dopt"` | ✅ | ❌ | ✅ | ✅ -`"CEA"` | ✅ | ❌ | ❌ | ✅ -`"Modfed"` | ✅ | ❌ | ✅ | ✅ -') -``` +`"random"` | Yes | Yes | Yes | No +`"full"` | Yes | Yes | Yes | Yes +`"orthogonal"` | Yes | No | No | Yes +`"dopt"` | Yes | No | Yes | Yes +`"CEA"` | Yes | No | No | Yes +`"Modfed"` | Yes | No | Yes | Yes The returned `design` data frame contains a choice-based conjoint survey design where each row is an alternative. It includes the following columns: From 159feba469a61c878d94ae776d25157833c2322c Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 16:23:46 -0400 Subject: [PATCH 32/43] Revert "got rid of unicode syms...just too hard to get working with pdf :(" This reverts commit 122fe7acc3c3dc2a6d71eaacfff45a2ebe3768fa. --- vignettes/usage.Rmd | 46 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 4741837..5164d51 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -5,9 +5,15 @@ vignette: > %\VignetteIndexEntry{Usage} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} + \usepackage{xcolor} + \usepackage{bbding} bibliography: "`r here::here('vignettes', 'library.bib')`" +classoption: dvipsnames # For other colors in xcolors --- +\newcommand{\checkgreen}{\textcolor{Green}{\Checkmark}} +\newcommand{\xred}{\textcolor{red}{\XSolid}} + ```{r setup, include=FALSE, message=FALSE, warning=FALSE} knitr::opts_chunk$set( collapse = TRUE, @@ -99,14 +105,42 @@ All methods ensure that the two following criteria are met: The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. +```{r, eval=knitr::is_latex_output(), echo=FALSE} +knitr::asis_output(' +\begin{table}[ht] + \caption{Summary of addition options supported by each design method.} + \vspace{-20pt} + \begin{center} + \renewcommand{\arraystretch}{1.1} + \begin{tabular}{lllll} + \toprule[1.5pt] + Method & Include no choice & Labeled designs & Use restricted profiles & Blocking \\ + \midrule + \code{"random"} & \checkgreen & \checkgreen & \checkgreen & \xred \\ + \code{"full"} & \checkgreen & \checkgreen & \checkgreen & \checkgreen \\ + \code{"orthogonal"} & \checkgreen & \xred & \xred & \checkgreen \\ + \code{"dopt"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ + \code{"CEA"} & \checkgreen & \xred & \xred & \checkgreen \\ + \code{"Modfed"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ + \bottomrule[1pt] + \end{tabular} + \end{center} +\end{table} +') +``` + +```{r, eval=knitr::is_html_output(), echo=FALSE} +knitr::asis_output(' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? ---|---|---|---|--- -`"random"` | Yes | Yes | Yes | No -`"full"` | Yes | Yes | Yes | Yes -`"orthogonal"` | Yes | No | No | Yes -`"dopt"` | Yes | No | Yes | Yes -`"CEA"` | Yes | No | No | Yes -`"Modfed"` | Yes | No | Yes | Yes +`"random"` | ✅ | ✅ | ✅ | ❌ +`"full"` | ✅ | ✅ | ✅ | ✅ +`"orthogonal"` | ✅ | ❌ | ❌ | ✅ +`"dopt"` | ✅ | ❌ | ✅ | ✅ +`"CEA"` | ✅ | ❌ | ❌ | ✅ +`"Modfed"` | ✅ | ❌ | ✅ | ✅ +') +``` The returned `design` data frame contains a choice-based conjoint survey design where each row is an alternative. It includes the following columns: From ae844917aef26586c16602fd6982775c26495f8c Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Fri, 7 Jul 2023 16:24:30 -0400 Subject: [PATCH 33/43] Update .Rbuildignore --- .Rbuildignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 8634dbd..b675ab6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -4,13 +4,10 @@ ^docs$ ^pkgdown$ ^\.github$ -vignettes ^README\.Rmd$ ^LICENSE\.md$ -^cjTools\.Rproj$ ^doc$ ^Meta$ -^next_release\.md$ ^build\.R$ ^foo\.R$ ^cran-comments\.md$ From 7c4912a3f99cab0ddd4f27500a4b126ae3cea0c9 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:07:39 -0400 Subject: [PATCH 34/43] added VignetteBuilder to description --- DESCRIPTION | 2 ++ 1 file changed, 2 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 5993a0d..23a73ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,10 +14,12 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.2.3 +VignetteBuilder: knitr Depends: R (>= 3.5.0) Suggests: here, + knitr, testthat, tibble Imports: From 2c2ca5fdb48c68055893d24ddc76018686ecf68b Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:23:16 -0400 Subject: [PATCH 35/43] remove beautiful html table cuz pkg vignette is a pdf :( --- vignettes/usage.Rmd | 40 ++++++---------------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 5164d51..828ee00 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -105,42 +105,14 @@ All methods ensure that the two following criteria are met: The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. -```{r, eval=knitr::is_latex_output(), echo=FALSE} -knitr::asis_output(' -\begin{table}[ht] - \caption{Summary of addition options supported by each design method.} - \vspace{-20pt} - \begin{center} - \renewcommand{\arraystretch}{1.1} - \begin{tabular}{lllll} - \toprule[1.5pt] - Method & Include no choice & Labeled designs & Use restricted profiles & Blocking \\ - \midrule - \code{"random"} & \checkgreen & \checkgreen & \checkgreen & \xred \\ - \code{"full"} & \checkgreen & \checkgreen & \checkgreen & \checkgreen \\ - \code{"orthogonal"} & \checkgreen & \xred & \xred & \checkgreen \\ - \code{"dopt"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ - \code{"CEA"} & \checkgreen & \xred & \xred & \checkgreen \\ - \code{"Modfed"} & \checkgreen & \xred & \checkgreen & \checkgreen \\ - \bottomrule[1pt] - \end{tabular} - \end{center} -\end{table} -') -``` - -```{r, eval=knitr::is_html_output(), echo=FALSE} -knitr::asis_output(' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? ---|---|---|---|--- -`"random"` | ✅ | ✅ | ✅ | ❌ -`"full"` | ✅ | ✅ | ✅ | ✅ -`"orthogonal"` | ✅ | ❌ | ❌ | ✅ -`"dopt"` | ✅ | ❌ | ✅ | ✅ -`"CEA"` | ✅ | ❌ | ❌ | ✅ -`"Modfed"` | ✅ | ❌ | ✅ | ✅ -') -``` +`"random"` | Yes | Yes | Yes | No +`"full"` | Yes | Yes | Yes | Yes +`"orthogonal"` | Yes | No | No | Yes +`"dopt"` | Yes | No | Yes | Yes +`"CEA"` | Yes | No | No | Yes +`"Modfed"` | Yes | No | Yes | Yes The returned `design` data frame contains a choice-based conjoint survey design where each row is an alternative. It includes the following columns: From 51122d564a6d485fa8bf866cc0b002e1d3e9d35d Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:26:56 -0400 Subject: [PATCH 36/43] fix idefix url --- vignettes/usage.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 828ee00..f2ec4dd 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -197,7 +197,7 @@ head(design_dopt) ## Bayesian D-efficient designs -The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a Bayesian D-efficient design for the choice sets, with the total number of unique choice sets determined by `n_q*n_blocks`. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). These designs are optimized to minimize the D-error of the design given a prior model, which is handled using the [{idefix} package](https://cran.r-project.org/web/packages/idefix/index.html). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. If `"CEA"` or `"Modfed"` is used without specifying `priors`, a prior of all `0`s will be used and a warning message stating this will be shown. In the opposite case, if `priors` are specified but neither Bayesian method is used, the `"CEA"` method will be used and a warning stating this will be shown. Restricted sets of `profiles` can only be used with `"Modfed"`. For more details on Bayesian D-efficient designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the {idefix} package [@Traets2020]. +The `"CEA"` and `"Modfed"` methods use the specified `priors` to create a Bayesian D-efficient design for the choice sets, with the total number of unique choice sets determined by `n_q*n_blocks`. The choice sets are then repeated to meet the desired number of survey respondents (determined by `n_resp`). These designs are optimized to minimize the D-error of the design given a prior model, which is handled using the [{idefix} package](https://CRAN.R-project.org/package=idefix). For now, designs are limited to multinomial logit priors (the {idefix} package can generate designs with mixed logit priors). These designs also currently do not support the ability to specify interaction terms in the prior model or use "labeled" designs. If `"CEA"` or `"Modfed"` is used without specifying `priors`, a prior of all `0`s will be used and a warning message stating this will be shown. In the opposite case, if `priors` are specified but neither Bayesian method is used, the `"CEA"` method will be used and a warning stating this will be shown. Restricted sets of `profiles` can only be used with `"Modfed"`. For more details on Bayesian D-efficient designs, see `?idefix::CEA` and `?idefix::Modfed` as well as the JSS article on the {idefix} package [@Traets2020]. In the example below, the prior model assumes the following parameters: From 3201a9889d0a17f306d9ff4a55768de940ad6211 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:35:20 -0400 Subject: [PATCH 37/43] remove pretty table in `cbc_design` documentation --- R/design.R | 12 ++++++------ man/cbc_design.Rd | 12 ++++++------ vignettes/usage.Rmd | 4 ---- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/R/design.R b/R/design.R index b12d7fe..f901408 100644 --- a/R/design.R +++ b/R/design.R @@ -74,12 +74,12 @@ #' #' Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? #' ---|---|---|---|--- -#' `"random"` | ✅ | ✅ | ✅ | ❌ -#' `"full"` | ✅ | ✅ | ✅ | ✅ -#' `"orthogonal"` | ✅ | ❌ | ❌ | ✅ -#' `"dopt"` | ✅ | ❌ | ✅ | ✅ -#' `"CEA"` | ✅ | ❌ | ❌ | ✅ -#' `"Modfed"` | ✅ | ❌ | ✅ | ✅ +#' `"random"` | Yes | Yes | Yes | No +#' `"full"` | Yes | Yes | Yes | Yes +#' `"orthogonal"` | Yes | No | No | Yes +#' `"dopt"` | Yes | No | Yes | Yes +#' `"CEA"` | Yes | No | No | Yes +#' `"Modfed"` | Yes | No | Yes | Yes #' #' The `"random"` method (the default) creates a design where choice sets are #' created by randomly sampling from the full set of `profiles` *with diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index c4fd5ea..ae66fc0 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -129,12 +129,12 @@ including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking.\tabular{lllll}{ Method \tab Include "no choice"? \tab Labeled designs? \tab Restricted profiles? \tab Blocking? \cr - \code{"random"} \tab ✅ \tab ✅ \tab ✅ \tab ❌ \cr - \code{"full"} \tab ✅ \tab ✅ \tab ✅ \tab ✅ \cr - \code{"orthogonal"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr - \code{"dopt"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr - \code{"CEA"} \tab ✅ \tab ❌ \tab ❌ \tab ✅ \cr - \code{"Modfed"} \tab ✅ \tab ❌ \tab ✅ \tab ✅ \cr + \code{"random"} \tab Yes \tab Yes \tab Yes \tab No \cr + \code{"full"} \tab Yes \tab Yes \tab Yes \tab Yes \cr + \code{"orthogonal"} \tab Yes \tab No \tab No \tab Yes \cr + \code{"dopt"} \tab Yes \tab No \tab Yes \tab Yes \cr + \code{"CEA"} \tab Yes \tab No \tab No \tab Yes \cr + \code{"Modfed"} \tab Yes \tab No \tab Yes \tab Yes \cr } diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index f2ec4dd..1ea579e 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -8,12 +8,8 @@ vignette: > \usepackage{xcolor} \usepackage{bbding} bibliography: "`r here::here('vignettes', 'library.bib')`" -classoption: dvipsnames # For other colors in xcolors --- -\newcommand{\checkgreen}{\textcolor{Green}{\Checkmark}} -\newcommand{\xred}{\textcolor{red}{\XSolid}} - ```{r setup, include=FALSE, message=FALSE, warning=FALSE} knitr::opts_chunk$set( collapse = TRUE, From fca31de3171d8e3307b5814085127d121a22522a Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:52:05 -0400 Subject: [PATCH 38/43] added seed to vignette --- vignettes/usage.Rmd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 1ea579e..5e6c9fe 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -124,6 +124,8 @@ The returned `design` data frame contains a choice-based conjoint survey design The `"random"` method (the default) creates a design where choice sets are created by randomly sampling from the full set of `profiles` *with* replacement. This means that few (if any) respondents will see the same sets of choice sets. This method is less efficient than other approaches and may lead to a deficient experiment in smaller sample sizes, though it guarantees equal ability to estimate main and interaction effects. ```{r} +set.seed(5678) + design_random <- cbc_design( profiles = profiles, n_resp = 900, # Number of respondents From a821d9bb6bdda8871830ab90ddd0bef1ad9655ad Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 16:58:07 -0400 Subject: [PATCH 39/43] Create CRAN-SUBMISSION --- CRAN-SUBMISSION | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 CRAN-SUBMISSION diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION new file mode 100644 index 0000000..485b557 --- /dev/null +++ b/CRAN-SUBMISSION @@ -0,0 +1,3 @@ +Version: 0.5.0 +Date: 2023-07-10 20:57:55 UTC +SHA: fca31de3171d8e3307b5814085127d121a22522a From e862e8640e33a35c32c9eb250bbbd7eb5b93d6e3 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 19:27:01 -0400 Subject: [PATCH 40/43] reducing example run times --- R/design.R | 31 ++----------------------------- R/power.R | 18 ++++++++++-------- man/cbc_design.Rd | 31 ++----------------------------- man/plot_compare_power.Rd | 18 ++++++++++-------- 4 files changed, 24 insertions(+), 74 deletions(-) diff --git a/R/design.R b/R/design.R index f901408..25cff31 100644 --- a/R/design.R +++ b/R/design.R @@ -178,40 +178,13 @@ #' n_q = 6 # Number of questions per respondent #' ) #' -#' # Make a survey using a full factorial design +#' # Make a survey using a full factorial design and include a "no choice" option #' design_full <- cbc_design( #' profiles = profiles, #' n_resp = 100, # Number of respondents #' n_alts = 3, # Number of alternatives per question #' n_q = 6, # Number of questions per respondent -#' method = 'full' -#' ) -#' -#' # Make a survey from an orthogonal array of profiles -#' design_orthogonal <- cbc_design( -#' profiles = profiles, -#' n_resp = 100, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6, # Number of questions per respondent -#' method = 'orthogonal' -#' ) -#' -#' # Make a D-optimal design -#' design_dopt <- cbc_design( -#' profiles = profiles, -#' n_resp = 100, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6, # Number of questions per respondent -#' method = 'dopt' -#' ) -#' -#' # Make a survey by randomly sampling from all possible profiles -#' # and include a "no choice" option -#' design_random_nochoice <- cbc_design( -#' profiles = profiles, -#' n_resp = 100, # Number of respondents -#' n_alts = 3, # Number of alternatives per question -#' n_q = 6, # Number of questions per respondent +#' method = 'full', # Change this to use a different method, e.g. 'orthogonal', or 'dopt' #' no_choice = TRUE #' ) #' diff --git a/R/power.R b/R/power.R index c49fad0..3b6c8f6 100644 --- a/R/power.R +++ b/R/power.R @@ -249,19 +249,20 @@ extract_errors <- function(models) { #' @importFrom rlang .data #' @export #' @examples +#' \dontrun{ #' library(cbcTools) #' #' # Generate all possible profiles #' profiles <- cbc_profiles( -#' price = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5), +#' price = c(1, 1.5, 2, 2.5, 3), #' type = c("Fuji", "Gala", "Honeycrisp"), #' freshness = c('Poor', 'Average', 'Excellent') #' ) #' #' # Make designs to compare: full factorial vs bayesian d-efficient -#' design_full <- cbc_design( +#' design_random <- cbc_design( #' profiles = profiles, -#' n_resp = 300, n_alts = 3, n_q = 6 +#' n_resp = 100, n_alts = 3, n_q = 6 #' ) #' # Same priors will be used in bayesian design and simulated choices #' priors <- list( @@ -271,26 +272,27 @@ extract_errors <- function(models) { #' ) #' design_bayesian <- cbc_design( #' profiles = profiles, -#' n_resp = 300, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", +#' n_resp = 100, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", #' priors = priors, parallel = FALSE #' ) #' #' # Obtain power for each design by simulating choices -#' power_full <- design_full |> +#' power_random <- design_random |> #' cbc_choices(obsID = "obsID", priors = priors) |> #' cbc_power( #' pars = c("price", "type", "freshness"), -#' outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 +#' outcome = "choice", obsID = "obsID", nbreaks = 5, n_q = 6, n_cores = 2 #' ) #' power_bayesian <- design_bayesian |> #' cbc_choices(obsID = "obsID", priors = priors) |> #' cbc_power( #' pars = c("price", "type", "freshness"), -#' outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 +#' outcome = "choice", obsID = "obsID", nbreaks = 5, n_q = 6, n_cores = 2 #' ) #' #' # Compare power of each design -#' plot_compare_power(power_bayesian, power_full) +#' plot_compare_power(power_bayesian, power_random) +#' } plot_compare_power <- function(...) { power <- list(...) design_names <- unlist(lapply(as.list(match.call())[-1], deparse)) diff --git a/man/cbc_design.Rd b/man/cbc_design.Rd index ae66fc0..18da447 100644 --- a/man/cbc_design.Rd +++ b/man/cbc_design.Rd @@ -218,40 +218,13 @@ design_random <- cbc_design( n_q = 6 # Number of questions per respondent ) -# Make a survey using a full factorial design +# Make a survey using a full factorial design and include a "no choice" option design_full <- cbc_design( profiles = profiles, n_resp = 100, # Number of respondents n_alts = 3, # Number of alternatives per question n_q = 6, # Number of questions per respondent - method = 'full' -) - -# Make a survey from an orthogonal array of profiles -design_orthogonal <- cbc_design( - profiles = profiles, - n_resp = 100, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - method = 'orthogonal' -) - -# Make a D-optimal design -design_dopt <- cbc_design( - profiles = profiles, - n_resp = 100, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent - method = 'dopt' -) - -# Make a survey by randomly sampling from all possible profiles -# and include a "no choice" option -design_random_nochoice <- cbc_design( - profiles = profiles, - n_resp = 100, # Number of respondents - n_alts = 3, # Number of alternatives per question - n_q = 6, # Number of questions per respondent + method = 'full', # Change this to use a different method, e.g. 'orthogonal', or 'dopt' no_choice = TRUE ) diff --git a/man/plot_compare_power.Rd b/man/plot_compare_power.Rd index 737cc74..bf08cd0 100644 --- a/man/plot_compare_power.Rd +++ b/man/plot_compare_power.Rd @@ -19,19 +19,20 @@ different designs. Each design is color coded and each facet (sub plot) is a model coefficient. } \examples{ +\dontrun{ library(cbcTools) # Generate all possible profiles profiles <- cbc_profiles( - price = c(1, 1.5, 2, 2.5, 3, 3.5, 4, 4.5, 5), + price = c(1, 1.5, 2, 2.5, 3), type = c("Fuji", "Gala", "Honeycrisp"), freshness = c('Poor', 'Average', 'Excellent') ) # Make designs to compare: full factorial vs bayesian d-efficient -design_full <- cbc_design( +design_random <- cbc_design( profiles = profiles, - n_resp = 300, n_alts = 3, n_q = 6 + n_resp = 100, n_alts = 3, n_q = 6 ) # Same priors will be used in bayesian design and simulated choices priors <- list( @@ -41,24 +42,25 @@ priors <- list( ) design_bayesian <- cbc_design( profiles = profiles, - n_resp = 300, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", + n_resp = 100, n_alts = 3, n_q = 6, n_start = 1, method = "CEA", priors = priors, parallel = FALSE ) # Obtain power for each design by simulating choices -power_full <- design_full |> +power_random <- design_random |> cbc_choices(obsID = "obsID", priors = priors) |> cbc_power( pars = c("price", "type", "freshness"), - outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + outcome = "choice", obsID = "obsID", nbreaks = 5, n_q = 6, n_cores = 2 ) power_bayesian <- design_bayesian |> cbc_choices(obsID = "obsID", priors = priors) |> cbc_power( pars = c("price", "type", "freshness"), - outcome = "choice", obsID = "obsID", nbreaks = 10, n_q = 6, n_cores = 2 + outcome = "choice", obsID = "obsID", nbreaks = 5, n_q = 6, n_cores = 2 ) # Compare power of each design -plot_compare_power(power_bayesian, power_full) +plot_compare_power(power_bayesian, power_random) +} } From a1ceb125c1d1f805dfa343c723227d8efe2565f5 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Mon, 10 Jul 2023 19:31:28 -0400 Subject: [PATCH 41/43] Update CRAN-SUBMISSION --- CRAN-SUBMISSION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 485b557..48dbe17 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ Version: 0.5.0 -Date: 2023-07-10 20:57:55 UTC -SHA: fca31de3171d8e3307b5814085127d121a22522a +Date: 2023-07-10 23:30:40 UTC +SHA: e862e8640e33a35c32c9eb250bbbd7eb5b93d6e3 From 56f2a06e8c004d612a37563315fe927a8f10c739 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Tue, 11 Jul 2023 06:32:30 -0400 Subject: [PATCH 42/43] Update CRAN-SUBMISSION --- CRAN-SUBMISSION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION index 48dbe17..66da749 100644 --- a/CRAN-SUBMISSION +++ b/CRAN-SUBMISSION @@ -1,3 +1,3 @@ Version: 0.5.0 -Date: 2023-07-10 23:30:40 UTC -SHA: e862e8640e33a35c32c9eb250bbbd7eb5b93d6e3 +Date: 2023-07-11 10:27:50 UTC +SHA: a1ceb125c1d1f805dfa343c723227d8efe2565f5 From 7b455dd3ac817228bb42d5719a797563b0f4cd62 Mon Sep 17 00:00:00 2001 From: emse-p4a-gwu Date: Wed, 12 Jul 2023 08:06:51 -0400 Subject: [PATCH 43/43] small updates to vignette --- vignettes/usage.Rmd | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/vignettes/usage.Rmd b/vignettes/usage.Rmd index 5e6c9fe..70b3199 100644 --- a/vignettes/usage.Rmd +++ b/vignettes/usage.Rmd @@ -58,21 +58,19 @@ If you do wish to restrict some attribute level combinations, you can do so usin - `"Gala"` apples will not be shown with the prices `1.5`, `2.5`, and `3.5`. - `"Honeycrisp"` apples will not be shown with prices less than `2`. -- `"Honeycrisp"` apples will only be shown with `"Average"` freshness. - `"Fuji"` apples will not be shown with the `"Excellent"` freshness. -With these restrictions, there are now only 39 profiles compared to 81 without them: +With these restrictions, there are now only 57 profiles compared to 81 without them: ```{r} restricted_profiles <- cbc_restrict( profiles, type == "Gala" & price %in% c(1.5, 2.5, 3.5), - type == "Honeycrisp" & price > 2, - type == "Honeycrisp" & freshness %in% c("Poor", "Excellent"), + type == "Honeycrisp" & price < 2, type == "Fuji" & freshness == "Excellent" ) -restricted_profiles +dim(restricted_profiles) ``` # Generate survey designs @@ -85,23 +83,14 @@ Once a set of profiles is obtained, a conjoint survey can then be generated usin - `n_q`: The number of questions per respondent. - `method`: The design strategy to use (defaults to `"random"`). -The `method` argument determines the design method used. Options are: +The `method` argument determines the design strategy to use: `"random"`, `"full"`, `"orthogonal"`, `"dopt"`, `"CEA"`, or `"Modfed"`. All methods ensure that the two following criteria are met: -- `"random"` -- `"full"` -- `"orthogonal"` -- `"dopt"` -- `"CEA"` -- `"Modfed"` - -All methods ensure that the two following criteria are met: - -1. No two profiles are the same within any one choice set. +1. No two profiles are the same within any one choice set. 2. No two choice sets are the same within any one respondent. -The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profile, and the use of blocking. +The table below summarizes method compatibility with other design options, including the ability to include a "no choice" option, the creation of a "labeled" design (also called a "alternative-specific" design), the use of restricted profiles, and the use of blocking. -Method | Include "no choice"? | Labeled designs? | Restricted profiles? | Blocking? +Method | No choice | Labeled designs | Restricted profiles | Blocking ---|---|---|---|--- `"random"` | Yes | Yes | Yes | No `"full"` | Yes | Yes | Yes | Yes