Merge pull request #24 from jhelvy/predict

Predict
jhelvy · Oct 25, 2021 · aec84f6 · aec84f6
2 parents 8d9d161 + ee02c1d
commit aec84f6
Show file tree

Hide file tree

Showing 74 changed files with 1,354 additions and 1,589 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: logitr
 Title: Logit Models w/Preference & WTP Space Utility Parameterizations
-Version: 0.3.1
+Version: 0.4.0
 Authors@R: c(
     person(given   = "John",
            family  = "Helveston",
@@ -11,7 +11,7 @@ Authors@R: c(
            family  = "Forsythe",
            role    = "ctb",
            email   = "[email protected]"))
-Description: Estimation of multinomial (MNL) and mixed logit (MXL) models in R. Models can be estimated using "Preference" space or "Willingness-to-pay" (WTP) space utility parameterizations. Weighted models can also be estimated. An option is available to run a multistart optimization loop with random starting points in each iteration, which is useful for non-convex problems like MXL models or models with WTP space utility parameterizations. The main optimization loop uses the 'nloptr' package to minimize the negative log-likelihood function. Additional functions are available for computing and comparing WTP from both preference space and WTP space models and for predicting expected choices and choice probabilities for sets of alternatives based on an estimated model. MXL models assume uncorrelated heterogeneity covariances and are estimated using maximum simulated likelihood based on the algorithms in Train (2009) "Discrete Choice Methods with Simulation, 2nd Edition" <doi:10.1017/CBO9780511805271>.
+Description: Fast estimation of multinomial (MNL) and mixed logit (MXL) models in R. Models can be estimated using "Preference" space or "Willingness-to-pay" (WTP) space utility parameterizations. Weighted models can also be estimated. An option is available to run a multistart optimization loop with random starting points in each iteration, which is useful for non-convex problems like MXL models or models with WTP space utility parameterizations. The main optimization loop uses the 'nloptr' package to minimize the negative log-likelihood function. Additional functions are available for computing and comparing WTP from both preference space and WTP space models and for predicting expected choices and choice probabilities for sets of alternatives based on an estimated model. MXL models assume uncorrelated heterogeneity covariances and are estimated using maximum simulated likelihood based on the algorithms in Train (2009) "Discrete Choice Methods with Simulation, 2nd Edition" <doi:10.1017/CBO9780511805271>.
 License: MIT + file LICENSE
 Encoding: UTF-8
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -2,14 +2,18 @@
 
 S3method(coef,logitr)
 S3method(coef,summary.logitr)
+S3method(fitted,logitr)
 S3method(logLik,logitr)
+S3method(predict,logitr)
 S3method(print,logitr)
 S3method(print,logitr_wtp)
 S3method(print,summary.logitr)
+S3method(residuals,logitr)
 S3method(se,logitr)
 S3method(summary,logitr)
 S3method(terms,logitr)
 S3method(vcov,logitr)
+S3method(wtp,logitr)
 export(dummyCode)
 export(logitr)
 export(predictChoices)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,18 @@
+# logitr 0.4.0
+
+## Larger changes:
+
+- A new `predict.logitr()` method was added for making probability and choice predictions from logitr class objects.
+- The `predictProbs()` and `predictChoices()` functions were depreciated.
+- Added new `fitted.logitr()` and `residuals.logitr()` methods.
+- Added optional `predict` argument to the main `logitr()` function which controls whether predicted probabilities, fitted.values, and residuals are included in the returned object. Default setting is TRUE.
+- Changed the name of the coefficients vector in the returned object from "coef" to "coefficients" to be consistent with other packages.
+- Changed the argument name from "choice" to "outcome" to be more general
+
+## Bugs:
+
+- Fixed bug where the returned object contained the scaled data rather than the original, unscaled data
+
 # logitr 0.3.1
 
 - Bug fix: Cast X object to matrix for single-parameter models

diff --git a/R/additionalInfo.R b/R/additionalInfo.R
@@ -3,29 +3,27 @@
 # ============================================================================
 
 appendModelInfo <- function(model, modelInputs) {
-  parsUnscaled <- model$coef
+  parsUnscaled <- model$coefficients
   parNames <- c(modelInputs$parList$mu, modelInputs$parList$sigma)
   names(parsUnscaled) <- parNames
-  scaleFactors <- model$data$scaleFactors
+  scaleFactors <- modelInputs$scaleFactors
   if (model$fail) {
-    coef <- parsUnscaled*NA
-    gradient <- matrix(coef, ncol = 1)
+    gradient <- matrix(parsUnscaled*NA, ncol = 1)
     row.names(gradient) <- parNames
     hessian <- matrix(NA, nrow = length(parNames), ncol = length(parNames))
     row.names(hessian) <- parNames
     colnames(hessian) <- parNames
     nullLogLik <- NA
   } else {
-    coef       <- getCoefs(parsUnscaled, scaleFactors, modelInputs)
-    gradient   <- getGradient(parsUnscaled, scaleFactors, modelInputs)
-    hessian    <- getHessian(parsUnscaled, scaleFactors, modelInputs)
-    nullLogLik <- -1 * modelInputs$evalFuncs$negLL(coef * 0, modelInputs)
+    coefficients <- getCoefs(parsUnscaled, scaleFactors, modelInputs)
+    gradient     <- getGradient(parsUnscaled, scaleFactors, modelInputs)
+    hessian      <- getHessian(parsUnscaled, scaleFactors, modelInputs)
+    nullLogLik   <- -1*modelInputs$evalFuncs$negLL(coefficients*0, modelInputs)
   }
-  model$coef <- coef
+  model$coefficients <- coefficients
   model$gradient <- gradient
   model$hessian <- hessian
   model$nullLogLik <- nullLogLik
-  model$scaleFactors <- scaleFactors
   model$result <- NULL
   model$fail <- NULL
   return(model)

diff --git a/R/data.R b/R/data.R
@@ -19,10 +19,6 @@
 #' `price` | price of yogurt
 #' `feat` | dummy for whether a newspaper advertisement was shown to the customer (`1` or `0`)
 #' `brand` | yogurt brand: `"yoplait"`, `"dannon"`, `"hiland"`, or `"weight"` (for weight watcher)
-#' `dannon` | dummy variable for the `"dannon"` brand (`1` or `0`)
-#' `hiland` | dummy variable for the `"hiland"` brand (`1` or `0`)
-#' `weight` | dummy variable for the `"weight"` brand (`1` or `0`)
-#' `yoplait` | dummy variable for the `"yoplait"` brand (`1` or `0`)
 #'
 #' @docType data
 #'

diff --git a/R/encoding.R b/R/encoding.R
@@ -23,7 +23,7 @@ dummyCode <- function(df, vars) {
 #' well as two vectors (`pars` and `randPars`) with discrete (categorical)
 #' variables and interaction variables added to `X`, `pars`, and
 #' `randPars`.
-#' @param data The choice data, formatted as a `data.frame` object.
+#' @param data The data, formatted as a `data.frame` object.
 #' @param pars The names of the parameters to be estimated in the model.
 #' Must be the same as the column names in the `data` argument. For WTP space
 #' models, do not include price in `pars`.

diff --git a/R/inputChecks.R b/R/inputChecks.R
@@ -162,25 +162,63 @@ checkOptions <- function(options) {
   return(options)
 }
 
-predictInputsCheck <- function(model, alts, altID, obsID) {
-  if (!is_logitr(model)) {
+predictInputsCheck <- function(object, newdata, obsID, price, type, ci) {
+  if (!is_logitr(object)) {
     stop(
-      'The "model" argument must be a model estimated using the logitr() ',
+      'The "object" argument must be a object estimated using the logitr() ',
       'function.'
     )
   }
-  if (missing(alts)) stop('"alts" needs to be specified')
-  if (missing(altID)) stop('"altID" needs to be specified')
-  if (! altID %in% names(alts)) {
+  if (missing(newdata)) stop('"newdata" needs to be specified')
+  if (!is.null(newdata)) {
+    if (is.null(obsID)) {
+      stop('"obsID" must be specified if newdata is not NULL')
+    }
+    if (object$inputs$modelSpace == "wtp") {
+      if (is.null(price)) {
+        stop(
+          '"price" must be specified if "object" is a WTP space model and ',
+          'newdata is not NULL'
+        )
+      }
+    }
+    if (!is.null(obsID)) {
+      if (! obsID %in% names(newdata)) {
+        stop(
+          'The "obsID" argument refers to a column that does not exist in ',
+          'the "newdata" data frame'
+        )
+      }
+    }
+    if (!is.null(price)) {
+      if (! price %in% names(newdata)) {
+        stop(
+          'The "price" argument refers to a column that does not exist in ',
+          'the "newdata" data frame'
+        )
+      }
+    }
+  }
+  if ("probs" %in% type) {
+    stop('Use "prob" instead of "probs" in the type argument')
+  }
+  if ("outcomes" %in% type) {
+    stop('Use "outcome" instead of "outcomes" in the type argument')
+  }
+  typeTest <- identical(type, "prob") |
+    identical(type, "outcome") |
+    identical(type, c("prob", "outcome")) |
+    identical(type, c("outcome", "prob"))
+  if (!typeTest) {
     stop(
-      'The "altID" argument refers to a column that does not exist in ',
-      'the "alts" data frame')
+      'type must be a vector containing "prob" (for returning ',
+      'predicted probabilities) and / or "outcome" (for returning predicted ',
+      'outcomes)')
   }
-  if (!is.null(obsID)) {
-    if (! obsID %in% names(alts)) {
-      stop(
-        'The "obsID" argument refers to a column that does not exist in ',
-        'the "alts" data frame')
+  if (!is.null(ci)) {
+    ci_test <- (ci < 1) & (ci > 0)
+    if (!ci_test) {
+      stop("ci must be a number between 0 and 1")
     }
   }
 }
@@ -197,10 +235,11 @@ predictParCheck <- function(model, X) {
     dataPars <- paste(dataNames, collapse = ", ")
     stop(paste0(
       'The coefficient names for the provided model do not correspond to ',
-      'variables in "alts".\n\n',
+      'variables in "newdata".\n\n',
       'Expect columns:\n\t', modelPars, '\n\n',
-      'Encoded column names from provided `alts` object:\n\t', dataPars, '\n\n',
-      'If you have a factor variable in "alts", check that the factor ',
+      'Encoded column names from provided `newdata` object:\n\t', dataPars,
+      '\n\n',
+      'If you have a factor variable in "newdata", check that the factor ',
       'levels match those of the data used to estimate the model.'
     ))
   }
@@ -212,7 +251,7 @@ wtpInputsCheck <- function(model, price) {
   if (!is_logitr(model)) {
     stop('model must be a model estimated using the logitr() function.')
   }
-  if (! price %in% names(model$coef)) {
+  if (! price %in% names(stats::coef(model))) {
     stop('"price" must be the name of a coefficient in "model".')
   }
   if (model$inputs$modelSpace != "pref") {
@@ -230,7 +269,7 @@ wtpCompareInputsCheck <- function(model_pref, model_wtp, price) {
   if (!is_logitr(model_wtp)) {
     stop('"model_wtp" must be a model estimated using the logitr() function.')
   }
-  if (! price %in% names(model_pref$coef)) {
+  if (! price %in% names(stats::coef(model_pref))) {
     stop('"price" must be the name of a coefficient in "model_pref"')
   }
 }