Student t distribution in alm(). Issue #13

config-i1 · Oct 30, 2018 · caf0020 · caf0020
1 parent 75f298e
commit caf0020
Show file tree

Hide file tree

Showing 8 changed files with 78 additions and 23 deletions.
diff --git a/CRAN-RELEASE b/CRAN-RELEASE
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,8 +1,8 @@
 Package: greybox
 Type: Package
 Title: Toolbox for Model Building and Forecasting
-Version: 0.3.2
-Date: 2018-10-25
+Version: 0.3.3.41001
+Date: 2018-10-30
 Authors@R: person("Ivan", "Svetunkov", email = "[email protected]", role = c("aut", "cre"),
                   comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
 URL: https://github.com/config-i1/greybox

diff --git a/NAMESPACE b/NAMESPACE
@@ -122,6 +122,7 @@ importFrom(stats,dlogis)
 importFrom(stats,dnbinom)
 importFrom(stats,dnorm)
 importFrom(stats,dpois)
+importFrom(stats,dt)
 importFrom(stats,end)
 importFrom(stats,fitted)
 importFrom(stats,frequency)

diff --git a/NEWS b/NEWS
@@ -1,3 +1,13 @@
+greybox v0.3.3 (Release data: 2018-10-30)
+==============
+
+Changes:
+* Student t distribution in alm.
+
+Bugfixes:
+* Corrected a typo of "plogos" in alm.
+
+
 greybox v0.3.2 (Release data: 2018-10-25)
 ==============
 

diff --git a/R/alm.R b/R/alm.R
@@ -6,12 +6,18 @@
 #' non-normal distributions. These include:
 #' \enumerate{
 #' \item Normal distribution, \link[stats]{dnorm},
-#' \item Folded normal distribution, \link[greybox]{dfnorm},
-#' \item Log normal distribution, \link[stats]{dlnorm},
+#' \item Logistic Distribution, \link[stats]{dlogis},
 #' \item Laplace distribution, \link[greybox]{dlaplace},
+#' \item Asymmetric Laplace distribution, \link[greybox]{dalaplace},
+#' \item T-distribution, \link[stats]{dt},
 #' \item S-distribution, \link[greybox]{ds},
+#' \item Folded normal distribution, \link[greybox]{dfnorm},
+#' \item Log normal distribution, \link[stats]{dlnorm},
 #' \item Chi-Squared Distribution, \link[stats]{dchisq},
-#' \item Logistic Distribution, \link[stats]{dlogis}.
+#' \item Poisson Distribution, \link[stats]{dpois},
+#' \item Negative Binomial Distribution, \link[stats]{dnbinom},
+#' \item Cumulative Logistic Distribution, \link[stats]{plogis},
+#' \item Cumulative Normal distribution, \link[stats]{pnorm}.
 #' }
 #'
 #' This function is slower than \code{lm}, because it relies on likelihood estimation
@@ -116,11 +122,11 @@
 #' @importFrom numDeriv hessian
 #' @importFrom nloptr nloptr
 #' @importFrom stats model.frame sd terms
-#' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom
+#' @importFrom stats dchisq dlnorm dnorm dlogis dpois dnbinom dt
 #' @importFrom stats plogis
 #' @export alm
 alm <- function(formula, data, subset, na.action,
-                distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds",
+                distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt",
                                "dfnorm","dlnorm","dchisq",
                                "dpois","dnbinom",
                                "plogis","pnorm"),
@@ -131,7 +137,7 @@ alm <- function(formula, data, subset, na.action,
     cl <- match.call();
 
     distribution <- distribution[1];
-    if(all(distribution!=c("dnorm","dlogis","dlaplace","dalaplace","ds","dfnorm","dlnorm","dchisq",
+    if(all(distribution!=c("dnorm","dlogis","dlaplace","dalaplace","ds","dt","dfnorm","dlnorm","dchisq",
                            "dpois","dnbinom","plogis","pnorm"))){
         if(any(distribution==c("norm","fnorm","lnorm","laplace","s","chisq","logis"))){
             warning(paste0("You are using the old value of the distribution parameter.\n",
@@ -364,6 +370,7 @@ alm <- function(formula, data, subset, na.action,
                        "dlaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
+                       "dt" =,
                        "ds" =,
                        "pnorm" =,
                        "plogis" = matrixXreg %*% B
@@ -377,6 +384,7 @@ alm <- function(formula, data, subset, na.action,
                         "dalaplace" = meanFast((y-mu) * (alpha - (y<=mu)*1)),
                         "dlogis" = sqrt(meanFast((y-mu)^2) * 3 / pi^2),
                         "ds" = meanFast(sqrt(abs(y-mu))) / 2,
+                        "dt" = max(2,2/(1-(meanFast((y-mu)^2))^{-1})),
                         "dchisq" =,
                         "dnbinom" = abs(B[1]),
                         "dpois" = mu,
@@ -397,6 +405,7 @@ alm <- function(formula, data, subset, na.action,
                            "dlaplace" = dlaplace(y, mu=fitterReturn$mu, b=fitterReturn$scale, log=TRUE),
                            "dalaplace" = dalaplace(y, mu=fitterReturn$mu, b=fitterReturn$scale, alpha=alpha, log=TRUE),
                            "dlogis" = dlogis(y, location=fitterReturn$mu, scale=fitterReturn$scale, log=TRUE),
+                           "dt" = dt(y-fitterReturn$mu, df=fitterReturn$scale, log=TRUE),
                            "ds" = ds(y, mu=fitterReturn$mu, b=fitterReturn$scale, log=TRUE),
                            "dchisq" = dchisq(y, df=fitterReturn$scale, ncp=fitterReturn$mu, log=TRUE),
                            "dpois" = dpois(y, lambda=fitterReturn$mu, log=TRUE),
@@ -463,7 +472,7 @@ alm <- function(formula, data, subset, na.action,
             BUpper <- rep(Inf,length(B));
         }
 
-        if(any(distribution==c("dpois","dnbinom","plogos","pnorm"))){
+        if(any(distribution==c("dpois","dnbinom","plogis","pnorm"))){
             maxeval <- 500;
         }
         else{
@@ -526,6 +535,7 @@ alm <- function(formula, data, subset, na.action,
                        "dlaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
+                       "dt" =,
                        "ds" =,
                        "dpois" =,
                        "dnbinom" = mu,
@@ -541,6 +551,7 @@ alm <- function(formula, data, subset, na.action,
                        "dlaplace" =,
                        "dalaplace" =,
                        "dlogis" =,
+                       "dt" =,
                        "ds" =,
                        "dnorm" =,
                        "dpois" =,

diff --git a/R/methods.R b/R/methods.R
@@ -193,6 +193,7 @@ pointLik.alm <- function(object, ...){
                         "dlnorm" = dlnorm(y, meanlog=mu, sdlog=scale, log=TRUE),
                         "dlaplace" = dlaplace(y, mu=mu, b=scale, log=TRUE),
                         "dlogis" = dlogis(y, location=mu, scale=scale, log=TRUE),
+                        "dt" = dt(y-mu, df=scale, log=TRUE),
                         "ds" = ds(y, mu=mu, b=scale, log=TRUE),
                         "dpois" = dpois(y, lambda=mu, log=TRUE),
                         "dnbinom" = dnbinom(y, mu=mu, size=scale, log=TRUE),
@@ -478,6 +479,14 @@ predict.alm <- function(object, newdata=NULL, interval=c("none", "confidence", "
         }
         greyboxForecast$scale <- bValues;
     }
+    else if(object$distribution=="dt"){
+        # Use df estimated by the model and then construct conventional intervals. df=2 is the minimum in this model.
+        df <- object$scale^{-2};
+        if(interval!="n"){
+            greyboxForecast$lower[] <- greyboxForecast$mean + sqrt(greyboxForecast$variances) * qt(levelLow,df);
+            greyboxForecast$upper[] <- greyboxForecast$mean + sqrt(greyboxForecast$variances) * qt(levelUp,df);
+        }
+    }
     else if(object$distribution=="ds"){
         # Use the connection between the variance and b in S distribution
         bValues <- (greyboxForecast$variances/120)^0.25;
@@ -1044,6 +1053,7 @@ print.summary.alm <- function(x, ...){
                       "dlogis" = "Logistic",
                       "dlaplace" = "Laplace",
                       "dalaplace" = paste0("Asymmetric Laplace with alpha=",round(x$other$alpha,2)),
+                      "dt" = "Student t",
                       "ds" = "S",
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",
@@ -1083,6 +1093,7 @@ print.summary.greybox <- function(x, ...){
                       "dlogis" = "Logistic",
                       "dlaplace" = "Laplace",
                       "dalaplace" = "Asymmetric Laplace",
+                      "dt" = "Student t",
                       "ds" = "S",
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",
@@ -1118,6 +1129,7 @@ print.summary.greyboxC <- function(x, ...){
                       "dlogis" = "Logistic",
                       "dlaplace" = "Laplace",
                       "dalaplace" = "Asymmetric Laplace",
+                      "dt" = "Student t",
                       "ds" = "S",
                       "dfnorm" = "Folded Normal",
                       "dlnorm" = "Log Normal",

diff --git a/man/alm.Rd b/man/alm.Rd
diff --git a/vignettes/alm.Rmd b/vignettes/alm.Rmd
@@ -38,15 +38,15 @@ Every model produced using `alm()` can be represented as:
 \begin{equation} \label{eq:basicALM}
     y_t = f(\mu_t, \epsilon_t) = f(x_t' B, \epsilon_t) ,
 \end{equation}
-where $y_t$ is a value of the response variable, $x_t$ is a vector of exogenous variables, $B$ is a vector of the parameters, $\mu_t$ is the conditional mean (produced based on the exogenous variables and the parameters of the model), $\epsilon_t$ is the error term on the observation $t$ and $f(\cdot)$ is a distribution function that does a transformation of the inputs into the output. In case of a mixture distribution the model becomes slightly more complicated:
+where $y_t$ is the value of the response variable, $x_t$ is the vector of exogenous variables, $B$ is the vector of the parameters, $\mu_t$ is the conditional mean (produced based on the exogenous variables and the parameters of the model), $\epsilon_t$ is the error term on the observation $t$ and $f(\cdot)$ is the distribution function that does a transformation of the inputs into the output. In case of a mixture distribution the model becomes slightly more complicated:
 \begin{equation} \label{eq:basicALMMixture}
     \begin{matrix}
         y_t = o_t f(x_t' B, \epsilon_t) \\
         o_t \sim \text{Bernoulli}(p_t) \\
         p_t = g(z_t' A, \eta_t)
     \end{matrix},
 \end{equation}
-where $o_t$ is a binary variable, $p_t$ is the probability of occurrence, $z_t$ is a vector of exogenous variables, $A$ is a vector of parameters and $\eta$ is a the error term for the $p_t$.
+where $o_t$ is the binary variable, $p_t$ is the probability of occurrence, $z_t$ is the vector of exogenous variables, $A$ is the vector of parameters and $\eta$ is the error term for the $p_t$.
 
 The `alm()` function returns, along with the set of common for `lm()` variables (such as `coefficient` and `fitted.values`), the variable `mu`, which corresponds to the conditional mean used inside the distribution, and `scale` -- the second parameter, which usually corresponds to standard error or dispersion parameter. The values of these two variables vary from distribution to distribution. Note, however, that the `model` variable returned by `lm()` function was renamed into `data` in `alm()`, and that `alm()` does not return `terms` and QR decomposition.
 
@@ -64,6 +64,9 @@ This group of functions includes:
 3. Asymmetric Laplace distribution,
 4. Logistic distribution,
 5. S distribution,
+6. Student t distribution,
+
+For all the functions in this category `resid()` method returns $e_t = y_t - \mu_t$.
 
 ### Normal distribution
 The density of normal distribution is:
@@ -149,7 +152,7 @@ The density function of Logistic distribution is:
 \begin{equation} \label{eq:Logistic}
     f(y_t) = \frac{\exp \left(- \frac{y_t - \mu_t}{s} \right)} {s \left( 1 + \exp \left(- \frac{y_t - \mu_t}{s} \right) \right)^{2}},
 \end{equation}
-where $s$ is a scale parameter, which is estimated in `alm()` based on the connection between the parameter and the variance in the logistic distribution:
+where $s$ is the scale parameter, which is estimated in `alm()` based on the connection between the parameter and the variance in the logistic distribution:
 \begin{equation} \label{eq:sLogisticAndSigma}
     s = \sigma \sqrt{\frac{3}{\pi^2}}.
 \end{equation}
@@ -164,7 +167,7 @@ The S distribution has the following density function:
 \begin{equation} \label{eq:S}
     f(y_t) = \frac{1}{4b^2} \exp \left( -\frac{\sqrt{|y_t - \mu_t|}}{b} \right) ,
 \end{equation}
-where $b$ is a scale parameter. If estimated via maximum likelihood, the scale parameter is equal to:
+where $b$ is the scale parameter. If estimated via maximum likelihood, the scale parameter is equal to:
 \begin{equation} \label{eq:bS}
     b = \frac{1}{T} \sum_{t=1}^T \sqrt{\left| y_t - \mu_t \right|} ,
 \end{equation}
@@ -178,7 +181,21 @@ S distribution has a kurtosis of 25.2, which makes it an "extreme excess" distri
 \end{equation}
 where once again $\sigma^2$ is substituted either by the conditional variance of $\mu_t$ or $y_t$.
 
-For all the functions in this category `resid()` method returns $e_t = y_t - \mu_t$.
+### Student t distribution
+The Student t distribution has a difficult density function:
+\begin{equation} \label{eq:T}
+    f(y_t) = \frac{\Gamma\left(\frac{d+1}{2}\right)}{\sqrt{d \pi} \Gamma\left(\frac{d}{2}\right)} \left( 1 + \frac{x^2}{d} \right)^{-\frac{d+1}{2}} ,
+\end{equation}
+where $d$ is the number of degrees of freedom, which can also be considered as the scale parameter of the distribution. It has the following connection with the in-sample variance of the error (but only for the case, when $d>2$):
+\begin{equation} \label{eq:scaleOfT}
+    d = \frac{2}{1-\sigma^{-2}}.
+\end{equation}
+Given that the formula \eqref{eq:scaleOfT} holds only for cases of $d>2$ (and respectively for $\sigma^2>1$), the degrees of freedom in this case are restricted by 2 from below.
+
+Kurtosis of Student t distribution depends on the value of $d$, and for the cases of $d>4$ is equal to $\frac{6}{d-4}$.
+
+`alm()` function with `distribution="dt"` returns $\mu_t$ in the same variables `mu` and `fitted.values`, and $d$ in the `scale` variable. Both prediction and confidence intervals use `qt()` function from `stats` package and rely on the estimated in-sample value of $d$. The intervals are constructed similarly to how it is done in Normal distribution \eqref{eq:intervalsNormal} (based on `qt()` function).
+
 
 ## Continuous density functions for positive data
 This group includes:
@@ -234,7 +251,7 @@ The density function of Noncentral Chi Squared distribution is quite difficult.
 \begin{equation} \label{eq:NCChiSquared}
     f(y_t) = \frac{1}{2} \exp \left( -\frac{y_t + \lambda_t}{2} \right) \left(\frac{y_t}{\lambda_t} \right)^{\frac{k}{4}-0.5} I_{\frac{k}{2}-1}(\sqrt{\lambda_t y_t}),
 \end{equation}
-where $I_k(x)$ is a Bessel function of the first kind. The $\lambda_t$ parameter is estimated from a regression with exogenous variables:
+where $I_k(x)$ is the Bessel function of the first kind. The $\lambda_t$ parameter is estimated from a regression with exogenous variables:
 \begin{equation} \label{eq:lambdaValue}
     \lambda_t = ( x_t' B )^2 ,
 \end{equation}