Skip to content

Commit

Permalink
Asymmetric Laplace in alm() (issue #13)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ivan Svetunkov committed Oct 3, 2018
1 parent e06ffc2 commit 1d22422
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 32 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: greybox
Type: Package
Title: Toolbox for Model Building and Forecasting
Version: 0.3.2.41008
Date: 2018-10-02
Version: 0.3.2.41009
Date: 2018-10-03
Authors@R: person("Ivan", "Svetunkov", email = "[email protected]", role = c("aut", "cre"),
comment="Lecturer at Centre for Marketing Analytics and Forecasting, Lancaster University, UK")
URL: https://github.com/config-i1/greybox
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ S3method(forecast,alm)
S3method(forecast,greybox)
S3method(getResponse,greybox)
S3method(logLik,alm)
S3method(nParam,alm)
S3method(nParam,default)
S3method(nParam,greyboxC)
S3method(nParam,logLik)
Expand Down
3 changes: 2 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
greybox v0.3.2 (Release data: 2018-01-02)
greybox v0.3.2 (Release data: 2018-01-03)
==============

Changes:
Expand All @@ -15,6 +15,7 @@ Changes:
* predict with distriubion=dnbinom in cases, when scale is not available, is now calculated based on the definition of scale via variance and mean.
* pointLik.ets() is now calculated differently, so that sum(pointLik) is close to the logLik produced by ets() function. The problem with logLik of ets() is that it is not calculated correctly, chopping off some parts of normal distribution. Total disaster!
* New set of distribution functions - for Asymmetric Laplace Distribution.
* alm() now estimates models with Asymmetric Laplace Distribution with predefined alpha parameter. This is equivalent to the quantile regression with alpha quantile, but is done from the likelihood point of view. It also allows estimating alpha in sample.

Bugfixes:
* predict.alm() sometimes produced NAs in the lower bound.
Expand Down
3 changes: 1 addition & 2 deletions R/alaplace.R
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,7 @@
#' @export dalaplace
#' @aliases dalaplace
dalaplace <- function(q, mu=0, b=1, alpha=0.5, log=FALSE){
e <- q-mu
alaplaceReturn <- alpha * (1-alpha) / b * exp(-(e)/b * (alpha - ((e) <= 0)*1));
alaplaceReturn <- alpha * (1-alpha) / b * exp(-(q-mu)/b * (alpha - (q<=mu)*1));
if(log){
alaplaceReturn <- log(alaplaceReturn);
}
Expand Down
68 changes: 63 additions & 5 deletions R/alm.R
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@
#' \item call - how the model was called,
#' \item rank - rank of the model,
#' \item data - data used for the model construction,
#' \item occurrence - the occurrence model used in the estimation.
#' \item occurrence - the occurrence model used in the estimation,
#' \item other - the list of all the other parameters either passed to the
#' function or estimated in the process, but not included in the standard output
#' (e.g. \code{alpha} for Asymmetric Laplace).
#' }
#'
#' @seealso \code{\link[greybox]{stepwise}, \link[greybox]{lmCombine}}
Expand Down Expand Up @@ -117,7 +120,8 @@
#' @importFrom stats plogis
#' @export alm
alm <- function(formula, data, subset, na.action,
distribution=c("dnorm","dfnorm","dlnorm","dlaplace","dlogis","ds","dchisq",
distribution=c("dnorm","dlogis","dlaplace","dalaplace","ds",
"dfnorm","dlnorm","dchisq",
"dpois","dnbinom",
"plogis","pnorm"),
occurrence=c("none","plogis","pnorm"),
Expand All @@ -127,7 +131,7 @@ alm <- function(formula, data, subset, na.action,
cl <- match.call();

distribution <- distribution[1];
if(all(distribution!=c("dnorm","dfnorm","dlnorm","dlaplace","dlogis","ds","dchisq",
if(all(distribution!=c("dnorm","dlogis","dlaplace","dalaplace","ds","dfnorm","dlnorm","dchisq",
"dpois","dnbinom","plogis","pnorm"))){
if(any(distribution==c("norm","fnorm","lnorm","laplace","s","chisq","logis"))){
warning(paste0("You are using the old value of the distribution parameter.\n",
Expand All @@ -140,6 +144,20 @@ alm <- function(formula, data, subset, na.action,
}
}

ellipsis <- list(...);

# If this is ALD, then see if alpha was provided. Otherwise estimate it.
if(distribution=="dalaplace"){
if(is.null(ellipsis$alpha)){
ellipsis$alpha <- alpha <- 0.5
alphaEstimate <- TRUE;
}
else{
alpha <- ellipsis$alpha;
alphaEstimate <- FALSE;
}
}

if(is.alm(occurrence)){
occurrenceModel <- TRUE;
occurrenceProvided <- TRUE;
Expand Down Expand Up @@ -329,6 +347,13 @@ alm <- function(formula, data, subset, na.action,
}

fitter <- function(B, distribution, y, matrixXreg){
if(distribution=="dalaplace"){
if(alphaEstimate){
alpha <- B[1];
B <- B[-1];
}
}

mu[] <- switch(distribution,
"dpois" = exp(matrixXreg %*% B),
"dchisq" = ifelseFast(any(matrixXreg %*% B[-1] <0),1E+100,(matrixXreg %*% B[-1])^2),
Expand All @@ -337,6 +362,7 @@ alm <- function(formula, data, subset, na.action,
"dfnorm" =,
"dlnorm" =,
"dlaplace" =,
"dalaplace" =,
"dlogis" =,
"ds" =,
"pnorm" =,
Expand All @@ -348,6 +374,7 @@ alm <- function(formula, data, subset, na.action,
"dfnorm" = sqrt(meanFast((y-mu)^2)),
"dlnorm" = sqrt(meanFast((log(y)-mu)^2)),
"dlaplace" = meanFast(abs(y-mu)),
"dalaplace" = meanFast((y-mu) * (alpha - (y<=mu)*1)),
"dlogis" = sqrt(meanFast((y-mu)^2) * 3 / pi^2),
"ds" = meanFast(sqrt(abs(y-mu))) / 2,
"dchisq" =,
Expand All @@ -368,6 +395,7 @@ alm <- function(formula, data, subset, na.action,
"dfnorm" = dfnorm(y, mu=fitterReturn$mu, sigma=fitterReturn$scale, log=TRUE),
"dlnorm" = dlnorm(y, meanlog=fitterReturn$mu, sdlog=fitterReturn$scale, log=TRUE),
"dlaplace" = dlaplace(y, mu=fitterReturn$mu, b=fitterReturn$scale, log=TRUE),
"dalaplace" = dalaplace(y, mu=fitterReturn$mu, b=fitterReturn$scale, alpha=alpha, log=TRUE),
"dlogis" = dlogis(y, location=fitterReturn$mu, scale=fitterReturn$scale, log=TRUE),
"ds" = ds(y, mu=fitterReturn$mu, b=fitterReturn$scale, log=TRUE),
"dchisq" = dchisq(y, df=fitterReturn$scale, ncp=fitterReturn$mu, log=TRUE),
Expand Down Expand Up @@ -413,6 +441,11 @@ alm <- function(formula, data, subset, na.action,
else if(distribution=="dchisq"){
B <- c(1, B);
}
else if(distribution=="dalaplace"){
if(alphaEstimate){
B <- c(alpha, B);
}
}

if(any(distribution==c("dpois","dnbinom","plogos","pnorm"))){
maxeval <- 500;
Expand Down Expand Up @@ -445,18 +478,36 @@ alm <- function(formula, data, subset, na.action,
scale <- abs(B[1]);
names(B) <- c("df",variablesNames);
}
else if(distribution=="dalaplace"){
if(alphaEstimate){
ellipsis$alpha <- alpha <- B[1];
variablesNames <- c("alpha",variablesNames);
names(B) <- variablesNames;
}
else{
names(B) <- variablesNames;
}
}
else{
names(B) <- variablesNames;
}

# Parameters of the model + scale
df <- nVariables + 1;

if(distribution=="dalaplace"){
if(alphaEstimate){
df <- df + 1;
nVariables <- nVariables + 1;
}
}

### Fitted values in the scale of the original variable
yFitted[] <- switch(distribution,
"dfnorm" = sqrt(2/pi)*scale*exp(-mu^2/(2*scale^2))+mu*(1-2*pnorm(-mu/scale)),
"dnorm" =,
"dlaplace" =,
"dalaplace" =,
"dlogis" =,
"ds" =,
"dpois" =,
Expand All @@ -471,6 +522,7 @@ alm <- function(formula, data, subset, na.action,
errors[] <- switch(distribution,
"dfnorm" =,
"dlaplace" =,
"dalaplace" =,
"dlogis" =,
"ds" =,
"dnorm" =,
Expand All @@ -488,7 +540,7 @@ alm <- function(formula, data, subset, na.action,
method.args <- list(d=1e-6, r=6);
}
else{
if(any(distribution==c("dnbinom","dlaplace"))){
if(any(distribution==c("dnbinom","dlaplace","dalaplace"))){
method.args <- list(d=1e-6, r=6);
}
else{
Expand Down Expand Up @@ -588,12 +640,18 @@ alm <- function(formula, data, subset, na.action,
if(any(distribution==c("dchisq","dnbinom"))){
B <- B[-1];
}
else if(distribution=="dalaplace"){
if(alphaEstimate){
variablesNames <- variablesNames[-1];
nVariables <- nVariables - 1;
}
}

finalModel <- list(coefficients=B, vcov=vcovMatrix, fitted.values=yFitted, residuals=as.vector(errors),
mu=mu, scale=scale, distribution=distribution, logLik=-CFValue,
df.residual=obsInsample-df, df=df, call=cl, rank=df,
data=matrix(as.matrix(dataWork[,c(responseName,variablesNames[-1])]), ncol=nVariables,
dimnames=list(NULL, c(responseName,variablesNames[-1]))),
occurrence=occurrence, subset=subset);
occurrence=occurrence, subset=subset, other=ellipsis);
return(structure(finalModel,class=c("alm","greybox")));
}
62 changes: 48 additions & 14 deletions R/methods.R
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,22 @@ predict.alm <- function(object, newdata, interval=c("none", "confidence", "predi
}
greyboxForecast$scale <- bValues;
}
else if(object$distribution=="dalaplace"){
# Use the connection between the variance and MAE in Laplace distribution
bValues <- rep(object$scale, length(greyboxForecast$mean));
if(interval!="n"){
warning("We don't have the proper prediction intervals for ALD yet. The uncertainty is underestimated!", call.=FALSE);
if(length(levelUp)==1){
levelUp <- rep(levelUp, length(greyboxForecast$mean));
levelLow <- rep(levelLow, length(greyboxForecast$mean));
}
for(i in 1:h){
greyboxForecast$lower[i] <- qalaplace(levelLow[i],greyboxForecast$mean[i],bValues[i],object$other$alpha);
greyboxForecast$upper[i] <- qalaplace(levelUp[i],greyboxForecast$mean[i],bValues[i],object$other$alpha);
}
}
greyboxForecast$scale <- bValues;
}
else if(object$distribution=="ds"){
# Use the connection between the variance and b in S distribution
bValues <- (greyboxForecast$variances/120)^0.25;
Expand Down Expand Up @@ -701,6 +717,15 @@ predict.greybox <- function(object, newdata, interval=c("none", "confidence", "p

parameters <- coef.greybox(object);
parametersNames <- names(parameters);
ourVcov <- vcov(object);

if(object$distribution=="dalaplace"){
if(parametersNames[1]=="alpha"){
parametersNames <- parametersNames[-1];
parameters <- parameters[-1];
ourVcov <- ourVcov[-1,-1];
}
}

if(any(parametersNames=="(Intercept)")){
matrixOfxreg <- as.matrix(cbind(rep(1,nrow(newdata)),newdata[,-1]));
Expand All @@ -718,11 +743,10 @@ predict.greybox <- function(object, newdata, interval=c("none", "confidence", "p
matrixOfxreg <- matrix(matrixOfxreg, nrow=1);
}

ourForecast <- as.vector(matrixOfxreg %*% coef.greybox(object));
ourForecast <- as.vector(matrixOfxreg %*% parameters);

paramQuantiles <- qt(c(levelLow, levelUp),df=object$df.residual);

ourVcov <- vcov(object);
vectorOfVariances <- diag(matrixOfxreg %*% ourVcov %*% t(matrixOfxreg));

if(interval=="c"){
Expand Down Expand Up @@ -804,6 +828,12 @@ nParam.default <- function(object, ...){
return(length(coef(object))+1);
}

#' @export
nParam.alm <- function(object, ...){
# The length of the vector of parameters + variance
return(object$df);
}

#' @export
nParam.logLik <- function(object, ...){
# The length of the vector of parameters + variance
Expand Down Expand Up @@ -1006,14 +1036,15 @@ print.summary.alm <- function(x, ...){

distrib <- switch(x$distribution,
"dnorm" = "Normal",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dlaplace" = "Laplace",
"dlogis" = "Logistic",
"dlaplace" = "Laplace",
"dalaplace" = paste0("Asymmetric Laplace with alpha=",round(x$other$alpha,2)),
"ds" = "S",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dchisq" = "Chi-Squared",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand Down Expand Up @@ -1044,14 +1075,15 @@ print.summary.greybox <- function(x, ...){

distrib <- switch(x$distribution,
"dnorm" = "Normal",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dlaplace" = "Laplace",
"dlogis" = "Logistic",
"dlaplace" = "Laplace",
"dalaplace" = "Asymmetric Laplace",
"ds" = "S",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dchisq" = "Chi-Squared",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand All @@ -1078,14 +1110,15 @@ print.summary.greyboxC <- function(x, ...){

distrib <- switch(x$distribution,
"dnorm" = "Normal",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dlaplace" = "Laplace",
"dlogis" = "Logistic",
"dlaplace" = "Laplace",
"dalaplace" = "Asymmetric Laplace",
"ds" = "S",
"dfnorm" = "Folded Normal",
"dlnorm" = "Log Normal",
"dchisq" = "Chi-Squared",
"dpois" = "Poisson",
"dnbinom" = "Negative Binomial",
"dchisq" = "Chi-Squared",
"plogis" = "Cumulative logistic",
"pnorm" = "Cumulative normal"
);
Expand Down Expand Up @@ -1173,6 +1206,7 @@ summary.alm <- function(object, level=0.95, ...){
ourReturn$ICs <- ICs;
ourReturn$distribution <- object$distribution;
ourReturn$occurrence <- object$occurrence;
ourReturn$other <- object$other;

ourReturn <- structure(ourReturn,class="summary.alm");
return(ourReturn);
Expand Down
16 changes: 11 additions & 5 deletions man/alm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 1d22422

Please sign in to comment.