# XX_nonNormal_fatTails(1).R

# This script is for modeling tax revenue of stylized governments


#**********************************************************************
#                           Packages                               ####
#**********************************************************************

library(markovchain) # Markov chain object
library(MASS)        # multivariate normal generator, must be loaded before tidyverse, otherwise 'select' will be masked


library(tidyverse)
library(broom)
library(readxl)
library(magrittr)
library(ggrepel)
library(stringr)
library(forcats)
library(grid)
library(gridExtra)
library(scales)
library(knitr)

library(xlsx)

# packages for econometric and time series modeling
library(plm)
library(astsa)    # companion package
library(TSA)      # companion package;  arimax: flexible transfer function model
library(tseries)  #
library(forecast) # Arima
library(MSwM)
library(TTR)
library(dynlm)
library(broom)

#library(MSBVAR)

# packages for ts
library(zoo)
library(xts)


library(timetk)
library(tidyquant)

library(lubridate)
library(feather)

library(psych) # describe

# Tibble printing: both row-count options are set to 60.
options(
	tibble.print_min = 60,
	tibble.print_max = 60
)


# check tidyquant, timetk, sweep (broom ), tibbletime
# Intro to zoo  cran.r-project.org/web/packages/zoo/vignettes/zoo-quickref.pdf
# sweep: http://www.business-science.io/code-tools/2017/07/09/sweep-0-1-0.html

#**********************************************************************
#                     Global settings and tools                    ####
#**********************************************************************
# Project-relative directory names. Each ends with "/" so a file name can be
# appended directly with paste0().
# NOTE(review): the load() calls later in this script hard-code their paths
# ("policyBrief_out/...", "Data_out/...") instead of using these constants, and
# "Data_out/" differs in case from dir_data_out -- confirm which spelling is
# correct on case-sensitive file systems.
dir_data_raw <- "data_raw/"
dir_data_out <- "data_out/"
dir_fig_out <- "policyBrief_out/"


# NBER recession periods, post-WWII
#
# One row per recession with numeric columns `peak` and `trough`.
# Dates are entered as year + quarter/4 and then shifted back by 1/4, so that
# quarter q of a year is stored as year + (q - 1)/4 (e.g. 1953 Q2 -> 1953.25).
# Built with base R only (no pipe / dplyr needed) and with `TRUE` spelled out,
# because `T` is an ordinary variable that can be reassigned.
recessionPeriods <- as.data.frame(
	matrix(
		c(
			1953+2/4, 1954+2/4,
			1957+3/4, 1958+2/4,
			1960+2/4, 1961+1/4,
			1969+4/4, 1970+4/4,
			1973+4/4, 1975+1/4,
			1980+1/4, 1980+3/4,
			1981+3/4, 1982+4/4,
			1990+3/4, 1991+1/4,
			2001+1/4, 2001+4/4,
			2007+4/4, 2009+2/4
		),
		ncol = 2,
		byrow = TRUE,
		dimnames = list(NULL, c("peak", "trough"))
	) - 1/4
)


# Log return of a series of levels: log(x[t] / x[t-1]).
#
# Args:
#   x: numeric vector of strictly positive levels (NA allowed).
# Returns:
#   Numeric vector of the same length as x; the first element is NA because
#   it has no predecessor.
# Errors:
#   Stops if any non-missing element of x is zero or negative.
get_logReturn <- function(x){
	if (any(x <= 0, na.rm = TRUE)) stop("Non-positive value(s)")
	# Shift explicitly instead of calling a bare lag(): lag() resolves to
	# stats::lag() whenever dplyr is not attached (or is masked), and that
	# function only shifts the time base, so x / lag(x) would be 1 everywhere
	# and every "return" would silently be 0.
	previous <- c(NA, head(x, -1L))
	log(x / previous)
}


# RIG colors and theme
# Named hex colors (strings of the form "#RRGGBB").
RIG.blue  <- "#003598"
RIG.red   <- "#A50021"
RIG.green <- "#009900"
RIG.yellow <- "#FFFF66"
RIG.purple <- "#9966FF"
RIG.yellow.dark <- "#ffc829"
RIG.orange <- "#fc9272"

# Six-color vector in a fixed order (red, orange, purple, green, blue,
# dark yellow); presumably passed to manual ggplot2 color scales -- it is not
# used elsewhere in the part of the script visible here.
demo.color6 <- c(RIG.red,
								 RIG.orange,
								 RIG.purple,
								 RIG.green ,
								 RIG.blue,
								 RIG.yellow.dark)


# ggplot2 theme fragment, to be added to a plot with `+`.
# Blanks the x-direction grid lines (major and minor) and the minor y grid
# lines, draws major y grid lines in "gray80", centers title and subtitle
# (hjust = 0.5) and sets the caption to hjust = 0 at size 9.
# Returns: the object produced by ggplot2::theme().
RIG.theme <- function() {
	hidden   <- element_blank()
	centered <- element_text(hjust = 0.5)

	theme(
		panel.grid.major.x = hidden,
		panel.grid.minor.x = hidden,
		panel.grid.minor.y = hidden,
		panel.grid.major.y = element_line(size = 0.5, color = "gray80"),
		plot.title         = centered,
		plot.subtitle      = centered,
		plot.caption       = element_text(hjust = 0, size = 9)
	)
}

# Lighter ggplot2 theme fragment: only text placement, no grid changes.
# Centers title and subtitle (hjust = 0.5) and sets the caption to hjust = 0
# at size 9.
# Returns: the object produced by ggplot2::theme().
RIG.themeLite <- function() {
	centered <- element_text(hjust = 0.5)

	theme(
		plot.title    = centered,
		plot.subtitle = centered,
		plot.caption  = element_text(hjust = 0, size = 9)
	)
}


#**********************************************************************
#                          Outline                                 ####
#**********************************************************************

# Goals:
  # Path of state tax revenue derived from simulated GDP, stock returns, and estimated elasticities. 
  # What to have in the results
  #   1. a deterministic simulation with constant GDP growth and stock return
  #   2. A single stochastic simulation, how different stylized governments respond to GDP and stocks differently
  #   3. A scenario, if hard to find a stochastic simulation that makes sense, construct a scenario that is similar to history. 
  #   4. Distribution of 2000 simulations (quantiles)
  #   5. Risk measures: probability of a sharp decline in tax revenue, and differences across stylized governments. 
 

#**********************************************************************
#     Importing simulations of GDP growth and stock return         ####
#**********************************************************************

# Notes
  # 1. Simulations are generated by Model_simulation(3).R
  # 2. What inputs to include:
  #    - simulated path of real GDP growth
  #    - Recession and expansion periods in each simulation
  #    - simulated path of stock return
  #    - simulated path of bond return


# Loading simulation outputs:
load("policyBrief_out/simulation_MS1.RData")

# Data frames used from the loaded file:
  # df_sim_gdp_regimes_y
  # df_sim_gdp_y
  # df_sim_stockreturn_y
  # df_sim_bondreturn_y

# Combine the simulated series into one data frame. Each input stores its
# series in a column called `return_y`; it is renamed before joining so the
# three series end up in separate columns. The joins are natural joins (no
# `by`), so they match on whatever column names the inputs share.
# `sim` is reduced to the first run of digits it contains and made numeric.
df_sim <-
	df_sim_gdp_regimes_y %>%
	left_join(rename(df_sim_gdp_y,         gdp_chg     = return_y)) %>%
	left_join(rename(df_sim_stockreturn_y, stockreturn = return_y)) %>%
	left_join(rename(df_sim_bondreturn_y,  bondreturn  = return_y)) %>%
	ungroup() %>%
	mutate(sim = as.numeric(str_extract(sim, "\\d+")))

head(df_sim)


# This module creates investment return series. 

# Empirical CDF of `x` evaluated at `perc`: the share of observations in `x`
# that are less than or equal to each value of `perc`.
#   x:    numeric sample
#   perc: numeric value(s) at which to evaluate the empirical CDF
# Returns a numeric vector the same length as `perc`.
ecdf_fun <- function(x, perc) {
	empirical_cdf <- ecdf(x)
	empirical_cdf(perc)
}

# Load the simulation outputs and the historical data set into the workspace.
# NOTE(review): simulation_MS1.RData was already loaded earlier in this script;
# loading it again restores every object stored in it, so any same-named object
# changed in between is overwritten -- confirm whether this second load is
# intended.
# NOTE(review): "Data_out/" is capitalized here while dir_data_out is
# "data_out/"; this matters on case-sensitive file systems.
# The code below reads `df_dataAll_y`, which presumably comes from
# dataAll.RData.
load("policyBrief_out/simulation_MS1.RData")
load("Data_out/dataAll.RData")


# #**************************************************************************************
# #                        Fat-tails in simulated returns                             ####
# #**************************************************************************************
# 
# ## simulated data
# df_sim_gdp_y %<>% as.data.frame()         %>% rename(gdp_chg = return_y)
# df_sim_stockreturn_y %<>% as.data.frame() %>% rename(stock_return   = return_y)
# df_sim_bondreturn_y %<>% as.data.frame()  %>% rename(bond_return = return_y)
# 
# df_sim <- df_sim_gdp_y %>% 
# 	left_join(df_sim_stockreturn_y) %>% 
# 	left_join(df_sim_bondreturn_y) %>% 
# 	mutate(port60_40_return = 0.6*stock_return + 0.4*bond_return)
# 
# 
# mean_sim_stock <- df_sim$stock_return %>% mean
# sd_sim_stock   <- df_sim$stock_return %>% sd
# 
# mean_port60_40 <- df_sim$port60_40_return %>% mean
# sd_port60_40   <- df_sim$port60_40_return %>% sd
# 
# 
# df_sim %<>% mutate(port_norm  = rnorm(nrow(df_sim), mean_port60_40, sd_port60_40),
# 									 stock_norm = rnorm(nrow(df_sim), mean_sim_stock, sd_sim_stock))
# 
# 
# qts <- c(0.005, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.995)
# 
# # comparing percentiles 
# quantile(df_sim$stock_return, qts)
# quantile(df_sim$stock_norm,   qts)
# 
# quantile(df_sim$port60_40_return, qts)
# quantile(df_sim$port_norm,   qts)
# 
# 
# df_probSim <- 
# 	df_sim %>% 
# 	select(stock_return, stock_norm, port60_40_return, port_norm) %>% 
# 	gather(var, value) %>% 
# 	group_by(var) %>% 
# 	summarise(prob40 = ecdf_fun(value, -0.4),
# 						prob30 = ecdf_fun(value, -0.3),
# 						prob20 = ecdf_fun(value, -0.2),
# 						prob10 = ecdf_fun(value, -0.1))


#**************************************************************************************
#           Fat tails in bootstrapped returns                                      ####
#**************************************************************************************

# Number of bootstrap draws (and of multivariate-normal draws) generated below.
nsim <- 5000

# Historical data
# Annual log returns computed from the three total-return index columns:
# every column except `year` is replaced, in place, by log(x[t] / x[t-1]).
# The transformation is passed as a plain function instead of the deprecated
# funs() helper, and dplyr::lag is named explicitly so the result cannot be
# changed by another attached package masking lag().
# Assumes df_dataAll_y has one row per year, sorted by year -- TODO confirm.
# NOTE(review): the first row of the unfiltered data has no predecessor and is
# NA; if the data start in 1955 that NA survives the filter and propagates into
# the bootstrap and the means/sds computed later.
df_hist <- 
	df_dataAll_y %>% 
	select(year, LCapStock_TRI, LTGBond_TRI, CBond_TRI) %>% 
	mutate_at(vars(-year), function(x) log(x / dplyr::lag(x))) %>% 
	mutate(mix_70_30_hist = 0.7 * LCapStock_TRI + 0.3 * LTGBond_TRI) %>% 
	filter(year %in% 1955:2015)

# Bootstrap block. Everything is wrapped in one brace expression so that
# set.seed() and all random draws are always executed together, in the same
# order. Inside the braces the trailing `; name` expressions are not
# auto-printed; they only show values when the lines are run one at a time.
{
set.seed(123)

# Resample whole rows (with replacement) so each draw keeps the stock and bond
# returns of the same historical year together.
# seq_len() instead of 1:nrow(): with an empty df_hist, 1:0 would silently
# sample from rows 1 and 0, whereas seq_len(0) makes sample() fail loudly.
df_boot <- df_hist[c("LCapStock_TRI", "LTGBond_TRI")][sample(seq_len(nrow(df_hist)), nsim, replace = TRUE), ] %>% 
	rename(stock_boot = LCapStock_TRI, 
				 GBond_boot = LTGBond_TRI) %>% 
	mutate(mix70_30_boot = 0.7*stock_boot + 0.3*GBond_boot)

# stock_boot <- sample(df_hist$LCapStock_TRI, nrow(df_sim), replace = TRUE)
# GBond_boot <- sample(df_hist$LTGBond_TRI,   nrow(df_sim), replace = TRUE)
# boot_port60_40 <- 0.6*boot_stock + 0.4*boot_GBond


# Mean, sd, and correlation coefficient from the bootstrapped sample
mean_boot_stock <- mean(df_boot$stock_boot); mean_boot_stock
sd_boot_stock   <- sd(df_boot$stock_boot);     sd_boot_stock

mean_boot_GBond <- mean(df_boot$GBond_boot); mean_boot_GBond
sd_boot_GBond   <- sd(df_boot$GBond_boot);     sd_boot_GBond

cov_stockBond <- cov(df_boot$stock_boot, df_boot$GBond_boot); cov_stockBond
cor_stockBond <- cor(df_boot$stock_boot, df_boot$GBond_boot); cor_stockBond


# Generating stock and bond returns with Multivariate Normal distribution
# whose mean vector and 2x2 covariance matrix match the bootstrapped sample.
# Column 1 of the result is the stock return, column 2 the bond return.

sim_normal <- mvrnorm(nsim, 
								    	mu = c(mean_boot_stock, mean_boot_GBond), 
								    	Sigma = matrix(c(sd_boot_stock^2, cov_stockBond, cov_stockBond, sd_boot_GBond^2 ), nrow = 2)) 
}
# Sanity check: moments of the simulated normal returns
# (column 1 = stock, column 2 = bond) and their correlation.
mean(sim_normal[, 1]); sd(sim_normal[, 1])
mean(sim_normal[, 2]); sd(sim_normal[, 2])
cor(sim_normal[, 1], sim_normal[, 2])


# Attach the normal draws to the bootstrap sample, plus the 70/30 mix of them.
df_boot <- 
	df_boot %>% 
	mutate(stock_norm     = sim_normal[, 1],
				 GBond_norm     = sim_normal[, 2],
				 mix_70_30_norm = 0.7 * stock_norm + 0.3 * GBond_norm)


# For each of the four series, the share of draws at or below
# -40%, -30%, -20% and -10% (empirical CDF evaluated at those returns).
df_probBoot <- 
	df_boot %>% 
	select(stock_boot, stock_norm, mix70_30_boot, mix_70_30_norm) %>% 
	gather(key = "var", value = "value") %>% 
	group_by(var) %>% 
	summarise(prob40 = ecdf(value)(-0.4),
						prob30 = ecdf(value)(-0.3),
						prob20 = ecdf(value)(-0.2),
						prob10 = ecdf(value)(-0.1))
df_probBoot


# Bootstrapped vs. normal stock returns: empirical CDFs, one line per series.
ggplot(
	data    = gather(select(df_boot, stock_boot, stock_norm), var, value),
	mapping = aes(x = value, color = var)
) + 
	stat_ecdf()


# Same two series, compared as kernel density estimates.
ggplot(
	data    = gather(select(df_boot, stock_boot, stock_norm), var, value),
	mapping = aes(x = value, color = var)
) + 
	geom_density()


#df_hist$LCapStock_TRI %>% sd


# Three figures, each overlaying the kernel density of a historical return
# series (blue) with a normal density (red) evaluated on the grid
# -0.7, -0.69, ..., 0.7. The grid is built once per data frame and reused for
# dnorm() instead of being computed twice.
# NOTE(review): the stock and bond normal curves use the mean/sd of the
# bootstrapped sample (mean_boot_*, sd_boot_*), while the 70/30 curve uses the
# mean/sd of the historical series itself -- confirm this difference is
# intended.

# Large-cap stock returns
df_stock <- data.frame(x = seq(-0.7, 0.7, 0.01))
df_stock$pdf_norm <- dnorm(df_stock$x, mean = mean_boot_stock, sd = sd_boot_stock)
df_hist %>% 
	ggplot(aes(x = LCapStock_TRI)) + theme_bw() + RIG.themeLite() + 
	geom_density(color = "blue") + 
	geom_line(aes(x = x, y = pdf_norm), data = df_stock, color = "red") + 
	scale_x_continuous(breaks = seq(-1, 1, 0.1)) + 
	labs(x = "Rate of return",
			 title = "Comparing empirical distribution of annual stock return \nand normal distribution")


# Long-term government bond returns
# (the original had two labs() calls setting the same x label; one is enough)
df_GBond <- data.frame(x = seq(-0.7, 0.7, 0.01))
df_GBond$pdf_norm <- dnorm(df_GBond$x, mean = mean_boot_GBond, sd = sd_boot_GBond)
df_hist %>% 
	ggplot(aes(x = LTGBond_TRI)) + theme_bw() + RIG.themeLite() + 
	geom_density(color = "blue") + 
	geom_line(aes(x = x, y = pdf_norm), data = df_GBond, color = "red") + 
	scale_x_continuous(breaks = seq(-1, 1, 0.1)) + 
	labs(x = "Rate of return",
			 title = "Comparing empirical distribution of annual long-term government bond return \nand normal distribution")


# 70/30 stock/bond portfolio
mean_hist_mix <- mean(df_hist$mix_70_30_hist); mean_hist_mix
sd_hist_mix   <- sd(df_hist$mix_70_30_hist);   sd_hist_mix
df_mix <- data.frame(x = seq(-0.7, 0.7, 0.01))
df_mix$pdf_norm <- dnorm(df_mix$x, mean = mean_hist_mix, sd = sd_hist_mix)
df_hist %>% 
	ggplot(aes(x = mix_70_30_hist)) + theme_bw() + RIG.themeLite() + 
	geom_density(color = "blue") + 
	geom_line(aes(x = x, y = pdf_norm), data = df_mix, color = "red") + 
	scale_x_continuous(breaks = seq(-1, 1, 0.1)) + 
	labs(x = "Rate of return",
			 title = "Comparing empirical distribution of return of a 70/30 portfolio \nand normal distribution")


# Export the tail-probability tables to one workbook: sheet "sim" for the
# simulated returns and sheet "boot" for the bootstrapped returns.
#
# `df_probSim` is only created by code that is currently commented out in this
# script, so an unconditional write stops with "object 'df_probSim' not found"
# before the bootstrap table is saved. The "sim" sheet is therefore written
# only when that object exists; otherwise a warning is raised and the workbook
# is created with the "boot" sheet alone.
# Arguments are spelled out in full (`sheetName`, `TRUE`) instead of relying on
# partial argument matching and on `T`. Tables are passed through
# as.data.frame() because write.xlsx2() is documented to take a data.frame.
# local() keeps the helper variables out of the global workspace.
local({
	out_file <- "Data_SimMacro/Table_probsFatTail.xlsx"
	has_sim  <- exists("df_probSim")

	if (has_sim) {
		write.xlsx2(as.data.frame(df_probSim), out_file, sheetName = "sim")
	} else {
		warning("df_probSim not found; writing only the 'boot' sheet", call. = FALSE)
	}

	# Append to the file only if the "sim" sheet was just written; otherwise
	# start a fresh workbook so no stale sheets are kept.
	write.xlsx2(as.data.frame(df_probBoot), out_file, sheetName = "boot", append = has_sim)
})