-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.R
193 lines (149 loc) · 7.85 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#require(flexdashboard) # Dashboard package
#require(highcharter) # Interactive data visualizations
#require(viridis) # Color gradients
#require(tidyverse)
#require(countrycode) # Converting country names/codes
#require(DT) # Displaying data tables
#require(crosstalk) # Provides interactivity for HTML widgets
#require(plotly) # Interactive data visualizations
#require(shiny) #shiny
#require(sandwich)
#require(stargazer)
#require(Rcpp)
# Install (if needed) and load every package the app depends on, so that
# people opening the app through `runGitHub()` do not have to set anything up
# by hand.
#
# Takes no arguments and is called for its side effects only.
automate_loading <- function() {
  # pacman is only ever called through `pacman::`, so it is enough to check
  # that it is installed; it does not have to be attached.
  if (!requireNamespace("pacman", quietly = TRUE)) {
    install.packages("pacman")
  }
  pacman::p_load(
    tidyverse, shiny, readr, sandwich, stargazer, flexdashboard, highcharter,
    viridis, countrycode, plotly, Rcpp, crosstalk, DT
  )
}
automate_loading()

# Read the raw data and drop the year 2016 as well as the rows for Dominica
# and Saint Kitts and Nevis.
data_path <- file.path(".", "master.csv")
data <- read_csv(data_path) %>%
  filter(
    year != 2016,
    country != "Dominica",
    country != "Saint Kitts and Nevis"
  )

# Fix the names of some of the countries in our data to match the country names
# used by our map later on so that they'll be interpreted and displayed.
# All new = old pairs are handed to a single fct_recode() call.
data <- data %>%
  mutate(
    country = fct_recode(
      country,
      "The Bahamas" = "Bahamas",
      "Cape Verde" = "Cabo Verde",
      "South Korea" = "Republic of Korea",
      "Russia" = "Russian Federation",
      "Republic of Serbia" = "Serbia",
      "United States of America" = "United States"
    )
  )

# Reorder levels of age to be in chronological order.
data$age <- factor(
  data$age,
  levels = c(
    "5-14 years", "15-24 years", "25-34 years",
    "35-54 years", "55-74 years", "75+ years"
  )
)
# Highcharter theme shared by the charts: light grey background, grey axis
# and grid lines, dark grey text.
custom_theme <- hc_theme(
  colors = c("#5CACEE", "green", "red"),
  chart = list(
    backgroundColor = "#FAFAFA",
    plotBorderColor = "black"
  ),
  xAxis = list(
    # The leading "#" was missing here, so the value was not a valid hex
    # colour; it now matches the y axis grid lines.
    gridLineColor = "#C9C9C9",
    labels = list(style = list(color = "#333333")),
    lineColor = "#C9C9C9",
    minorGridLineColor = "#C9C9C9",
    tickColor = "#C9C9C9",
    title = list(style = list(color = "#333333"))
  ),
  yAxis = list(
    gridLineColor = "#C9C9C9",
    labels = list(style = list(color = "#333333")),
    lineColor = "#C9C9C9",
    minorGridLineColor = "#C9C9C9",
    tickColor = "#C9C9C9",
    tickWidth = 1,
    title = list(style = list(color = "#333333"))
  ),
  title = list(style = list(color = "#333333", fontFamily = "Lato")),
  subtitle = list(style = list(color = "#666666", fontFamily = "Lato")),
  legend = list(
    itemStyle = list(color = "#333333"),
    itemHoverStyle = list(color = "#FFF"),
    itemHiddenStyle = list(color = "#606063")
  ),
  credits = list(style = list(color = "#666")),
  # NOTE(review): this top-level itemHoverStyle sits outside `legend`; it may
  # have been meant to replace legend$itemHoverStyle above -- confirm.
  itemHoverStyle = list(color = "gray")
)
# Suicides per 100,000 inhabitants for every combination of the grouping
# columns passed through `...`.
#
# df   Data frame with `suicides_no` and `population` columns.
# ...  Grouping columns, forwarded unchanged to group_by().
#
# Returns the result of summarise() for that grouping: the grouping columns
# plus `suicide_capita`, rounded to 2 decimals.
suicide_rate_by <- function(df, ...) {
  df %>%
    group_by(...) %>%
    summarise(
      suicide_capita = round((sum(suicides_no) / sum(population)) * 100000, 2)
    )
}

# Create tibble for our line plot.
overall_tibble <- suicide_rate_by(data, year)

sex_color <- c("#83c99d", "#95a2f0") # light green & light blue

# Rate by year and sex, and by sex alone.
sex_tibble <- suicide_rate_by(data, year, sex)
pie_sex <- suicide_rate_by(data, sex)

# Create tibble for age so we can use it when creating our line plot.
age_tibble <- suicide_rate_by(data, year, age)

# Create a tibble with suicide per capita by country for 1985-2015.
country_tibble <- suicide_rate_by(data, country)

# Same per-country figures, sorted from the highest to the lowest rate.
country_bar <- country_tibble %>%
  arrange(desc(suicide_capita))

# Create new column in our data for continent. Use countrycode() to extract
# continents from country names.
data$continent <- countrycode(
  sourcevar = data$country,
  origin = "country.name",
  destination = "continent"
)

# Reclassify countries that have been coded as 'Americas', by countrycode(),
# into 'North America' and 'South America'. The South American countries are
# relabelled first, so whatever is still 'Americas' afterwards becomes
# 'North America'.
south_america <- c(
  "Argentina", "Brazil", "Chile", "Colombia", "Ecuador",
  "Guyana", "Paraguay", "Suriname", "Uruguay"
)
data$continent[data$country %in% south_america] <- "South America"
data$continent[data$continent == "Americas"] <- "North America"

# Create a tibble for continent and sex. -----
continent_sex_tibble <- suicide_rate_by(data, continent, sex)

# Columns needed for the GDP comparison, grouped by country, year, GDP per
# capita and suicide rate.
dt <- data %>%
  select(
    country, year, suicides_no, population,
    `gdp_per_capita ($)`, `suicides/100k pop`
  ) %>%
  group_by(country, year, `gdp_per_capita ($)`, `suicides/100k pop`)

# Per-country averages over all rows of `dt`: GDP per capita (rounded to a
# whole number) and suicides per 100k.
avg_dt <- dt %>%
  group_by(country) %>%
  summarise(
    avg_gdp = round(mean(`gdp_per_capita ($)`)),
    avg_sui = mean(`suicides/100k pop`)
  )

# Rate by country and year.
country_year_tibble <- suicide_rate_by(data, country, year)
### Let's perform some basic analysis
# Linear model of the suicide rate on GDP per capita, with sex, age and
# country entered as additional regressors; `- 1` removes the intercept.
plm_id_fix <- lm(
  `suicides/100k pop` ~ `gdp_per_capita ($)` + sex + age + country - 1,
  data = data
)
summary(plm_id_fix)

# robust standard errors (HC1)
rob_se_pan <- list(sqrt(diag(vcovHC(plm_id_fix, type = "HC1"))))

# Render the regression table as HTML, using the robust standard errors.
res <- stargazer(
  plm_id_fix,
  covariate.labels = "GDP Per Capita",
  header = FALSE,
  type = "html",
  omit.table.layout = "n",
  digits = 3,
  dep.var.labels.include = TRUE,
  se = rob_se_pan
)

# Keep only elements 1-13 and 319-325 of the generated output.
# NOTE(review): these positions are hard-coded and presumably depend on the
# number of coefficients in the model (e.g. how many countries are in the
# data) -- re-check them whenever the model or the data changes.
res <- c(res[1:13], res[319:325])
# Build a list of countries, each with its associated 'value', because that
# is the input the slider takes. -----
list_x <- as.character(avg_dt$country)

# enlist() is compiled from enlist.cpp.
# NOTE(review): presumably it returns a list with one element per country,
# named after that country -- confirm against the C++ source.
Rcpp::sourceCpp("enlist.cpp", showOutput = FALSE, verbose = FALSE)
country_list <- enlist(list_x)
### The same function written in base R, followed by a benchmark comparing the speed of the two versions -----
### The code below is commented out on purpose: running it would make the Rcpp optimization pointless -----
# list_countries <- function(country) {
# x <- vector()
# x <- list(country)
# names(x) <- country
# return(x)
# }
#
# xd <- vector(mode = 'list')
# for (i in list_x){
# xd <- append(xd, list_countries(i))
# }
#### Let's prove that the Rcpp function is more than 100x faster with a simple test -----
# microbenchmark(for (i in list_x){xd <- append(xd, list_countries(i))}, enlist(list_x))