-
Notifications
You must be signed in to change notification settings - Fork 1
/
.Rhistory
52 lines (52 loc) · 1.54 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
library(Lahman) # Our source data
library(tidyverse) # "Metapackage," don't leave home without it!
library(psych) # Descriptive stats
library(skimr) # Data profiling
library(writexl) # Write to Excel -- data dump
library(openxlsx) # Write to Excel -- plus
# List the datasets that ship with the Lahman package
data(package = "Lahman")

# Preview the two source tables
head(Teams)
head(Salaries)

# Keep only the year, team id, team name, attendance and wins columns
teams <- select(Teams, yearID, teamID, name, attendance, W)
head(teams)
# Get the total annual salaries by team: one row per (yearID, teamID).
# `.groups = "drop"` returns an ungrouped tibble. Without it, summarise()
# peels off only the last grouping variable, so the result would stay
# grouped by yearID and a grouping message would be printed.
payroll <- Salaries %>%
  group_by(yearID, teamID) %>%
  summarise(payroll = sum(salary), .groups = "drop")
head(payroll)

# Compare the earliest season in each table: Teams vs. the payroll summary
min(Teams$yearID)
min(payroll$yearID)
# Drop the years that don't have payroll data.
# The join keys are named explicitly so the join does not depend on which
# column names the two tables happen to share (and prints no "Joining with"
# message).
teams_merged <- teams %>%
  inner_join(payroll, by = c("yearID", "teamID"))
head(teams_merged)

# Sanity check: the inner join should not have dropped any salary dollars.
# Both totals are summed as doubles. sum() on integer input can overflow the
# integer range and return NA with a warning, which would turn this check
# into NA instead of TRUE/FALSE. Whole-dollar amounts of this size are
# represented exactly in a double, so `==` is a safe comparison here.
# NOTE(review): assumes salary is stored as integer -- confirm with
# str(Salaries); the conversion is harmless if it is already double.
sum(as.numeric(teams_merged$payroll)) == sum(as.numeric(Salaries$salary))
# Shape: number of rows and columns
dim(teams_merged)

# Summary statistics for every column
summary(teams_merged)

# Histogram of team payrolls
ggplot(teams_merged, aes(payroll)) +
  geom_histogram()
# What could be causing this?

# Histogram of wins
ggplot(teams_merged, aes(W)) +
  geom_histogram()

# Scatter plot: payroll vs. attendance
ggplot(teams_merged, aes(payroll, attendance)) +
  geom_point()

# Scatter plot: payroll vs. wins
ggplot(teams_merged, aes(payroll, W)) +
  geom_point()
# What if we take the log of payrolls?
# Add the natural log of payroll as a new column
teams_merged <- teams_merged %>%
  mutate(log_payroll = log(payroll))
head(teams_merged)

# Scatter plot: log payroll vs. attendance, with a fitted linear trend line
teams_merged %>%
  ggplot(aes(log_payroll, attendance)) +
  geom_point() +
  geom_smooth(method = "lm")
# What can this function do and not?
# Awesome Easter Egg
# Open the help pages for the two Excel-writing topics
help("write_xlsx")
help("openxlsx")