-
Notifications
You must be signed in to change notification settings - Fork 0
/
ATO_Snapshot_table_7_analysis.R
73 lines (54 loc) · 2.91 KB
/
ATO_Snapshot_table_7_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# 0. SETUP ENVIRONMENT ----------------------------------------------------
library("readr")
library("tidyr")
library("dplyr")
library("ggplot2")
# 1. GET DATA -------------------------------------------------------------
# Read the two ATO snapshot table 7 extracts (7B and 7C) and keep them
# together in a single named list, one element per source CSV.
Data_1 <- list(
  ATO_Snapshot_table_7B = read_csv(
    "data/ATO_Snapshot_table_7_data/ATO_Snapshot_table_7B.csv"
  ),
  ATO_Snapshot_table_7C = read_csv(
    "data/ATO_Snapshot_table_7_data/ATO_Snapshot_table_7C.csv"
  )
)
# 2. CLEAN DATA ------------------------------------------------------------
# Carry the imported tables forward so the raw import in Data_1 is left
# untouched by the cleaning stage.
Data_2 <- Data_1

# Create the shared results container only when it does not exist yet.
if (!exists("Analysis")) {
  Analysis <- list()
}

# Per-state summary of the table 7B rows flagged "Bottom".
#
# Individuals are additive, so they are summed. Averages, medians and ranks
# are NOT additive: summing them produces a figure that grows with the number
# of postcodes in the state instead of an income or rank. They are therefore
# aggregated as follows:
#   * average income - mean of the postcode averages weighted by the number
#     of individuals, i.e. the pooled average (assumes the individuals column
#     is the head-count behind each postcode average - TODO confirm);
#   * median income  - mean of the postcode medians (a true pooled median
#     cannot be recovered from per-postcode medians, so this is an
#     approximation on the same dollar scale);
#   * rank           - mean rank of the state's postcodes.
Data_2$ATO_Snapshot_table_7B_state_summary <- Data_2$ATO_Snapshot_table_7B %>%
  filter(`Top or bottom` == "Bottom") %>%
  group_by(State = `State/ Territory1`) %>%
  summarise(
    `Individuals no.` = sum(`Individuals\r\nno.`),
    `Average taxable income or loss` = weighted.mean(
      `Average taxable income or loss3\r\n$`,
      `Individuals\r\nno.`
    ),
    `Median taxable income or loss` = mean(
      `Median taxable income or loss3\r\n$`
    ),
    `Australian rank descending` = mean(`Australian rank5 descending`)
  )
# 3. TIDY DATA ------------------------------------------------------------
# Carry the cleaned tables forward into the tidy stage.
Data_3 <- Data_2

# Reshape the per-state summary from one column per measure to long format:
# one row per (State, variable) pair with the measure in `value`.
#
# `pivot_longer()` replaces the superseded `gather()`. `pivot_longer()` emits
# rows state-by-state, whereas `gather()` stacked them measure-by-measure, so
# the result is re-ordered by measure (a stable sort keeps the state order
# within each measure) to leave the stored table unchanged.
# `local()` keeps the helper vector out of the global environment.
Data_3$ATO_Snapshot_table_7B_state_summary_gathered <- local({
  measure_cols <- c(
    "Individuals no.",
    "Average taxable income or loss",
    "Median taxable income or loss",
    "Australian rank descending"
  )

  Data_3$ATO_Snapshot_table_7B_state_summary %>%
    pivot_longer(
      cols = all_of(measure_cols),
      names_to = "variable",
      values_to = "value"
    ) %>%
    arrange(match(variable, measure_cols))
})
# 4. VISUALISE DATA -------------------------------------------------------
# Carry the tidy tables forward into the visualisation stage.
Data_4 <- Data_3

# Create the shared results container only when it does not exist yet.
if (!exists("Analysis")) {
  Analysis <- list()
}

# Keep the long-format state summary alongside the plots built from it.
Analysis$Tables$ATO_Snapshot_table_7B_state_summary_gathered <-
  Data_4$ATO_Snapshot_table_7B_state_summary_gathered

# Build a bar chart of one measure of the long-format state summary.
#
# summary_long:  long table with columns `State`, `variable` and `value`.
# variable_name: value of `variable` selecting the measure to plot.
# title:         plot title (rendered centred).
# y_label:       y-axis label.
# Returns a ggplot object with one bar per state.
plot_state_summary_bar <- function(summary_long, variable_name,
                                   title, y_label) {
  summary_long %>%
    filter(variable == variable_name) %>%
    ggplot(aes(x = State, y = value)) +
    # geom_col() draws bar heights from the data, like
    # geom_bar(stat = "identity").
    geom_col() +
    labs(title = title, y = y_label) +
    theme(plot.title = element_text(hjust = 0.5))
}

Analysis$Plots$ATO_Snapshot_table_7B_state_avgTaxIncome <-
  plot_state_summary_bar(
    Analysis$Tables$ATO_Snapshot_table_7B_state_summary_gathered,
    variable_name = "Average taxable income or loss",
    title = "Bottom 10 Postcodes per State based on Avg Taxable Income",
    y_label = "Avg Taxable Income ($)"
  )

# Title typo fixed here ("Mediann" -> "Median").
Analysis$Plots$ATO_Snapshot_table_7B_state_medTaxIncome <-
  plot_state_summary_bar(
    Analysis$Tables$ATO_Snapshot_table_7B_state_summary_gathered,
    variable_name = "Median taxable income or loss",
    title = "Bottom 10 Postcodes per State based on Median Taxable Income",
    y_label = "Median Taxable Income ($)"
  )

Analysis$Plots$ATO_Snapshot_table_7B_state_NumberOfIndividuals <-
  plot_state_summary_bar(
    Analysis$Tables$ATO_Snapshot_table_7B_state_summary_gathered,
    variable_name = "Individuals no.",
    title = "Bottom 10 Postcodes per State Number of Individuals",
    y_label = "No. of Individuals"
  )