Skip to content

Commit 4f68283

Browse files
author
btskinner
committed
fully working version
2 parents 19ba9b5 + adea8e9 commit 4f68283

18 files changed

Lines changed: 30827 additions & 39907 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
_*
33
!_layouts/
44
!_includes/
5+
!_sass/
6+
!_scripts/
57
!_config.yml
8+
# !_config_dev.yml
69
*.DS_Store
710
*.Rhistory

_scripts/make_data.R

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
################################################################################
2+
##
3+
## <PROJ> College Advising Map
4+
## <FILE> make_data.R
5+
## <AUTH> Benjamin Skinner
6+
## <INIT> September 2017
7+
##
8+
################################################################################
9+
10+
## libraries
11+
libs <- c('tidyverse', 'geojsonio', 'sp', 'rgdal', 'stringr')
12+
lapply(libs, require, character.only = TRUE)
13+
14+
## paths
15+
ddir <- '../assets/data'
16+
jdir <- '../assets/js'
17+
rdir <- file.path(ddir, '_raw')
18+
19+
## crosswalk
20+
cw <- read_csv(file.path(ddir, 'stcrosswalk.csv'))
21+
22+
################################################################################
23+
## FUNCTIONS
24+
################################################################################
25+
26+
## slight modification of stringr::str_to_title
27+
str_to_title_mod <- function(x) {
28+
x <- str_to_title(x)
29+
## Of --> of
30+
x <- gsub(' Of ', ' of ', x, fixed = TRUE)
31+
## And --> and
32+
x <- gsub(' And ', ' and ', x, fixed = TRUE)
33+
## The --> the
34+
x <- gsub(' The ', ' the ', x, fixed = TRUE)
35+
## A --> a
36+
x <- gsub(' A ', ' a ', x, fixed = TRUE)
37+
return(x)
38+
}
39+
40+
## write dataframe to JS array with no unnecessary whitespace
41+
writeJSArray <- function(df, array_name, vars, outfile) {
42+
43+
df <- df[,names(df) %in% vars]
44+
## maintain 1 index (JS is zero-index) for sanity
45+
lines <- paste0(array_name,'=[{}')
46+
47+
for (i in 1:nrow(df)) {
48+
## line <- paste0(array_name, '[', i, ']={')
49+
line <- paste0('{')
50+
for (v in 1:length(vars)) {
51+
if (!is.na(df[i, grep(vars[v], names(df))])) {
52+
if (v == 1) {
53+
} else {
54+
line <- paste0(line,',')
55+
}
56+
if (class(df[[vars[v]]]) == 'character') {
57+
line <- paste0(line, vars[v], ':"',
58+
df[i, grepl(vars[v], names(df))], '"')
59+
} else {
60+
line <- paste0(line, vars[v], ':',
61+
df[i, grepl(vars[v], names(df))])
62+
}
63+
64+
}
65+
}
66+
line <- paste0(line, '}')
67+
if (i == nrow(df)) { line <- paste0(line, '];') }
68+
lines <- c(lines,line)
69+
}
70+
writeLines(paste(lines, collapse = ','), outfile)
71+
}
72+
73+
################################################################################
74+
## CLEAN DATA
75+
################################################################################
76+
77+
## -------------------------------------
78+
## COLLEGE
79+
## -------------------------------------
80+
81+
## read in school data
82+
college <- read_csv(file.path(rdir, 'HD2015.zip')) %>%
83+
setNames(tolower(names(.))) %>%
84+
select(instnm, fips, sector, lon = longitud, lat = latitude) %>%
85+
filter(sector %in% c(1,2,4,5),
86+
fips %in% cw$stfips) %>%
87+
mutate(lon = as.numeric(lon),
88+
lat = as.numeric(lat),
89+
cat = ifelse(sector == 1, 5,
90+
ifelse(sector == 2, 6,
91+
ifelse(sector == 4, 7,
92+
ifelse(sector == 5, 8, 0))))) %>%
93+
select(-sector) %>%
94+
filter(!is.na(lon),
95+
!is.na(lat))
96+
97+
## -------------------------------------
98+
## HIGH SCHOOL
99+
## -------------------------------------
100+
101+
## read in school data
102+
hs <- read_csv(file.path(rdir, 'school_level_clean_2.csv')) %>%
103+
setNames(tolower(names(.))) %>%
104+
select(nces_id,
105+
nces_dist_id,
106+
instnm = school_name,
107+
fips = school_state_fips,
108+
lon = school_longitude,
109+
lat = school_latitude,
110+
enroltot = school_enrollment_total,
111+
frlpct = school_frl_pct,
112+
csr = school_student_counselor_ratio) %>%
113+
mutate(fips = as.integer(fips)) %>%
114+
filter(fips %in% cw$stfips) %>%
115+
mutate(instnm = str_to_title_mod(instnm),
116+
lon = as.numeric(lon),
117+
lat = as.numeric(lat)) %>%
118+
filter(!is.na(lon),
119+
!is.na(lat))
120+
121+
hs_imp <- hs %>%
122+
group_by(fips) %>%
123+
summarise(csr_mean = mean(csr, na.rm = TRUE)) %>%
124+
ungroup()
125+
126+
## advising programs at school level
127+
advise_tmp <- read_csv(file.path(rdir, 'advising_program_school_clean.csv')) %>%
128+
setNames(tolower(names(.))) %>%
129+
mutate(trio_subprogram = ifelse(trio_subprogram == 'N/A', NA,
130+
trio_subprogram)) %>%
131+
## need to make wide
132+
arrange(nces_id) %>%
133+
group_by(nces_id) %>%
134+
mutate(count = seq(n())) %>%
135+
rename(org_1 = organization_name,
136+
div_1 = division_name,
137+
tri_1 = trio_subprogram,
138+
web_1 = website) %>%
139+
mutate(org_2 = ifelse(count == 2, org_1, NA),
140+
div_2 = ifelse(count == 2, div_1, NA),
141+
tri_2 = ifelse(count == 2, tri_1, NA),
142+
web_2 = ifelse(count == 2, web_1, NA),
143+
org_3 = ifelse(count == 3, org_1, NA),
144+
div_3 = ifelse(count == 3, org_1, NA),
145+
tri_3 = ifelse(count == 3, org_1, NA),
146+
web_3 = ifelse(count == 3, org_1, NA),
147+
org_4 = ifelse(count == 4, org_1, NA),
148+
div_4 = ifelse(count == 4, org_1, NA),
149+
tri_4 = ifelse(count == 4, org_1, NA),
150+
web_4 = ifelse(count == 4, org_1, NA),
151+
org_5 = ifelse(count == 5, org_1, NA),
152+
div_5 = ifelse(count == 5, org_1, NA),
153+
tri_5 = ifelse(count == 5, org_1, NA),
154+
web_5 = ifelse(count == 5, org_1, NA),
155+
org_6 = ifelse(count == 6, org_1, NA),
156+
div_6 = ifelse(count == 6, org_1, NA),
157+
tri_6 = ifelse(count == 6, org_1, NA),
158+
web_6 = ifelse(count == 6, org_1, NA),
159+
org_7 = ifelse(count == 7, org_1, NA),
160+
div_7 = ifelse(count == 7, org_1, NA),
161+
tri_7 = ifelse(count == 7, org_1, NA),
162+
web_7 = ifelse(count == 7, org_1, NA),
163+
org_8 = ifelse(count == 8, org_1, NA),
164+
div_8 = ifelse(count == 8, org_1, NA),
165+
tri_8 = ifelse(count == 8, org_1, NA),
166+
web_8 = ifelse(count == 8, org_1, NA))
167+
168+
advise <- advise_tmp %>%
169+
filter(count == 1) %>%
170+
select(nces_id, ends_with('_1')) %>%
171+
left_join(advise_tmp %>%
172+
filter(count == 2) %>%
173+
select(nces_id, ends_with('_2')), by = 'nces_id') %>%
174+
left_join(advise_tmp %>%
175+
filter(count == 3) %>%
176+
select(nces_id, ends_with('_3')), by = 'nces_id') %>%
177+
left_join(advise_tmp %>%
178+
filter(count == 4) %>%
179+
select(nces_id, ends_with('_4')), by = 'nces_id') %>%
180+
left_join(advise_tmp %>%
181+
filter(count == 5) %>%
182+
select(nces_id, ends_with('_5')), by = 'nces_id') %>%
183+
left_join(advise_tmp %>%
184+
filter(count == 6) %>%
185+
select(nces_id, ends_with('_6')), by = 'nces_id') %>%
186+
left_join(advise_tmp %>%
187+
filter(count == 7) %>%
188+
select(nces_id, ends_with('_7')), by = 'nces_id') %>%
189+
left_join(advise_tmp %>%
190+
filter(count == 8) %>%
191+
select(nces_id, ends_with('_8')), by = 'nces_id') %>%
192+
unite(advise_org, starts_with('org_'), sep = '|') %>%
193+
mutate(advise_org = gsub('NA', '', advise_org)) %>%
194+
unite(advise_div, starts_with('div_'), sep = '|') %>%
195+
mutate(advise_div = gsub('NA', '', advise_div)) %>%
196+
unite(advise_tri, starts_with('tri_'), sep = '|') %>%
197+
mutate(advise_tri = gsub('NA', '', advise_tri)) %>%
198+
unite(advise_web, starts_with('web_'), sep = '|') %>%
199+
mutate(advise_web = gsub('NA', '', advise_web))
200+
201+
## merge into high school data
202+
hs <- hs %>%
203+
## left_join(dist) %>%
204+
left_join(advise) %>%
205+
left_join(hs_imp) %>%
206+
select(-starts_with('nces_')) %>%
207+
mutate(csr_flag = as.integer(is.na(csr)),
208+
csr = ifelse(is.na(csr), csr_mean, csr),
209+
cat = ifelse(!is.na(advise_org) & !is.na(csr), 1,
210+
ifelse(is.na(advise_org) & !is.na(csr), 2,
211+
ifelse(!is.na(advise_org) & is.na(csr), 3,
212+
ifelse(is.na(advise_org) & is.na(csr), 4, 0)))))
213+
214+
## -------------------------------------
215+
## COMMUNITY
216+
## -------------------------------------
217+
218+
## advising programs at school level
219+
community <- read_csv(file.path(rdir, 'advising_program_community_clean.csv')) %>%
220+
setNames(tolower(names(.))) %>%
221+
rename(advise_org = organization_name,
222+
advise_div = division_name,
223+
advise_web = website)
224+
225+
## get zipcode geo
226+
zipgeo <- read_tsv(file.path(rdir, '2016_Gaz_zcta_national.zip')) %>%
227+
setNames(tolower(names(.))) %>%
228+
mutate(zip = geoid,
229+
lon = intptlong,
230+
lat = intptlat) %>%
231+
select(zip, lon, lat)
232+
233+
## merge to community
234+
community <- community %>%
235+
left_join(zipgeo) %>%
236+
mutate(cat = 9) %>%
237+
## need to fix!
238+
drop_na(lon)
239+
240+
################################################################################
241+
## COMBINE & WRITE
242+
################################################################################
243+
244+
## bind
245+
df <- bind_rows(college, hs, community) %>%
246+
mutate(z = row_number(), # redundant id #
247+
cat = as.integer(cat)) %>%
248+
## rename for very small names
249+
rename(a = cat, # a := category
250+
b = instnm, # b := name
251+
c = fips, # c := fips
252+
d = enroltot, # d := enrollment (hs)
253+
e = frlpct, # e := frpl pct (hs)
254+
f = csr, # f := stu/cou ratio (hs)
255+
g = csr_flag, # g := missing csr
256+
h = zip, # h := zip code
257+
i = advise_org, # i := organization name
258+
j = advise_div, # h := division name
259+
k = advise_tri, # i := trio subprogram
260+
l = advise_web) # l := website
261+
262+
## split by schools/community and college
263+
df_coll <- df %>% filter(a %in% c(5:8))
264+
df_icon <- df %>% filter(a %in% c(0:4,9))
265+
266+
## set up as SP data frame
267+
lonlat_coll <- df_coll %>% select(lon, lat) %>% as.matrix()
268+
dfsp_coll <- SpatialPointsDataFrame(lonlat_coll, df_coll %>% select(z),
269+
proj4string = CRS('+init=epsg:3857'))
270+
lonlat_icon <- df_icon %>% select(lon, lat) %>% as.matrix()
271+
dfsp_icon <- SpatialPointsDataFrame(lonlat_icon, df_icon %>% select(z),
272+
proj4string = CRS('+init=epsg:3857'))
273+
274+
## write as geojson
275+
geojson_write(input = dfsp_coll, file = file.path(ddir, 'college.geojson'))
276+
geojson_write(input = dfsp_icon, file = file.path(ddir, 'icon.geojson'))
277+
278+
## write all data as minified JS
279+
writeJSArray(df, 's', letters[1:12], file.path(jdir, 'all_icon_array.js'))
280+
281+
## =============================================================================
282+
## END FILE
283+
################################################################################

0 commit comments

Comments
 (0)