-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
initial commit of A10 data prep file.
- Loading branch information
Showing
1 changed file
with
159 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
--- | ||
title: "Am10_dataprep" | ||
author: "Min-Yang Lee" | ||
date: "2024-10-28" | ||
output: html_document | ||
--- | ||
|
||
|
||
1. You must be able to see this folder ``nefscfile\BLAST\READ-SSB-Lee-MRIP-BLAST\data_folder\raw``. It contains the raw MRIP data that has been imported into Rds format. These were constructed with ``https://github.com/NEFSC/READ-SSB-MRIP-BLAST/R_code/data_extracting_processing/extraction/pull_in_MRIP.R``. | ||
|
||
|
||
|
||
```{r global_options, include=FALSE} | ||
library("here") | ||
library("data.table") | ||
library("tidyverse") | ||
library("knitr") | ||
library("lubridate") | ||
library("scales") | ||
library("fredr") | ||
library("kableExtra") | ||
library("censusapi") | ||
library("tigris") | ||
library("mapview") | ||
library("haven") | ||
library("survey") | ||
library("srvyr") | ||
#turn off scientific notation | ||
options(scipen=999) | ||
#Handle single PSUs | ||
options(survey.lonely.psu = "certainty") | ||
options(tigris_use_cache = TRUE) | ||
here::i_am("writing/Amendment10_data_prep.Rmd") | ||
############################################################################# | ||
#knitr options | ||
knitr::opts_chunk$set(echo=FALSE, warning = FALSE, error = FALSE, message = FALSE, comment = FALSE, cache = FALSE, progress = TRUE, verbose = FALSE, | ||
dpi = 600) | ||
options(tinytex.verbose = TRUE) | ||
# options(knitr.table.format = "latex") | ||
############################################################################# | ||
# RFA data | ||
RFA_filepath<-file.path("//nefscdata","RFA_EO12866_Guidelines" ,"Ownership Data", "current data and metadata","affiliates_2024_06_01.Rdata") | ||
BLAST_raw_filepath<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST", "data_folder","raw") | ||
local_BLAST_folder<-file.path("V:","READ-SSB-Lee-MRIP-BLAST") | ||
network_BLAST_folder<-file.path("//nefscfile","BLAST" ,"READ-SSB-Lee-MRIP-BLAST") | ||
############################################################################# | ||
# The census data that I'm using reports out 2022 Inflation adjusted dollars. Therefore, it makes sense to adjust everything to base year 2022. | ||
deflate_by <- "year" | ||
base_year = 2022 # base year for GDP deflator - Maximum = 2020, Minimum = 1947 - max(GDPDEF_quarterly$Year) | ||
vintage_string<-Sys.Date() | ||
vintage_string<-gsub("-","_",vintage_string) | ||
``` | ||
|
||
|
||
```{r get_census, eval=TRUE} | ||
# Pull in and tidy up some Census data on household income | ||
income <- getCensus( | ||
name = "acs/acs5/subject", | ||
vintage = 2022, | ||
vars = c("S1901_C01_001E", "S1901_C01_010E","S1901_C01_011E","S1901_C01_012E","S1901_C01_013E","S0101_C01_001E", "S0101_C02_028E","S0601_C01_001E","S0601_C01_022E"), | ||
region = "zip code tabulation area:*", | ||
show_call=FALSE) | ||
income<-income %>% | ||
rename ( households=S1901_C01_001E, | ||
household_inc_150_200pct =S1901_C01_010E, | ||
household_inc_200pct=S1901_C01_011E, | ||
household_median_inc=S1901_C01_012E, | ||
household_mean_inc=S1901_C01_013E, | ||
total_population=S0101_C01_001E, | ||
population_pct_over60=S0101_C02_028E, | ||
total_population_s0601=S0601_C01_001E, | ||
pct_white_alone =S0601_C01_022E | ||
) %>% | ||
mutate(obscured_median=case_when(household_median_inc<0 ~ 1, .default=0), | ||
obscured_mean=case_when(household_mean_inc<0 ~ 1, .default=0) | ||
) | ||
# Read in the state-zcta correspondence file. | ||
zcta_state<-read_delim("https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_county20_natl.txt", delim="|", guess_max=2000) | ||
# Contract down to the fraction in each area. | ||
zcta_state <- zcta_state %>% | ||
dplyr::filter(is.na(OID_ZCTA5_20)==FALSE) %>% | ||
group_by(OID_ZCTA5_20) %>% | ||
mutate(total_area=sum(AREALAND_PART)) %>% | ||
mutate(fraction=AREALAND_PART/total_area) %>% | ||
select(c(GEOID_ZCTA5_20, OID_ZCTA5_20,OID_COUNTY_20, GEOID_COUNTY_20, fraction)) | ||
# Pull in and tidy up some Census data on household income at the state level | ||
county_income <- getCensus( | ||
name = "acs/acs5/subject", | ||
vintage = 2022, | ||
vars = c("S1901_C01_012E","S1901_C01_013E"), | ||
region = "county:*", | ||
show_call=FALSE) | ||
county_income<-county_income %>% | ||
rename (county_household_median_inc=S1901_C01_012E, | ||
county_household_mean_inc=S1901_C01_013E | ||
) %>% | ||
mutate(st_co=paste0(state,county)) | ||
# use county income to create an in imputed income based on county income | ||
income_fill<-zcta_state %>% | ||
left_join(county_income, by=join_by(GEOID_COUNTY_20==st_co), relationship="many-to-one") %>% | ||
mutate(wmedian=fraction*county_household_median_inc, | ||
wmean=fraction*county_household_mean_inc) %>% | ||
group_by(GEOID_ZCTA5_20, OID_ZCTA5_20) %>% | ||
summarise(imputed_household_median_income=sum(wmedian), | ||
imputed_household_mean_income=sum(wmean)) %>% | ||
ungroup() | ||
income<-income %>% | ||
left_join(income_fill, by=join_by(zip_code_tabulation_area==GEOID_ZCTA5_20), relationship="one-to-one")%>% | ||
mutate(household_median_inc=case_when(obscured_median==1 ~ imputed_household_median_income, .default=household_median_inc), | ||
household_mean_inc=case_when(obscured_mean<0 ~ imputed_household_mean_income, .default=household_mean_inc) | ||
) %>% | ||
select(-c(imputed_household_mean_income,imputed_household_median_income)) | ||
summary(income) | ||
saveRDS(income,file=here("data_folder", "main", paste0("income_",vintage_string,".Rds"))) | ||
saveRDS(zcta_state,file=here("data_folder", "main", paste0("zcta_state",vintage_string,".Rds"))) | ||
``` | ||
|
||
|
||
|
||
```{r get_MRIP, eval=TRUE} | ||
``` | ||
|