-
Notifications
You must be signed in to change notification settings - Fork 0
Data Output 2016
Adam VanIwaarden edited this page Aug 24, 2016
·
1 revision
The final step in the PARCC analyses is the custom formatting of the results to satisfy Pearson's requirements. The SGP results and base data were exported via the outputSGP function in the data analysis step. The Spring 2016 results from the PARCC consortium and individual states are read into R and combined, and then additional variables describing the reason for missing SGPs are added. Finally, the formatted results are exported into state-specific, compressed (.zip), comma-separated (.csv) files.
########################################################################
### ###
### Format PARCC 2016 Results Data to Return to Pearson ###
### ###
########################################################################
### Load required packages
# library() stops with an error when a package is missing; require() would only
# warn and return FALSE, letting the script fail later with a less obvious error.
library(SGP)
library(data.table)
# The relative "./..." paths used in the rest of the script are resolved from here.
setwd("PARCC")
###
### Read in Fall and Spring 2016 Output Files
###
# Load each saved long-data file into the current workspace. The objects used
# later in the script (PARCC_SGP_LONG_Data, Colorado_SGP_LONG_Data, ...) are
# expected to come from these files.
for (long.data.file in c(
  "./PARCC/Data/PARCC_SGP_LONG_Data.Rdata",
  "./Colorado/Data/Colorado_SGP_LONG_Data.Rdata",
  "./Illinois/Data/Illinois_SGP_LONG_Data.Rdata",
  "./Maryland/Data/Maryland_SGP_LONG_Data.Rdata",
  "./Massachusetts/Data/Massachusetts_SGP_LONG_Data.Rdata",
  "./New_Jersey/Data/New_Jersey_SGP_LONG_Data.Rdata",
  "./New_Mexico/Data/New_Mexico_SGP_LONG_Data.Rdata",
  "./Rhode_Island/Data/Rhode_Island_SGP_LONG_Data.Rdata",
  "./Washington_DC/Data/WASHINGTON_DC_SGP_LONG_Data.Rdata")) {
  load(long.data.file)
}
# Drop the loop variable so the workspace holds only the loaded objects.
rm(long.data.file)
#### Set names based on Pearson file layout
# Columns of the Pearson layout; "TestFormat" is listed last so it can be
# moved behind the growth columns when the full layout is assembled.
parcc.var.names <- c(
  "AssessmentYear", "StateAbbreviation", "PARCCStudentIdentifier",
  "GradeLevelWhenAssessed", "Period", "TestCode", "SummativeScoreRecordUUID",
  "StudentTestUUID", "SummativeScaleScore", "IRTTheta", "SummativeCSEM",
  "Filler", "TestFormat"
)
# Growth columns produced by this script (state-referenced and PARCC-referenced).
center.var.names <- c(
  "StudentGrowthPercentileComparedtoState",
  "StudentGrowthPercentileComparedtoPARCC", "SGPPreviousTestCodeState",
  "SGPPreviousTestCodePARCC", "SGPUpperBoundState", "SGPLowerBoundState",
  "SGPUpperBoundPARCC", "SGPLowerBoundPARCC"
)
# Full output column order: every Pearson column except the last one, then the
# growth columns, then "TestFormat".
all.var.names <- c(
  parcc.var.names[-length(parcc.var.names)],
  center.var.names,
  "TestFormat"
)
#### State Data
# Stack the eight state-level long tables into a single table. fill=TRUE lets
# tables with differing column sets be combined (absent columns become NA).
state.long.list <- list(
  Colorado_SGP_LONG_Data,
  Illinois_SGP_LONG_Data,
  Maryland_SGP_LONG_Data,
  Massachusetts_SGP_LONG_Data,
  New_Jersey_SGP_LONG_Data,
  New_Mexico_SGP_LONG_Data,
  Rhode_Island_SGP_LONG_Data,
  WASHINGTON_DC_SGP_LONG_Data
)
State_LONG_Data <- rbindlist(state.long.list, fill=TRUE)
rm(state.long.list)
#####
### Data for Consolidated SGP object
#####
load("./PARCC/Data/PARCC_SGP.Rdata")

# Columns that identify a record, and the state-referenced growth columns that
# get a "_STATE" suffix before being merged onto the consortium data.
merge.keys <- c("VALID_CASE", "CONTENT_AREA", "YEAR", "ID")
state.vars <- c("SGP_SIMEX", "SGP", "SGP_0.05_CONFIDENCE_BOUND", "SGP_0.95_CONFIDENCE_BOUND", "SGP_NORM_GROUP")

# Only 2015_2016.2 state records that actually have an SGP are carried over.
State_Subset <- State_LONG_Data[
  !is.na(SGP) & YEAR=='2015_2016.2',
  c(merge.keys, state.vars),
  with=FALSE]
setnames(State_Subset, state.vars, paste0(state.vars, "_STATE"))

# Key both tables identically so merge() joins on the shared key columns.
setkeyv(State_Subset, merge.keys)
setkeyv(PARCC_SGP_LONG_Data, merge.keys)

# Keep every consortium record (all.x=TRUE), store the result in the SGP
# object's Data slot and pass the object back through prepareSGP().
PARCC_SGP@Data <- merge(PARCC_SGP_LONG_Data, State_Subset, all.x=TRUE)
PARCC_SGP <- prepareSGP(PARCC_SGP)

dir.create("./PARCC/Data/Pearson", recursive=TRUE)
save(PARCC_SGP, file="./PARCC/Data/Pearson/PARCC_SGP-Consortium.Rdata")
#####
### Data for formatted output to Pearson
#####
## Add in the Missing SGP identifiers requested by Pearson
### PARCC Consortium
#### ELA
# Purpose: for 2015_2016.2 ELA records without an SGP, record in MISSING_SGP why
# no SGP exists, based on the grades the student tested in during other YEARs.
# Every record starts with the literal string "NA" (a character code, not a missing value).
PARCC_SGP_LONG_Data[, MISSING_SGP := "NA"]
# IDs with a 2015_2016.2 ELA record that has no SGP ...
na.ela.ids <- PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
# ... and IDs with an ELA record in any other YEAR.
prior.ela.ids <- PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR!='2015_2016.2']$ID
#### Identify the skipped grades and repeaters, and Fall to Spring (Actual Small Cohort)
# Students with no SGP even though another ELA record exists for them.
yes.prior.ela.ids <- intersect(na.ela.ids, prior.ela.ids)
ela <- PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & ID %in% yes.prior.ela.ids][, list(VALID_CASE, ID, StudentTestUUID, YEAR, GRADE)]
# One row per ID and one column per YEAR holding the GRADE tested. The lines below
# reference the columns `2014_2015.2`, `2015_2016.1` and `2015_2016.2`, so all three
# YEAR values must be present in `ela`.
# NOTE(review): assumes at most one ELA record per ID and YEAR; with duplicates
# dcast would aggregate instead of returning the GRADE - confirm.
ela_wide <- dcast(ela, ID ~ YEAR, value.var="GRADE")
# Exactly one grade higher in 2015_2016.2 than in 2015_2016.1.
sm.cohort.ela.ids <- ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) == 1,]$ID
# Same grade in 2015_2016.2 as in either of the other two administrations.
repeat.ela.ids <- ela_wide[`2014_2015.2`==`2015_2016.2` | `2015_2016.1`==`2015_2016.2`,]$ID
# More than one grade higher in 2015_2016.2 than in either other administration.
skip.ela.ids <- unique(c(ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) > 1,]$ID, ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) > 1,]$ID))
# Lower grade in 2015_2016.2 than in either other administration.
regr.ela.ids <- unique(c(ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) < 0,]$ID, ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) < 0,]$ID))
# Interactive check (the result is only printed, nothing is enforced): do the four
# categories together account for every student in yes.prior.ela.ids?
length(unique(c(sm.cohort.ela.ids, repeat.ela.ids, skip.ela.ids, regr.ela.ids))) == length(yes.prior.ela.ids)
table(ela_wide[ID %in% skip.ela.ids][, `2014_2015.2`, `2015_2016.2`]) # Only 9th to 11th grade ELA is close (still < 1000 @ 920)
# Label the 2015_2016.2 ELA records. The statements run in this order, so an ID that
# belongs to several sets keeps the label assigned last ("<1000" wins over "Repeat",
# which wins over "Skipped", which wins over "Regressed").
PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% regr.ela.ids, MISSING_SGP := "Regressed"]
PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% skip.ela.ids, MISSING_SGP := "Skipped"]
PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% repeat.ela.ids, MISSING_SGP := "Repeat"]
PARCC_SGP_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% sm.cohort.ela.ids, MISSING_SGP := "<1000"]
# Frequency of each code over all records (interactive inspection).
table(PARCC_SGP_LONG_Data[, MISSING_SGP])
### Grade Level Math
# Purpose: same coding as for ELA, applied to 2015_2016.2 MATHEMATICS records
# without an SGP; only the 2014_2015.2 grade is compared here.
# IDs with a 2015_2016.2 MATHEMATICS record that has no SGP ...
na.math.ids <- PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
# ... and IDs with a MATHEMATICS record in any other YEAR.
prior.math.ids <- PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR!='2015_2016.2']$ID
#### Identify the skipped grades and repeaters, and Fall to Spring (NONE for Grade Level Math)
# Students with no SGP even though another MATHEMATICS record exists for them.
yes.prior.math.ids <- intersect(na.math.ids, prior.math.ids)
math <- PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & ID %in% yes.prior.math.ids][, list(VALID_CASE, ID, StudentTestUUID, YEAR, GRADE)]
# One row per ID and one column per YEAR holding the GRADE tested.
# NOTE(review): assumes at most one MATHEMATICS record per ID and YEAR - confirm.
math_wide <- dcast(math, ID ~ YEAR, value.var="GRADE")
# The small-cohort category is deliberately disabled for grade level math (kept for reference).
# sm.cohort.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) == 1,]$ID
# Same grade in both years.
repeat.math.ids <- math_wide[`2014_2015.2`==`2015_2016.2`,]$ID
# More than one grade higher in 2015_2016.2 than in 2014_2015.2.
skip.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) > 1,]$ID
# Lower grade in 2015_2016.2 than in 2014_2015.2.
regr.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) < 0,]$ID
# Interactive check (printed only): do the three categories account for every student?
length(unique(c(repeat.math.ids, skip.math.ids, regr.math.ids))) == length(yes.prior.math.ids)
table(math_wide[ID %in% skip.math.ids][, `2014_2015.2`, `2015_2016.2`]) # Skipping from 6th to 8th grade Math would have been feasible
# Label the 2015_2016.2 MATHEMATICS records; later statements overwrite earlier ones
# for IDs that appear in more than one set.
PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% regr.math.ids, MISSING_SGP := "Regressed"]
PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% skip.math.ids, MISSING_SGP := "Skipped"]
PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% repeat.math.ids, MISSING_SGP := "Repeat"]
# PARCC_SGP_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% sm.cohort.math.ids, MISSING_SGP := "<1000"]
# Frequency of each code over all records (interactive inspection).
table(PARCC_SGP_LONG_Data[, MISSING_SGP])
### EOCT Math
# Purpose: code the 2015_2016.2 records of the remaining content areas (everything
# other than ELA, ELA_SS, MATHEMATICS and MATHEMATICS_SS) that have no SGP.
na.eoct.ids <- PARCC_SGP_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS", "MATHEMATICS", "MATHEMATICS_SS") & YEAR=='2015_2016.2' & is.na(SGP)]$ID
# IDs with any non-ELA record in another YEAR (grade level MATHEMATICS is included here).
prior.eoct.ids <- PARCC_SGP_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS") & YEAR!='2015_2016.2']$ID
#### Identify EOCT students with no SGP, but some Math prior
# yes.prior.math.ids comes from the grade level math section above and is removed here.
yes.prior.eoct.ids <- setdiff(intersect(na.eoct.ids, prior.eoct.ids), yes.prior.math.ids) # weed out grade level math only cases
#### Identify repeaters; everyone else with a prior is treated as small cohort
# All non-ELA records of these students, with the "_SS" content areas dropped.
eoct <- PARCC_SGP_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS") & ID %in% yes.prior.eoct.ids][, list(VALID_CASE, ID, StudentTestUUID, CONTENT_AREA, YEAR)][grep("_SS", CONTENT_AREA, invert =TRUE),]
# Keyed join on ID of the 2015_2016.2 records (ID, StudentTestUUID, CONTENT_AREA) with
# the records from other YEARs (ID, CONTENT_AREA); the latter's content area appears
# in the result as i.CONTENT_AREA.
eoct_wide <- data.table(eoct[YEAR=='2015_2016.2'][, list(ID, StudentTestUUID, CONTENT_AREA)], key="ID")[data.table(eoct[YEAR!='2015_2016.2'][, list(ID, CONTENT_AREA)], key="ID")]
# Repeaters: same content area in 2015_2016.2 as in another YEAR, excluding MATHEMATICS.
repeater.ids <- eoct_wide[CONTENT_AREA==i.CONTENT_AREA & i.CONTENT_AREA != "MATHEMATICS"]$ID # still 3 grade level Maths included.
repeater.test.ids <- eoct_wide[CONTENT_AREA==i.CONTENT_AREA & i.CONTENT_AREA != "MATHEMATICS"]$StudentTestUUID # still 3 grade level Maths included.
# Repeats are labelled per test record (StudentTestUUID) rather than per student.
PARCC_SGP_LONG_Data[StudentTestUUID %in% repeater.test.ids, MISSING_SGP := "Repeat"]
# Every remaining student with a prior is coded "<1000".
sm.cohort.eoct.ids <- setdiff(yes.prior.eoct.ids, repeater.ids)
PARCC_SGP_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS", "MATHEMATICS", "MATHEMATICS_SS") & YEAR=='2015_2016.2' & ID %in% sm.cohort.eoct.ids, MISSING_SGP := "<1000"]
### Checks
# Interactive diagnostics only: results are printed, nothing is enforced, and the
# *.ids variables from the sections above are overwritten.
# Records that still carry the default "NA" code and have no SGP (all YEARs).
# NOTE(review): because `nas` keeps only records without an SGP, the "prior" lookups
# below cannot see prior records that do have an SGP - confirm this is intended.
nas <- PARCC_SGP_LONG_Data[MISSING_SGP=="NA" & is.na(SGP)]
# ELA: TRUE when no still-uncoded 2015_2016.2 student has an ELA record from another YEAR in `nas`.
na.ela.ids <- nas[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.ela.ids <- nas[CONTENT_AREA == "ELA" & YEAR!='2015_2016.2']$ID
no.prior.ela.ids <- setdiff(na.ela.ids, prior.ela.ids)
identical(na.ela.ids, no.prior.ela.ids)
# Grade level math: same check.
na.math.ids <- nas[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.math.ids <- nas[CONTENT_AREA == "MATHEMATICS" & YEAR!='2015_2016.2']$ID
no.prior.math.ids <- setdiff(na.math.ids, prior.math.ids)
identical(na.math.ids, no.prior.math.ids)
# Remaining content areas: same check, with the "_SS" content areas dropped first.
na.eoct.ids <- nas[grep("_SS", CONTENT_AREA, invert =TRUE),][!CONTENT_AREA %in% c("ELA", "MATHEMATICS") & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.eoct.ids <- nas[grep("_SS", CONTENT_AREA, invert =TRUE),][!CONTENT_AREA %in% c("ELA", "ELA_SS") & YEAR!='2015_2016.2'][grep("_SS", CONTENT_AREA, invert =TRUE),]$ID
no.prior.eoct.ids <- setdiff(na.eoct.ids, prior.eoct.ids)
identical(na.eoct.ids, no.prior.eoct.ids)
# Students that do have a prior in `nas`; their records are listed from `eoct`
# (built in the EOCT section above).
xids <- setdiff(na.eoct.ids, no.prior.eoct.ids)
data.table(eoct[ID %in% xids], key="ID") # looks like kids with 2 records in 2015_2016.2 - one without an SGP and the other with (8 kids)
### PARCC States
#### ELA
# Purpose: repeat the MISSING_SGP coding on the combined state data. The helper
# variables (na.ela.ids, ela, ela_wide, ...) from the consortium section are overwritten.
# Every record starts with the literal string "NA" (a character code, not a missing value).
State_LONG_Data[, MISSING_SGP := "NA"]
# IDs with a 2015_2016.2 ELA record that has no SGP, and IDs with an ELA record in any other YEAR.
na.ela.ids <- State_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.ela.ids <- State_LONG_Data[CONTENT_AREA == "ELA" & YEAR!='2015_2016.2']$ID
#### Identify the skipped grades and repeaters, and Fall to Spring (Actual Small Cohort)
# Students with no SGP even though another ELA record exists for them.
yes.prior.ela.ids <- intersect(na.ela.ids, prior.ela.ids)
ela <- State_LONG_Data[CONTENT_AREA == "ELA" & ID %in% yes.prior.ela.ids][, list(VALID_CASE, ID, StudentTestUUID, YEAR, GRADE)]
# One row per ID and one column per YEAR holding the GRADE tested.
# NOTE(review): assumes at most one ELA record per ID and YEAR - confirm.
ela_wide <- dcast(ela, ID ~ YEAR, value.var="GRADE")
# Exactly one grade higher in 2015_2016.2 than in either other administration. Unlike
# the consortium section, a one-grade step from 2014_2015.2 also counts here.
sm.cohort.ela.ids <- unique(c(ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) == 1,]$ID, ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) == 1,]$ID))
# Same grade in 2015_2016.2 as in either of the other two administrations.
repeat.ela.ids <- ela_wide[`2014_2015.2`==`2015_2016.2` | `2015_2016.1`==`2015_2016.2`,]$ID
# More than one grade higher in 2015_2016.2 than in either other administration.
skip.ela.ids <- unique(c(ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) > 1,]$ID, ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) > 1,]$ID))
# Lower grade in 2015_2016.2 than in either other administration.
regr.ela.ids <- unique(c(ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2015_2016.1`) < 0,]$ID, ela_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) < 0,]$ID))
# Interactive check (printed only): do the four categories account for every student?
length(unique(c(sm.cohort.ela.ids, repeat.ela.ids, skip.ela.ids, regr.ela.ids))) == length(yes.prior.ela.ids)
# NOTE(review): the trailing remark is identical to the one in the consortium section;
# confirm the count still applies to the state data.
table(ela_wide[ID %in% skip.ela.ids][, `2014_2015.2`, `2015_2016.2`]) # Only 9th to 11th grade ELA is close (still < 1000 @ 920)
# Label the 2015_2016.2 ELA records; later statements overwrite earlier ones for IDs
# that appear in more than one set.
State_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% regr.ela.ids, MISSING_SGP := "Regressed"]
State_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% skip.ela.ids, MISSING_SGP := "Skipped"]
State_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% repeat.ela.ids, MISSING_SGP := "Repeat"]
State_LONG_Data[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & ID %in% sm.cohort.ela.ids, MISSING_SGP := "<1000"]
# Frequency of each code; exclude=NULL also counts real missing values.
table(State_LONG_Data[, MISSING_SGP], exclude=NULL)
### Grade Level Math
# Purpose: code the 2015_2016.2 MATHEMATICS records of the state data that have
# no SGP; only the 2014_2015.2 grade is compared here.
# IDs with a 2015_2016.2 MATHEMATICS record that has no SGP, and IDs with a
# MATHEMATICS record in any other YEAR.
na.math.ids <- State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.math.ids <- State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR!='2015_2016.2']$ID
#### Identify the skipped grades and repeaters, and Fall to Spring (NONE for Grade Level Math)
# Students with no SGP even though another MATHEMATICS record exists for them.
yes.prior.math.ids <- intersect(na.math.ids, prior.math.ids)
math <- State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & ID %in% yes.prior.math.ids][, list(VALID_CASE, ID, StudentTestUUID, YEAR, GRADE)]
# One row per ID and one column per YEAR holding the GRADE tested.
# NOTE(review): assumes at most one MATHEMATICS record per ID and YEAR - confirm.
math_wide <- dcast(math, ID ~ YEAR, value.var="GRADE")
# The small-cohort category is deliberately disabled for grade level math (kept for reference).
# sm.cohort.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) == 1,]$ID
# Same grade in both years.
repeat.math.ids <- math_wide[`2014_2015.2`==`2015_2016.2`,]$ID
# More than one grade higher in 2015_2016.2 than in 2014_2015.2.
skip.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) > 1,]$ID
# Lower grade in 2015_2016.2 than in 2014_2015.2.
regr.math.ids <- math_wide[as.numeric(`2015_2016.2`)-as.numeric(`2014_2015.2`) < 0,]$ID
# Interactive check (printed only): do the three categories account for every student?
length(unique(c(repeat.math.ids, skip.math.ids, regr.math.ids))) == length(yes.prior.math.ids)
table(math_wide[ID %in% skip.math.ids][, `2014_2015.2`, `2015_2016.2`]) # Skipping from 6th to 8th grade Math would have been feasible
# Label the 2015_2016.2 MATHEMATICS records; later statements overwrite earlier ones
# for IDs that appear in more than one set.
State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% regr.math.ids, MISSING_SGP := "Regressed"]
State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% skip.math.ids, MISSING_SGP := "Skipped"]
State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% repeat.math.ids, MISSING_SGP := "Repeat"]
# State_LONG_Data[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & ID %in% sm.cohort.math.ids, MISSING_SGP := "<1000"]
# Frequency of each code; exclude=NULL also counts real missing values.
table(State_LONG_Data[, MISSING_SGP], exclude=NULL)
### EOCT Math
# Purpose: code the 2015_2016.2 state records of the remaining content areas
# (everything other than ELA, ELA_SS, MATHEMATICS and MATHEMATICS_SS) that have no SGP.
na.eoct.ids <- State_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS", "MATHEMATICS", "MATHEMATICS_SS") & YEAR=='2015_2016.2' & is.na(SGP)]$ID
# IDs with any non-ELA record in another YEAR (grade level MATHEMATICS is included here).
prior.eoct.ids <- State_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS") & YEAR!='2015_2016.2']$ID
#### Identify EOCT students with no SGP, but some Math prior
# yes.prior.math.ids comes from the state grade level math section above and is removed here.
yes.prior.eoct.ids <- setdiff(intersect(na.eoct.ids, prior.eoct.ids), yes.prior.math.ids) # weed out grade level math only cases
#### Identify repeaters; everyone else with a prior is treated as small cohort
# All non-ELA records of these students, with the "_SS" content areas dropped.
eoct <- State_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS") & ID %in% yes.prior.eoct.ids][, list(VALID_CASE, ID, StudentTestUUID, CONTENT_AREA, YEAR)][grep("_SS", CONTENT_AREA, invert =TRUE),]
# Keyed join on ID of the 2015_2016.2 records (ID, StudentTestUUID, CONTENT_AREA) with
# the records from other YEARs (ID, CONTENT_AREA); the latter's content area appears
# in the result as i.CONTENT_AREA.
eoct_wide <- data.table(eoct[YEAR=='2015_2016.2'][, list(ID, StudentTestUUID, CONTENT_AREA)], key="ID")[data.table(eoct[YEAR!='2015_2016.2'][, list(ID, CONTENT_AREA)], key="ID")]
# Repeaters: same content area in 2015_2016.2 as in another YEAR, excluding MATHEMATICS.
# NOTE(review): the trailing remarks match the consortium section verbatim; confirm
# the count applies to the state data.
repeater.ids <- eoct_wide[CONTENT_AREA==i.CONTENT_AREA & i.CONTENT_AREA != "MATHEMATICS"]$ID # still 3 grade level Maths included.
repeater.test.ids <- eoct_wide[CONTENT_AREA==i.CONTENT_AREA & i.CONTENT_AREA != "MATHEMATICS"]$StudentTestUUID # still 3 grade level Maths included.
# Repeats are labelled per test record (StudentTestUUID) rather than per student.
State_LONG_Data[StudentTestUUID %in% repeater.test.ids, MISSING_SGP := "Repeat"]
# Every remaining student with a prior is coded "<1000".
sm.cohort.eoct.ids <- setdiff(yes.prior.eoct.ids, repeater.ids)
State_LONG_Data[!CONTENT_AREA %in% c("ELA", "ELA_SS", "MATHEMATICS", "MATHEMATICS_SS") & YEAR=='2015_2016.2' & ID %in% sm.cohort.eoct.ids, MISSING_SGP := "<1000"]
### Checks
# Interactive diagnostics only: results are printed, nothing is enforced, and the
# *.ids variables from the sections above are overwritten.
# State records that still carry the default "NA" code and have no SGP (all YEARs).
# NOTE(review): because `nas` keeps only records without an SGP, the "prior" lookups
# below cannot see prior records that do have an SGP - confirm this is intended.
nas <- State_LONG_Data[MISSING_SGP=="NA" & is.na(SGP)]
# ELA: TRUE when no still-uncoded 2015_2016.2 student has an ELA record from another YEAR in `nas`.
na.ela.ids <- nas[CONTENT_AREA == "ELA" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.ela.ids <- nas[CONTENT_AREA == "ELA" & YEAR!='2015_2016.2']$ID
no.prior.ela.ids <- setdiff(na.ela.ids, prior.ela.ids)
identical(na.ela.ids, no.prior.ela.ids)
# Optional follow-up for ELA students that fail the check (left disabled).
# xids <- setdiff(na.ela.ids, no.prior.ela.ids)
# m.ela <- data.table(State_LONG_Data[CONTENT_AREA == "ELA" & ID %in% xids], key="ID") # looks like kids with 2 records in 2015_2016.2 - one without an SGP and the other with (8 kids)
# Grade level math: same check.
na.math.ids <- nas[CONTENT_AREA == "MATHEMATICS" & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.math.ids <- nas[CONTENT_AREA == "MATHEMATICS" & YEAR!='2015_2016.2']$ID
no.prior.math.ids <- setdiff(na.math.ids, prior.math.ids)
identical(na.math.ids, no.prior.math.ids)
# Remaining content areas: same check, with the "_SS" content areas dropped first.
na.eoct.ids <- nas[grep("_SS", CONTENT_AREA, invert =TRUE),][!CONTENT_AREA %in% c("ELA", "MATHEMATICS") & YEAR=='2015_2016.2' & is.na(SGP)]$ID
prior.eoct.ids <- nas[grep("_SS", CONTENT_AREA, invert =TRUE),][!CONTENT_AREA %in% c("ELA", "ELA_SS") & YEAR!='2015_2016.2'][grep("_SS", CONTENT_AREA, invert =TRUE),]$ID
no.prior.eoct.ids <- setdiff(na.eoct.ids, prior.eoct.ids)
identical(na.eoct.ids, no.prior.eoct.ids)
# Students that do have a prior in `nas`; their records are listed from the state
# `eoct` table built in the EOCT section above.
# NOTE(review): the trailing remark matches the consortium section verbatim; confirm
# the count applies to the state data.
xids <- setdiff(na.eoct.ids, no.prior.eoct.ids)
data.table(eoct[ID %in% xids], key="ID") # looks like kids with 2 records in 2015_2016.2 - one without an SGP and the other with (8 kids)
### Restrict both data sets to the 2015_2016.2 records and drop the "_SS" content areas
State_LONG_Data <- State_LONG_Data[YEAR=='2015_2016.2']
State_LONG_Data <- State_LONG_Data[grep("_SS", CONTENT_AREA, invert =TRUE),]
PARCC_LONG_Data <- PARCC_SGP_LONG_Data[YEAR=='2015_2016.2']
PARCC_LONG_Data <- PARCC_LONG_Data[grep("_SS", CONTENT_AREA, invert =TRUE),]

### Substitute in MISSING_SGP coding where SGP is missing
# SGP has to be character so that it can hold the MISSING_SGP codes.
State_LONG_Data[, SGP := as.character(SGP)]
State_LONG_Data[is.na(SGP), SGP := MISSING_SGP]

### Strip the "_DUPS_<digits>" suffix from the student IDs
State_LONG_Data[, ID := gsub("_DUPS_[0-9]*", "", ID)]

### Rename to the Pearson layout (current name = Pearson name)
state.rename <- c(
  "ID" = "PARCCStudentIdentifier",
  "SCALE_SCORE_ACTUAL" = "SummativeScaleScore",
  "SCALE_SCORE_CSEM_ACTUAL" = "SummativeCSEM",
  "SCALE_SCORE" = "IRTTheta",
  "SCALE_SCORE_CSEM" = "Filler",
  "SGP" = "StudentGrowthPercentileComparedtoState",
  "SGP_0.05_CONFIDENCE_BOUND" = "SGPLowerBoundState",
  "SGP_0.95_CONFIDENCE_BOUND" = "SGPUpperBoundState"
)
setnames(State_LONG_Data, names(state.rename), unname(state.rename))

### Split SGP_NORM_GROUP to create 'SGPPreviousTestCode*' Variables
# Take the second-to-last "; "-separated entry of SGP_NORM_GROUP, keep the text
# after the "/", and split that on "_": the last piece becomes GRADE_PRIOR and
# the pieces before it (re-joined with "_") become CONTENT_AREA_PRIOR.
state.tmp.split <- strsplit(as.character(State_LONG_Data$SGP_NORM_GROUP), "; ")
state.prior.code <- sapply(strsplit(sapply(state.tmp.split, function(x) rev(x)[2]), "/"), '[', 2)
state.prior.parts <- strsplit(state.prior.code, "_")
State_LONG_Data[, CONTENT_AREA_PRIOR := sapply(sapply(state.prior.parts, head, -1), paste, collapse="_")]
State_LONG_Data[, GRADE_PRIOR := sapply(state.prior.parts, tail, 1)]
State_LONG_Data[GRADE_PRIOR=="EOCT", GRADE_PRIOR := ""]

# Relabel the "<content area> <grade>" combinations with test codes by position.
# NOTE(review): this relies on the factor having exactly these 19 levels in
# this (sorted) order - confirm with levels() before running on new data.
State_LONG_Data[, SGPPreviousTestCodeState := factor(paste(CONTENT_AREA_PRIOR, GRADE_PRIOR))]
levels(State_LONG_Data$SGPPreviousTestCodeState) <- c("ALG01", "ALG02", "ELA10", "ELA03", "ELA04", "ELA05", "ELA06", "ELA07", "ELA08", "ELA09",
"GEO01", "MAT1I", "MAT03", "MAT04", "MAT05", "MAT06", "MAT07", "MAT08", NA)
State_LONG_Data[, SGPPreviousTestCodeState := as.character(SGPPreviousTestCodeState)]

# Drop the helper columns and keep only columns that belong to the output layout.
State_LONG_Data[, c("CONTENT_AREA_PRIOR", "GRADE_PRIOR") := NULL]
State_LONG_Data <- State_LONG_Data[, intersect(names(State_LONG_Data), all.var.names), with=FALSE]
### PARCC Consortium Data
# SGP has to be character so that it can hold the MISSING_SGP codes.
PARCC_LONG_Data[, SGP := as.character(SGP)]
PARCC_LONG_Data[is.na(SGP), SGP := MISSING_SGP]

# Strip the "_DUPS_<digits>" suffix from the student IDs.
PARCC_LONG_Data[, ID := gsub("_DUPS_[0-9]*", "", ID)]

# Rename to the Pearson layout (current name = Pearson name).
parcc.rename <- c(
  "ID" = "PARCCStudentIdentifier",
  "SCALE_SCORE_ACTUAL" = "SummativeScaleScore",
  "SCALE_SCORE" = "IRTTheta",
  "SCALE_SCORE_CSEM_ACTUAL" = "SummativeCSEM",
  "SCALE_SCORE_CSEM" = "Filler",
  "SGP" = "StudentGrowthPercentileComparedtoPARCC",
  "SGP_0.05_CONFIDENCE_BOUND" = "SGPLowerBoundPARCC",
  "SGP_0.95_CONFIDENCE_BOUND" = "SGPUpperBoundPARCC"
)
setnames(PARCC_LONG_Data, names(parcc.rename), unname(parcc.rename))

### Split SGP_NORM_GROUP to create 'SGPPreviousTestCode*' Variables
# Take the second-to-last "; "-separated entry of SGP_NORM_GROUP, keep the text
# after the "/", and split that on "_": the last piece becomes GRADE_PRIOR and
# the pieces before it (re-joined with "_") become CONTENT_AREA_PRIOR.
parcc.tmp.split <- strsplit(as.character(PARCC_LONG_Data$SGP_NORM_GROUP), "; ")
parcc.prior.code <- sapply(strsplit(sapply(parcc.tmp.split, function(x) rev(x)[2]), "/"), '[', 2)
parcc.prior.parts <- strsplit(parcc.prior.code, "_")
PARCC_LONG_Data[, CONTENT_AREA_PRIOR := sapply(sapply(parcc.prior.parts, head, -1), paste, collapse="_")]
PARCC_LONG_Data[, GRADE_PRIOR := sapply(parcc.prior.parts, tail, 1)]
PARCC_LONG_Data[GRADE_PRIOR=="EOCT", GRADE_PRIOR := ""]

# Relabel the "<content area> <grade>" combinations with test codes by position.
# NOTE(review): this relies on the factor having exactly these 19 levels in
# this (sorted) order - confirm with levels() before running on new data.
PARCC_LONG_Data[, SGPPreviousTestCodePARCC := factor(paste(CONTENT_AREA_PRIOR, GRADE_PRIOR))]
levels(PARCC_LONG_Data$SGPPreviousTestCodePARCC) <- c("ALG01", "ALG02", "ELA10", "ELA03", "ELA04", "ELA05", "ELA06", "ELA07", "ELA08", "ELA09",
"GEO01", "MAT1I", "MAT03", "MAT04", "MAT05", "MAT06", "MAT07", "MAT08", NA)
PARCC_LONG_Data[, SGPPreviousTestCodePARCC := as.character(SGPPreviousTestCodePARCC)]

# Drop the helper columns and keep only columns that belong to the output layout.
PARCC_LONG_Data[, c("CONTENT_AREA_PRIOR", "GRADE_PRIOR") := NULL]
PARCC_LONG_Data <- PARCC_LONG_Data[, intersect(names(PARCC_LONG_Data), all.var.names), with=FALSE]
### Merge PARCC and State Data
# Join the state-referenced results onto the consortium results using every column
# the two tables have in common; all.x=TRUE keeps consortium records without a state match.
FINAL_LONG_Data <- merge(PARCC_LONG_Data, State_LONG_Data, by=intersect(names(PARCC_LONG_Data), names(State_LONG_Data)), all.x=TRUE)
# Records without a state match get the literal string "NA" as their state SGP code.
FINAL_LONG_Data[is.na(StudentGrowthPercentileComparedtoState), StudentGrowthPercentileComparedtoState := "NA"]
# Put the columns in the Pearson layout order; every name in all.var.names must exist.
setcolorder(FINAL_LONG_Data, all.var.names)
### Save R object and Export/zip State specific .csv files
# The same directory is already created earlier in this script with recursive=TRUE;
# if it exists, this call only emits a warning.
dir.create("./PARCC/Data/Pearson")
# The saved object still contains the textual missing-SGP codes; they are blanked
# out only afterwards, for the .csv export.
save(FINAL_LONG_Data, file="./PARCC/Data/Pearson/PARCC_SGP_LONG_Data_2015_2016.2-FORMATTED.Rdata")
# Replace the listed codes with a real missing value in both SGP columns.
# NOTE(review): "<1000" is not in these lists, so that code stays in the exported
# files - confirm this is what the file layout requires.
FINAL_LONG_Data[which(StudentGrowthPercentileComparedtoPARCC %in% c("NA", "Regressed", "Repeat", "Skipped")), StudentGrowthPercentileComparedtoPARCC := as.character(NA)]
FINAL_LONG_Data[which(StudentGrowthPercentileComparedtoState %in% c("NA", "Regressed", "Repeat", "Skipped")), StudentGrowthPercentileComparedtoState := as.character(NA)]
# One zipped .csv per state abbreviation.
# NOTE(review): tail(..., -1) skips the first unique StateAbbreviation value - confirm
# which value that is and that dropping it is intentional.
for (abv in tail(unique(FINAL_LONG_Data$StateAbbreviation), -1)) {
# Output directory "./<State_Name>/Data/Pearson", derived from the abbreviation.
dir.create(dir.name <- paste0("./", gsub(" ", "_", capwords(SGP:::getStateAbbreviation(abv, type="state"), special.words="DC")), "/Data/Pearson"), recursive=TRUE)
# The file name carries the abbreviation and today's date (YYYYMMDD).
fname <- paste0(dir.name, "/PARCC_", abv, "_2015-2016_SGP-Results_", format(Sys.Date(), format="%Y%m%d"), ".csv")
# Only this state's Spring 2015-2016 records are written.
fwrite(FINAL_LONG_Data[StateAbbreviation == abv & AssessmentYear == "2015-2016" & Period == "Spring"], fname) #, col.names = FALSE
# Archive is named "<fname>.zip"; the flags are handed to the external zip program
# (presumably -m moves the .csv into the archive and -q runs quietly - verify for the zip in use).
zip(zipfile=paste(fname, "zip", sep="."), files=fname, flags="-mq")
}
SGP - Student Growth Percentiles SGP Blog | SGP GitHub Repo | SGP on CRAN