eclsk2011_study1.Rmd

---
title: "ECLSK:2011 Study1 Results"
date: "`r format(Sys.time(), '%d %B, %Y')`"
output:
  html_document:
    toc: true
    toc_float: true
---

```{r setup, include=FALSE}
# Echo chunk source code in the rendered report by default.
knitr::opts_chunk$set(echo = TRUE)
# Fix the RNG seed so the random sampling done later in this document is
# reproducible across knits.
set.seed(9001)
library(tidyverse)
library(psych)
library(lmerTest)
# Load the `eclsk2011` dataset; later chunks read `eclsk2011$study1`.
# NOTE(review): no package that obviously ships this dataset is attached here,
# so data() presumably resolves it from a local `data/` directory -- confirm.
data(eclsk2011)
```

## Missingness

```{r}
# Plot the distribution of responses for every occasion x measure cell.
#
# df: long-format data frame with columns `occasion`, `measure` and `value`.
# Returns a ggplot object with one bar per distinct `value`, faceted with
# occasions as rows and measures as columns. The fill separates missing from
# observed values; the legend is suppressed.
meas_dist_plots <- function(df) {
  df %>%
    ggplot(aes(factor(value), fill = !is.na(value))) +
    # geom_bar() is the counting geom for discrete x. The previous
    # geom_histogram(stat = 'count') drew the same bars but warned about
    # ignored binning parameters.
    geom_bar(show.legend = FALSE) +
    facet_grid(vars(occasion), vars(measure))
}

# Bar chart of the proportion of missing values per measure, one panel per
# occasion.
#
# df: long-format data frame with columns `occasion`, `measure` and `value`.
# Returns a ggplot object; the horizontal line marks 20% missingness.
missing_plot <- function(df) {
  # Share of NA values within each occasion x measure cell
  na_share <- summarize(
    group_by(df, occasion, measure),
    missingness = sum(is.na(value)) / n()
  )

  ggplot(na_share, aes(x = measure, y = missingness)) +
    geom_col() +
    geom_hline(yintercept = 0.2) +
    facet_wrap(~occasion, ncol = 1)
}
```

### CBQ Items

```{r, fig.width=10, fig.height=8}
# Item columns examined in the CBQ section.
cbq_meas <- c(
  'TBNOFIN', 'TBGCCLR', 'TBGCBLD', 'TBABSBK',
  'TBEZDAC', 'TBEZDSL', 'TBFLWIN',
  'TBWTTSK', 'TBPLNAC', 'TBTRBST', 'TBAPRRK', 'TBSTNO'
)

# Reshape to one row per child x occasion x item, then drop every
# child-occasion in which all CBQ items are missing, so that whole-form
# non-response does not count towards item-level missingness.
# all_of() makes it explicit that `cbq_meas` is an external character vector
# of column names rather than a column of the data.
cbq_long <- eclsk2011$study1 %>%
  select(CHILDID, occasion, all_of(cbq_meas)) %>%
  pivot_longer(all_of(cbq_meas), names_to = 'measure', values_to = 'value') %>%
  group_by(CHILDID, occasion) %>%
  filter(!all(is.na(value))) %>%
  ungroup()

meas_dist_plots(cbq_long)
missing_plot(cbq_long)
```

### ATL Items

```{r, fig.width=10, fig.height=8}
# Item columns examined in the ATL section.
atl_meas <- c(
  'TKEEPS', 'TSHOWS', 'TWORKS', 'TADAPTS', 'TFOLLOW', 'TPERSIS', 'TATTEN'
)

# Reshape to one row per child x occasion x item, then drop every
# child-occasion in which all ATL items are missing, so that whole-form
# non-response does not count towards item-level missingness.
# all_of() makes it explicit that `atl_meas` is an external character vector
# of column names rather than a column of the data.
atl_long <- eclsk2011$study1 %>%
  select(CHILDID, occasion, all_of(atl_meas)) %>%
  pivot_longer(all_of(atl_meas), names_to = 'measure', values_to = 'value') %>%
  group_by(CHILDID, occasion) %>%
  filter(!all(is.na(value))) %>%
  ungroup()

meas_dist_plots(atl_long)
missing_plot(atl_long)
```

## EFA

```{r}
# Restrict `data` to one measurement occasion and keep complete cases only.
#
# data: data frame that contains an `occasion` column.
# o:    occasion value to keep.
# Returns the matching rows without the `occasion` column, after removing
# every row that has an NA in any of the remaining columns.
filter_occasion <- function(data, o) {
  one_occasion <- filter(data, occasion == o)
  items_only <- select(one_occasion, -occasion)
  na.omit(items_only)
}

# Items entering the exploratory factor analyses.
efa_measures <- c(
  'TKEEPS', 'TSHOWS', 'TWORKS', 'TADAPTS', 'TFOLLOW', 'TPERSIS', 'TATTEN',
  'TBEZDAC', 'TBNOFIN', 'TBGCCLR', 'TBGCBLD', 'TBEZDSL', 'TBABSBK',
  'TBWTTSK', 'TBTRBST', 'TBFLWIN', 'TBSTNO'
) # Omit: 'TBPLNAC', 'TBAPRRK'

# Training split, reduced to the occasion indicator plus the EFA items.
# all_of() marks `efa_measures` as an external vector of column names; the
# resulting column order (occasion first, then the items) is unchanged.
df_train <- eclsk2011$study1 %>%
  filter(split == 'train') %>%
  select(occasion, all_of(efa_measures))

# Occasion codes analysed in the sections below.
occasions <- c(1, 2, 4)
```

### Parallel Analysis

```{r}
# Run fa.parallel() once per occasion on the complete-case training items.
# The result is a list with one element per entry of `occasions`, in order.
parallel_result <- map(occasions, function(occ) {
  complete_items <- filter_occasion(df_train, occ)
  complete_items %>% fa.parallel()
})
```

### Factor Structures

```{r}
# Fit one promax-rotated factor model for every combination of factor count
# (2-5) and occasion; the fitted objects are stored in the list-column
# `fa_result`.
efa_result <- expand_grid(nfactors = c(2, 3, 4, 5), occasion = occasions) %>%
  mutate(
    fa_result = map2(nfactors, occasion, function(nfactors, occasion) {
      filter_occasion(df_train, occasion) %>%
        fa(nfactors, rotate = 'promax')
    })
  )

# For each fitted model, draw its diagram (using a cut of 0.2) and print the
# full solution.
pwalk(efa_result, function(nfactors, occasion, fa_result) {
  diagram_title <- paste0('Occasion: ', occasion, ', nF: ', nfactors)
  fa.diagram(fa_result, main = diagram_title, cut = 0.2)
  print(fa_result)
})
```

## MMOD

```{r}
# Collect fit statistics from a list of MMOD results into one tibble.
#
# mmod_results: list whose elements each provide the fields read below
#   (modelName, numObs, Chi, ChiDoF, Minus2LogLikelihood, AIC.Mx, BIC.Mx,
#   RMSEA, CFI, TLI).
#   NOTE(review): these look like OpenMx model summaries -- confirm.
# Returns a tibble with one row per result.
mmod_fits <- function(mmod_results) {
  # Pull a single numeric field out of every result
  dbl_field <- function(field) map_dbl(mmod_results, field)

  tibble(
    name = map_chr(mmod_results, 'modelName'),
    n = dbl_field('numObs'),
    chisq = dbl_field('Chi'),
    dof = dbl_field('ChiDoF'),
    `-2ll` = dbl_field('Minus2LogLikelihood'),
    aic = dbl_field('AIC.Mx'),
    bic = dbl_field('BIC.Mx'),
    rmsea = dbl_field('RMSEA'),
    cfi = dbl_field('CFI'),
    tli = dbl_field('TLI')
  )
}

# Read every cached MMOD result and name each list element after the
# `modelName` stored inside it.
mmod_result <- map(
  Sys.glob('data/cache/eclsk2011_study1/*_result.rds'),
  read_rds
)
names(mmod_result) <- map(mmod_result, 'modelName')

mmod_fits(mmod_result)
```

To run or inspect the MMODs manually, use the following code (this chunk is not evaluated when the document is knit):

```{r, eval=F}
# Read every cached model file, evaluate the stored object with
# rlang::eval_tidy(), and name each list element after its `name` field.
mmod_models <- map(
  Sys.glob('data/cache/eclsk2011_study1/*_model.rds'),
  function(path) rlang::eval_tidy(read_rds(path))
)
names(mmod_models) <- map(mmod_models, 'name')
```

## MLM

```{r}
# Validation split used for the multilevel models: complete item data, parcel
# scores for both factor structures, children observed at three occasions,
# and all scales standardized.
df_val <- eclsk2011$study1 %>%
  filter(split == 'val') %>%
  filter_at(vars(TWORKS, TPERSIS, TSHOWS, TADAPTS, TKEEPS, TATTEN,
                 TBGCCLR, TBGCBLD, TBABSBK, TBEZDSL, TBTRBST, TBEZDAC, TBNOFIN,
                 TBSTNO, TBWTTSK, TFOLLOW, TBFLWIN), all_vars(!is.na(.))) %>% # Exclude NAs
  mutate(
    TFOLLOW_sc = TFOLLOW / 4 * 7 # Scaled version of TFOLLOW to use with MINHIB
  ) %>%
  mutate( # Compute parcel scores for MMOD-chosen factor structure
    MATL = rowMeans(cbind(TWORKS, TPERSIS, TSHOWS, TADAPTS, TKEEPS, TATTEN), na.rm = TRUE),
    # Items entered as `8 - x` are reverse-scored
    MATTEN = rowMeans(cbind(8 - TBEZDSL, 8 - TBTRBST, 8 - TBEZDAC, 8 - TBNOFIN), na.rm = TRUE),
    MINHIB = rowMeans(cbind(TBSTNO, TBWTTSK, TFOLLOW_sc, TBFLWIN), na.rm = TRUE),
    MENG = rowMeans(cbind(TBGCCLR, TBGCBLD, TBABSBK), na.rm = TRUE)
  ) %>%
  mutate( # Compute parcel scores for theoretical structure
    XTCHAPP_T = rowMeans(cbind(TWORKS, TPERSIS, TSHOWS, TADAPTS,
                               TKEEPS, TATTEN, TFOLLOW), na.rm = TRUE),
    XATTNFS_T = rowMeans(cbind(8 - TBEZDAC, 8 - TBNOFIN, 8 - TBEZDSL,
                               TBGCBLD, TBGCCLR, TBABSBK), na.rm = TRUE),
    # TBPLNAC and TBAPRRK are not part of the NA filter above, so na.rm
    # matters for this parcel
    XINBCNT_T = rowMeans(cbind(8 - TBTRBST, TBSTNO, TBWTTSK,
                               TBFLWIN, TBPLNAC, TBAPRRK), na.rm = TRUE)
  ) %>%
  group_by(CHILDID) %>%
  filter(n() == 3) %>% # Data present at all occasions
  ungroup() %>%
  mutate_at( # Standardize all scales (mean=0, sd=1)
    vars(
      MATL, MENG, MATTEN, MINHIB,
      XTCHAPP_T, XATTNFS_T, XINBCNT_T,
      XTCHAPP, XATTNFS, XINBCNT
    ),
    # scale() returns a one-column matrix carrying centering/scaling
    # attributes; as.numeric() keeps the columns as plain numeric vectors
    function(x) as.numeric(scale(x))
  )

# Verify that our manually created parcels scores for the theoretical structure match
# those created by the ECLS-K:2011
# NOTE(review): these checks run after the standardization above, so they
# compare z-scores and would also pass if the two versions differed only by a
# shift or a positive rescaling. Compare the raw scores before scaling if an
# exact match is intended.
stopifnot(all(near(df_val$XTCHAPP_T, df_val$XTCHAPP, .01)))
stopifnot(all(near(df_val$XATTNFS_T, df_val$XATTNFS, .01)))
stopifnot(all(near(df_val$XINBCNT_T, df_val$XINBCNT, .01)))
```


### Reading

```{r}
# Reading score (XRTHETK5) regressed on grade and the three theoretical
# parcels, with a random intercept per child; fitted by maximum likelihood.
read_theory <- lmer(
  XRTHETK5 ~ grade + XTCHAPP_T + XATTNFS_T + XINBCNT_T + (1 | CHILDID),
  data = df_val,
  REML = FALSE,
  control = lmerControl(optimizer = 'bobyqa')
)
summary(read_theory)

# Same outcome and random-effects structure, with the four MMOD-derived
# parcels as predictors.
read_f4 <- lmer(
  XRTHETK5 ~ grade + MATL + MATTEN + MINHIB + MENG + (1 | CHILDID),
  data = df_val,
  REML = FALSE,
  control = lmerControl(optimizer = 'bobyqa')
)
summary(read_f4)
```

### Mathematics

```{r}
# Mathematics score (XMTHETK5) regressed on grade and the three theoretical
# parcels, with a random intercept per child; fitted by maximum likelihood.
math_theory <- lmer(
  XMTHETK5 ~ grade + XTCHAPP_T + XATTNFS_T + XINBCNT_T + (1 | CHILDID),
  data = df_val,
  REML = FALSE,
  control = lmerControl(optimizer = 'bobyqa')
)
summary(math_theory)

# Same outcome and random-effects structure, with the four MMOD-derived
# parcels as predictors.
math_f4 <- lmer(
  XMTHETK5 ~ grade + MATL + MATTEN + MINHIB + MENG + (1 | CHILDID),
  data = df_val,
  REML = FALSE,
  control = lmerControl(optimizer = 'bobyqa')
)
summary(math_f4)
```

### Trajectories

```{r}
# Pick up to 100 children at random for the spaghetti plot.
# Sampling is done on positions rather than on the ID vector itself: this
# avoids sample()'s special case for a single numeric value (it would sample
# from 1:x) and does not fail when fewer than 100 children are available.
# With more than 100 children the draw is the same as sample(ids, 100).
subset <- unique(df_val$CHILDID)
if (length(subset) > 100) {
  subset <- subset[sample.int(length(subset), 100)]
}

# One line per sampled child, with separate panels for the reading
# (XRTHETK5) and mathematics (XMTHETK5) scores.
df_val %>%
  filter(CHILDID %in% subset) %>%
  pivot_longer(c('XRTHETK5', 'XMTHETK5'), names_to = 'subject', values_to = 'value') %>%
  ggplot(aes(x = grade, y = value, group = CHILDID)) +
  geom_line(alpha = 0.4) +
  facet_wrap(~subject)
```