-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathefi_aging.R
461 lines (442 loc) · 21.1 KB
/
efi_aging.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
#' ---
#' title: "Rockwood index outperforms patient age as a predictor of poor patient
#' outcomes."
#' author: 'Alex Bokov ^1,✉^, Sara Espinoza ^1^, Chandana Tripathy ^1^,
#' ..., ..., ..., and Kathleen R. Stevens ^1^'
#' css: "production.css"
#' abstract: 'We have found that the electronic frailty index (EFI), a risk
#' score developed using the Rockwood deficit-accumulation framework, is
#' a strong predictor of falls, hospitalizations, hospital-acquired
#' infections, and loss of independence without relying on any predictors
#' including patient age. In fact, EFI outperformed patient age as a
#' predictor. EFI''s accuracy was not negated by the fact that we used
#' patients of all ages, not just older patients.'
#' documentclass: article
#' clean: false
#' bibliography: efi_paper.bib
#' csl: harvard-cite-them-right.csl
#' output:
#' bookdown::html_document2:
#' fig_caption: true
#' self_contained: true
#' number_sections: false
#' keep_md: true
#' clean: false
#' bookdown::word_document2:
#' reference_docx: efi_template.docx
#' fig_caption: true
#' self_contained: true
#' number_sections: false
#' ---
#+ load_deps, echo=FALSE, message=FALSE, warning=FALSE,results='hide'
# setup ----
.projpackages <- c('GGally','pander','dplyr','ggplot2','data.table'
,'survival','broom','forcats','table1','english');
.deps <- c( '' );
.debug <- 0;
.junk<-capture.output(source('./scripts/global.R',chdir=TRUE,echo=FALSE
,local=TRUE));
# Set some formatting options for this document
panderOptions('table.alignment.default','right');
panderOptions('table.alignment.rownames','right');
panderOptions('table.split.table',Inf);
panderOptions('p.wrap','');
panderOptions('p.copula',', and ');
panderOptions('graph.fontfamily','serif');
# theme_get() gives default theme
theme_set(theme_bw(base_family = 'serif',base_size=14));
knitr::opts_chunk$set(echo=.debug>0, warning=.debug>0, message=.debug>0);
# detect current output format
.outfmt <- knitr::opts_knit$get('rmarkdown.pandoc.to');
if(is.null(.outfmt)){
if(knitr::is_html_output()) .outfmt <- 'html' else {
if(knitr::is_latex_output()) .outfmt <- 'latex' else {
.outfmt <- 'unknown';
}}};
.currentscript <- current_scriptname('efi_aging.R');
.figcount <- 0;
# Load files ----
# data
if(file.exists(inputdata['dat03'])){
dat03 <- fread(.dat03new<-inputdata['dat03']) } else {
.infilepatt <- paste0('^[0-9]{10}_[a-z0-9]{4,12}_'
,gsub('^[0-9]*_|\\.[^.]*$',''
,basename(inputdata['dat01'])),'_dev.tsv$');
if(length(.dat03candidates <- list.files(patt=.infilepatt))>0){
.dat03new <- file.info(.dat03candidates,extra_cols = F)[
,'mtime',drop=F] %>% arrange(desc(mtime)) %>% rownames %>% head(1);
dat03 <- fread(.dat03new);
message(inputdata['dat03'],' not found, loading most recent available: '
,.dat03new);
} else {
#.srcenv1 <- new.env();
unlink('data.R.rdata',force = T);
.loadedobjects <- load_deps2('data.R',cachedir = .workdir,debug=.debug);
#source('data.R',local=.srcenv1);
.dat03candidates <- list.files(patt=.infilepatt);
.dat03new <- file.info(.dat03candidates,extra_cols = F)[
,'mtime',drop=F] %>% arrange(desc(mtime)) %>% rownames %>% head(1);
dat03 <- fread(.dat03new);
message(inputdata['dat03'],' not found, nor any other valid input files.'
,'Ran data.R and loaded the resulting ',.dat03new,' file.')
}
}
# data dictionary
.srcenv0 <- new.env();
if(!file.exists('varmap.csv')) source('dictionary.R',local=.srcenv0);
# Then load `varmap.csv`
dct0 <- import('varmap.csv');
efi_pats <- unique(subset(dat03,a_efi>0)$patient_num);
dat03 <- subset(dat03,patient_num %in% efi_pats);
# syncronize dictionary with newly-loaded data
dct0<-sync_dictionary(dat03);
#+ fits
# fits ----
fits <- yfits <- ofits <- list();
kmplotlayers <- list(
scale_y_continuous(labels=scales::percent_format(1))
,geom_ribbon(aes(ymin=low,ymax=up,fill=group),alpha=0.3,show.legend = F)
,scale_fill_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE'))
,scale_color_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE'))
);
for(ii in v(c_agingpaper)){
# note: the cumsum(cumsum(%s))<=1 expression below is the part that cuts off
# each patient at their first post-index event for the respective events
.iidata <- gsub('%s',ii,"copy(dat03)[,c('keep','xx','Frail') :=
list(cumsum(cumsum(%s))<=1, %s, a_efi>0.19) ,by=patient_num][,xx:=%s][(keep)
,c('patient_num','a_t0','a_t1','xx','Frail','a_efi'
,'age_at_visit_days')]") %>%
parse(text=.) %>% eval;
fits[[ii]]$dispname <- dct0[dct0$colname==ii,'dispname'];
yfits[[ii]]$dispname <- paste0(fits[[ii]]$dispname,', Younger than 45');
ofits[[ii]]$dispname <- paste0(fits[[ii]]$dispname,', 45 and up');
.iifit <- survfit(Surv(a_t0,a_t1,xx)~Frail,data=.iidata);
fits[[ii]]$plot <- .iifit %>%
ggsurv(plot.cens=F, main=fits[[ii]]$dispname, order.legend=F
,ylab='% Patients event-free'
,xlab='Days since randomly selected index visit') + kmplotlayers +
coord_cartesian(ylim=c(.5,1),xlim=c(0,1096));
yfits[[ii]]$plot <- update(.iifit,subset=age_at_visit_days<45*365.25) %>%
ggsurv(plot.cens=F, main=yfits[[ii]]$dispname, order.legend=F
,ylab='% Patients event-free'
,xlab='Days since randomly selected index visit') + kmplotlayers +
coord_cartesian(ylim=c(.5,1),xlim=c(0,1096));
ofits[[ii]]$plot <- update(.iifit,subset=age_at_visit_days>=45*365.25) %>%
ggsurv(plot.cens=F, main=ofits[[ii]]$dispname, order.legend=F
,ylab='% Patients event-free'
,xlab='Days since randomly selected index visit') + kmplotlayers +
coord_cartesian(ylim=c(.5,1),xlim=c(0,1096));
#scale_x_continuous(limits=c(0,1096)) +
# scale_y_continuous(labels=scales::percent_format(1)) +
# geom_ribbon(aes(ymin=low,ymax=up,fill=group),alpha=0.3,show.legend = F) +
# scale_fill_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE')) +
# scale_color_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE')) +
# coord_cartesian(ylim=c(.5,1),xlim=c(0,1096));
fits[[ii]]$models$Frailty <- coxph(Surv(a_t0,a_t1,xx)~I(10*a_efi)
,model=TRUE,data=.iidata);
fits[[ii]]$models$`Patient Age` <- update(fits[[ii]]$models$Frailty
,. ~age_at_visit_days);
};
#+ los
# los ----
.losdat03 <- copy(dat03)[,Frail:=a_efi>0.19][,c('age_at_visit_days','a_efi'
,'Frail','a_los')];
fits$a_los$plot <- survfit(Surv(a_los)~Frail,.losdat03) %>%
ggsurv(plot.cens=F, main='Length of Stay'
,xlab='Days since inpatient admission'
,ylab='% Patients still in hospital'
,order.legend = F) +
scale_y_continuous(labels=scales::percent_format(1)) +
geom_ribbon(aes(ymin=low,ymax=up,fill=group),alpha=0.3,show.legend=F) +
scale_fill_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE')) +
scale_color_discrete(type=c('#00BFC4','#F8766D'),breaks=c('FALSE','TRUE'));
fits$a_los$models$Frailty <- coxph(Surv(a_los)~I(10*a_efi),.losdat03);
fits$a_los$models$`Patient Age` <- update(fits$a_los$models$Frailty
,.~age_at_visit_days);
#+ tb2_create
# tb2_create ----
tb2 <- sapply(fits,function(xx){
with(xx$models,cbind(tidy(Frailty,conf.int=T)
,exp=tidy(Frailty,expon=T,conf.int=T)[
,c('estimate','conf.low','conf.high')]))}
,simplify=F) %>% bind_rows(.id='outcomevar') %>%
mutate(betahat=sprintf('%.2f (%.2f, %.2f)',estimate,conf.low,conf.high)
,foldchange=sprintf('%.2f (%.2f, %.2f)',exp.estimate,exp.conf.low
,exp.conf.high),P=p.adjust(p.value)
,Outcome=submulti(outcomevar,dct0[,c('colname','dispname')])) %>%
rename(SE=std.error,Z=statistic);
# intro ----
#'
#' # Introduction
#'
#' Frailty is the lifelong erosion of stress resistance and accumulation of
#' impairments across multiple physiological systems. Among older community
#' dwelling adults 32% have been classified as pre-frail and 24% have been
#' classified as frail [@hoover13]. Frailty predicts disability,
#' injurious falls, and mortality [@pajewski19], emergency room visits
#' and hospitalizations [@fried01a], and long-term care admissions
#' [@pajewski19; @rockwood06; @rockwood05]. The Fried phenotype
#' [@fried01a] and Rockwood deficit accumulation index
#' [@rockwood07; @mitnitski01a], are the most commonly used methods for
#' operationalizing frailty. There is reasonable convergence between these
#' two approaches [@li15; @malmstrom14] but the deficit accumulation
#' approach does not require individual questionnaires nor physical
#' assessments yet offers similar [@malmstrom14] or possibly better
#' [@kulminski08] predictive accuracy.
#'
#' Possible points still left to cover:
#'
#' - outcomes important to clinical care of older patients
#'
#' - how care could be improved with better modeling of these outcomes
#'
#' - information about our site
#'
# methods ----
#' # Methods
#'
#' ## Population
#'
#' A random 1% sample (N=14,844) was drawn from the deidentified patient
#' records of a large academic health center and its teaching hospital
#' partner. Visits during which patient age was less than 18 years old were
#' excluded and then patients who had fewer than three visits in the
#' remaining data were excluded. To avoid bias, for each patient an index
#' visit date was chosen and only data recorded on or after that date was
#' used in analysis. To avoid distorted results in patients with sparse
#' visit histories, those who had fewer than two visit-dates after index
#' visit assignment were removed from the sample as were patients whose EFI
#' was never higher than 0. Finally, the patients were randomly assigned to
#' a development cohort (N=2,497 patients, 52,372 visit-days) or a testing
#' cohort (N=3,220 patients, 56,320 visit-days). Sensitivity analysis was
#' done to see the effect of leaving in data from all visits by adult
#' patients and the overall direction of EFI's effect was the same but
#' [the performance improvement relative to patient age was inflated].
#' All decisions about data processing and statistical analysis were made
#' using only the development cohort and blinded to the testing cohort. For
#' publication, the same analysis scripts were run on the testing cohort
#' and used to create all results reported here (the development version of
#' each table and figure is available in the supplemental materials). The
#' baseline characteristics of the testing cohort are shown in Table
#' \@ref(tab:table1).
#'
#' ***
# . table 1 ----
dat04 <- dat03[,lapply(.SD,head,1),by=patient_num,.SDcols=v(c_patdata)[1:5]] %>%
# the [,-1] in the following line and at the end are needed to avoid
# duplicates of patient_num
cbind(dat03[,lapply(.SD,any),by=patient_num,.SDcol=v(c_response)][,-1]
,dat03[,.(`Patient age (years)`=max(age_at_visit_days)/365.25
,Frailty=tail(a_efi,1)
,`Median Frailty`=median(a_efi,na.rm=T)
,`Number of Visits`=.N
,a_los=as.numeric(median(a_los,na.rm=T))
,`Frailty Stage`=cut(tail(a_efi,1),c(0,0.1,0.19,1)
,include.lowest = T
,labels=c('Nonfrail, < 0.1'
,'Prefrail, 0.1 - 0.19'
,'Frail, > 0.2')))
,by=patient_num][,-1]);
.tb1formula <- setdiff(names(dat04),c('language_cd','Frailty Stage'
,'Median Frailty','patient_num'
,'age_at_death_days'
,'age_at_visit_days'
,setdiff(v(c_response)
,v(c_agingpaper)))) %>%
paste0('`',.,'`',collapse='+') %>% paste('~',.,'|`Frailty Stage`') %>%
formula;
#+ table1,results='asis'
tb1 <- table1(.tb1formula,data=dat04) %>%
submulti(dct0[,c('colname','dispname')],'partial');
if(.outfmt == 'html'){
print(tb1);
cat("\n\nTable: (\\#tab:table1) Cohort demographics")
} else {
pander(data.frame(X='placeholder',row.names = 'Y')
,caption='(\\#tab:table1) Cohort demographics');
};
#'
#' ## Electronic Frailty Index
#'
#' @clegg16a developed an electronic frailty index (EFI) for UK health systems
#' following the methodology of Mitnitsky, Rockwood, et al.
#' [@mitnitski01a; @song10; @searle08a]. Recently, @pajewski19 adapted the EFI
#' to use ICD10 and ICD9. We further built on this work by also mapping
#' laboratory tests to specific LOINC codes. For each patient visit, all
#' distinct diagnoses and abnormal lab results over the preceding two-year
#' window for that patient were aggregated into a single EFI, a numeric value
#' that can range from 0 to 1 (but in practice seldom exceeded 0.6). As
#' mentioned above, we omitted all visits prior to a randomly select index
#' visit for each patient. This did not interfere with EFI calculation because
#' those EFI values were calculated separately for every distinct patient-date
#' in our health system, and then joined to the EHR data.
#'
#' ## Outcomes
#'
#' The primary outcomes we predicted with EFI were falls (any ICD9 codes in
#' E880-E888.9, E987.x, V00.x, or ICD10 codes in W00-W19), hospital
#' admissions (EHR encounter type = 'inpatient'), hospital-acquired
#' infections (having any ICD10 code associated with hospital-acquired
#' infections per Southern et al. @southern17 ), discharge to
#' intermediate care facilities (ICF) or skilled nursing facilities (SNF)
#' (using discharge disposition codes, for patients whose admit-source for
#' that encounter was 'Home'), and all-cause mortality insofar as it can be
#' ascertained from the EHR discharge disposition or vital status (which,
#' in the absence of linkage to external death indexes are likely to
#' under-report the actual mortality and so should be interpreted with
#' caution).
#'
# . stats ----
#' ## Statistical Analysis
#'
#' For each outcome of interest, we used a Cox proportional hazard model to
#' estimate the risk of the first occurrences of the outcome after the
#' patients' respective index visits using EFI as the predictor. Unlike
#' earlier studies, we treated EFI as a time-varying numeric predictor with
#' multiple followups per patient. Since only the first occurrence was
#' being predicted, a recurring event model was not necessary. As a
#' comparison, for each outcome a second analysis was done that was
#' identical to the first except that patient age at visit (in days) was
#' used as the predictor.
#'
# results ----
.resultsfold <- subset(tb2,outcomevar %in% v(c_truefalse)) %>%
with(paste0(round(exp(estimate),1),'-fold for ',Outcome)) %>% tolower %>%
submulti(cbind(c('icf','snf')
,c('ICF','SNF after having been admitted from home')));
#' # Results
#'
#' Table \@ref(tab:tb2) shows the results of Cox proportional hazard models for
#' each of the responses, with EFI as the predictor. For each 0.1 increase in
#' EFI, we found at least a doubling of risk: `r pander(.resultsfold)`.
#' Furthermore, the probability of delayed discharge increased and thus longer
#' stays were observed in patients with an EFI > 0.19.
#' The p-values shown have been adjusted for multiple comparisons
#' (`r as.english(nrow(tb2)) %>% as.character` outcomes reported in
#' one study) using the @holm79 method and in all cases are highly
#' significant. It is well established in gerontology that an enormous and
#' diverse range of poor health outcomes are correlated with an individual's
#' age, but chronological age provides no information about the progression of
#' biological age in an individual. It has long been a priority in aging
#' research to find a metric for aging that is subject to individual variation
#' and intervention. Rockford et al. have pointed out {ref} that deficit
#' accumulation has the hallmarks of such a measure.
#'
# . coxph fits ----
#'
#' *****
#'
#'
# . table 2 ----
#+ tb2
tb2[,c('Outcome','betahat','foldchange','SE','Z','P')] %>%
rename(`β^ (95% CI)`=betahat,`fold-change (95% CI)`=foldchange ) %>%
pander(digits=3, caption='(\\#tab:tb2) Cox-proportional hazards with EFI as a predictor');
#'
#' Therefore, we compared the predictive accuracy of the EFI to that of
#' patient age. For each outcome, we fit an additional Cox proportional
#' hazard model, this time using age at visit as the predictor variable. It
#' appears, at least in this population, that not only can EFI accurately
#' predict mortality and poor patient outcomes without any knowledge of
#' patient age but it does so better than patient age.
#'
# . table 3, age vs efi ----
#+ tb3
# [v(c_agingpaper)]
bind_rows(sapply(fits,function(xx){
do.call(bind_rows,c(sapply(xx$models,function(yy){
glance(yy)[,c('concordance','logLik','AIC')]
},simplify=F),.id='Predictor'))
},simplify=F),.id='Outcome') %>%
mutate(Outcome=ifelse(Predictor=='Frailty'
,submulti(Outcome,dct0[,c('colname','dispname')])
,' ')) %>%
rename(Concordance=concordance,`Log Likelihood`=logLik) %>%
pander(caption='(\\#tab:tb3) Comparing EFI and age as predictors');
#'
#' In Table \@ref(tab:tb3) we compare the performance of EFI and age as
#' predictors for each of the `r as.english(nrow(tb2)) %>% as.character`
#' outcomes. In all cases, the EFI models has a robustly lower AIC and
#' log-likelihood than the respective patient-age models, as well as a higher
#' concordance (in all cases greater than 0.7). Cox models incorporating EFI
#' together with age, with or without an interaction term, did not have a
#' significantly better fit than the univariate EFI model (not shown, available
#' in supplement).
#'
#' In figures \@ref(fig:kmplots) a-f we show Kaplan-Meier plots for each of the
#' outcomes stratified by whether EFI is greater than 0.19 (Frail=TRUE) or less
#' (Frail=FALSE) [@stow18].
#'
#+ kmplots, fig.show="hold", out.width="50%", fig.cap=" - "
# . kmplots ----
.subfig <- 0;
for(jj in fits) {
.subfig <- .subfig + 1;
print(jj$plot + labs(caption=paste0('Figure 1 ',letters[.subfig],'. \n')) +
theme(plot.caption=element_text(size=15)) );
}
#'
# discussion ----
#' # Discussion
#'
#' Our data shares the fundamental limitation of the EHR system from which
#' it was obtained: like all EHR systems, it only has information that
#' providers and coders put into it. Events taking place outside the health
#' system or at un-connected health systems are not visible to our analysis.
#' On the other hand, providers who rely on EHR systems at point of care
#' are also working under these limitations on. The data we used is
#' representative of this scenario, and despite the limitations EFI
#' provides accurate predictions of poor patient outcomes. Because our
#' implementation of the Rockwood index real EHR data, it is more directly
#' transferable to clinical use than implementations based on curated
#' registries. This suggests that EFI is most accurate for patients who
#' have accumulated a reasonable in-system visit-history. Further work is
#' needed to find a more precise relationship between the length of a
#' patient's visit-history and the accuracy of EFI and better distinguish
#' genuinely non-frail patients from those who get most of their care
#' outside the researchers' health system.
#'
#' Vital status is often out of date in EHR systems if the patient did not
#' die in the hospital or shortly after their visit, so our data may
#' under-represent the true mortality rates. Nevertheless, the relationship
#' between EFI and mortality risk observed here agrees with previous EFI
#' studies. Unlike previous studies we sampled all adults rather than just
#' older adults so we could assess the performance of EFI across the
#' lifespan and include individuals who might have early-onset frailty.
#'
#' There is no gold standard method to assess Frailty in clinical practice.
#' Currently available frailty assessment tools used in geriatric practice have
#' good validity (for example @fried01a) but these are time
#' intensive and often difficult to implement in a busy general practice.
#' Assessing frailty helps clinicians identify high risk patients and tailor
#' interventions to prevent health decline and poor outcomes. Because of the
#' simplicity of the Rockwood Frailty Index, it is more likely to be adopted by
#' clinicians in a busy practice.
#'
#' Other possible points to cover:
#'
#' - What we learned by not restricting the sample to older adults (or
#' move to methods/theory paper?)
#'
#' - What agrees and what disagrees with previous work?
#' - What is new?
#' - Per-visit EFI allows plotting an EFI trajectory over time
#' - Strictly limiting data elements to those we can count on being
#' available in any EHR system, at any site
#' - future work
#' - theoretical and practical implications
#'
#'
# conclusions ----
#' # Conclusions
#'
#' TBD
#'
#' # Acknowledgments
#'
#' This work was supported by NIH/NCATS UL1TR001120 (AFB, KRS), the Long School
#' of Medicine KL2 Award (AFB), ...
#'
# refs ----
#' # References
#'