-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathABCD_ParRiskDataComp.R
162 lines (131 loc) · 7.91 KB
/
ABCD_ParRiskDataComp.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
require(tidyverse)
require(data.table)
require(stringi)
require(dplyr)
# Directory that holds the tab-delimited structure files (*.txt).
# The working directory is switched to it because every later read in this
# script refers to the files by bare file name.
data_dir <- "C:/Users/mmatt/Desktop/Projects/psychopathology-risk/github/data/ABCDStudyNDA"
setwd(data_dir)
# 1 x 1 NA matrix, used further down as a placeholder when preallocating lists
mat <- matrix(NA, 1, 1)
# Create a list of the files in the target directory.
# `pattern` is a regular expression: the dot is escaped and the match is
# anchored to the end of the name so that only real ".txt" files are listed
# (an unescaped ".txt" would also accept e.g. "notes_txt_old.csv").
file_list <- list.files(path = data_dir, pattern = "\\.txt$")
if (length(file_list) == 0) {
  stop("No .txt structure files found in ", data_dir, call. = FALSE)
}
# initialize empty list (one slot per structure file)
data_list <- vector(mode = "list", length = length(file_list))
# Read only the header of each structure (nrows = 0) and keep its element
# (column) names; struct_list[[i]] holds the names found in file_list[i].
struct_list <- lapply(
  file_list,
  function(f) colnames(fread(f, header = TRUE, nrows = 0))
)
### IDENTIFY ELEMENTS OF INTEREST ###
# Each vector below lists element (column) names to pull from the structures.

# Common to all studies - DO NOT CHANGE THIS ONE OR REPEAT THESE ELSEWHERE
common <- c(
  "subjectkey", "interview_age", "interview_date",
  "eventname", "sex", "rel_family_id"
)

# Demographic
demo <- c(
  "race_ethnicity",
  "demo_comb_income_v2",
  "demo_prnt_ed_v2",
  "demo_prtnr_ed_v2",
  "demo_prim"
)
puberty <- c("pds_p_ss_female_category_2", "pds_p_ss_male_category_2")

# Subcortical volumes (left/right pairs, then the two global measures)
subcort <- c(
  "smri_vol_scs_aal", "smri_vol_scs_aar",
  "smri_vol_scs_amygdalalh", "smri_vol_scs_amygdalarh",
  "smri_vol_scs_caudatelh", "smri_vol_scs_caudaterh",
  "smri_vol_scs_hpuslh", "smri_vol_scs_hpusrh",
  "smri_vol_scs_pallidumlh", "smri_vol_scs_pallidumrh",
  "smri_vol_scs_putamenlh", "smri_vol_scs_putamenrh",
  "smri_vol_scs_tplh", "smri_vol_scs_tprh",
  "smri_vol_scs_intracranialv", "smri_vol_scs_subcorticalgv"
)

# Parental psychopathology
any_hist <- c(
  "fam_history_6_yes_no", "fam_history_7_yes_no", "fam_history_10_yes_no",
  "famhx_4_p", "fam_history_5_yes_no"
)
# NOTE(review): "famhx_4d_p___0" (maternal) and "famhx4a_p___0" (paternal)
# follow different naming patterns; both are reproduced exactly as given -
# confirm against the data dictionary before changing either.
maternal <- c(
  "fam_history_q6d_depression",
  "famhx_4d_p___0",
  "fam_history_q5d_drugs___0",
  "fam_history_q7d_mania",
  "fam_history_q10d_nerves"
)
paternal <- c(
  "fam_history_q6a_depression",
  "famhx4a_p___0",
  "fam_history_q5a_drugs___0",
  "fam_history_q7a_mania",
  "fam_history_q10a_nerves"
)

# Exclusion criteria
exclude <- c("iqc_t1_ok_ser", "fsqc_qc", "mrif_score", "famhx_ss_momdad_vs_p")

# KSADS items, one line per module number (the first number in the name)
ksads <- c(
  "ksads_1_843_p", "ksads_1_845_p", "ksads_1_844_p", "ksads_1_840_p", "ksads_1_841_p", "ksads_1_842_p",
  "ksads_2_837_p", "ksads_2_835_p", "ksads_2_836_p", "ksads_2_831_p", "ksads_2_832_p", "ksads_2_830_p", "ksads_2_833_p", "ksads_2_834_p",
  "ksads_3_848_p",
  "ksads_4_851_p", "ksads_4_852_p",
  "ksads_5_857_p", "ksads_5_858_p",
  "ksads_6_859_p", "ksads_6_860_p",
  "ksads_7_861_p", "ksads_7_862_p",
  "ksads_8_864_p", "ksads_8_863_p",
  "ksads_10_869_p", "ksads_10_870_p",
  "ksads_11_917_p", "ksads_11_918_p",
  "ksads_13_939_p", "ksads_13_938_p", "ksads_13_929_p", "ksads_13_934_p", "ksads_13_933_p", "ksads_13_932_p",
  "ksads_13_931_p", "ksads_13_930_p", "ksads_13_936_p", "ksads_13_935_p", "ksads_13_937_p", "ksads_13_940_p",
  "ksads_14_855_p", "ksads_14_853_p", "ksads_14_854_p",
  "ksads_15_901_p", "ksads_15_902_p",
  "ksads_16_900_p", "ksads_16_897_p", "ksads_16_899_p", "ksads_16_898_p",
  "ksads_19_891_p", "ksads_19_892_p",
  "ksads_20_874_p", "ksads_20_883_p", "ksads_20_872_p", "ksads_20_881_p", "ksads_20_889_p",
  "ksads_20_890_p", "ksads_20_887_p", "ksads_20_878_p", "ksads_20_877_p", "ksads_20_886_p",
  "ksads_20_875_p", "ksads_20_884_p", "ksads_20_876_p", "ksads_20_885_p", "ksads_20_879_p",
  "ksads_20_888_p", "ksads_20_873_p", "ksads_20_882_p", "ksads_20_880_p", "ksads_20_871_p",
  "ksads_21_921_p", "ksads_21_922_p"
)

# Concatenate elements of interest (`common` is merged in separately later)
elem_int <- c(
  subcort, maternal, paternal, any_hist,
  demo, puberty, exclude, ksads
)
### Find structures that contain an element of interest ###
# This loops through each element of interest and checks whether it exists in
# any of the structures. It stops at the FIRST structure (in file_list order)
# that contains the element, then moves on to the next element.
# struct_log is an elements x structures matrix: 1 marks the structure in
# which an element was first found, 0 everywhere else.
struct_log <- matrix(0, length(elem_int), length(file_list))
for (iE in seq_along(elem_int)) { # For each element...
  # \\< and \\> are word-boundary anchors: the element must appear as a whole
  # word, not as part of a longer identifier.
  elem_pattern <- paste0('\\<', elem_int[iE], '\\>')
  for (iS in seq_along(struct_list)) { # For each structure...
    if (any(grepl(elem_pattern, struct_list[[iS]]))) {
      struct_log[iE, iS] <- 1
      break # found: mark 1 and move on to the next element
    }
  }
}
# A structure is of interest when at least one element was flagged in it
struct_ind <- colSums(struct_log) > 0
struct_inc <- file_list[struct_ind]
if (length(struct_inc) == 0) {
  stop("None of the elements of interest were found in any structure; ",
       "check the element names.", call. = FALSE)
}
# Column names of just the structures of interest. The headers were already
# read into struct_list, so they are reused instead of re-reading each file.
struct_int_list <- struct_list[struct_ind]
# Add the participant ID and event name (the merge keys used later) to the
# list of elements to read
elem_int_id <- c('subjectkey','eventname',elem_int)
# Loop through the structures of interest and read, from each file, only the
# requested elements it actually contains.
# Names are matched EXACTLY against the header. Substring matching would let
# one element pick up several longer column names, which cannot be passed to
# fread() as a single column.
# If you have errors/warnings here, you probably need to check the element
# names you first specified.
struct_subset <- vector("list", length(struct_inc))
for (i in seq_along(struct_inc)) {
  # intersect() keeps the order of elem_int_id and drops absent elements
  cols_to_read <- intersect(elem_int_id, struct_int_list[[i]])
  struct_subset[[i]] <- fread(struct_inc[i], select = cols_to_read)
}
# Columns every table is merged on
key_cols <- c('subjectkey','eventname')
# Some structures do not have an 'eventname' element, which the merge below
# needs. They are located by column name rather than by a fixed list position,
# so the step keeps working when the set of files in the directory changes.
# NOTE(review): the missing column is filled by copying 'eventname' from the
# first structure row-for-row, which presumes both tables list the same rows
# in the same order - confirm this holds for the structures involved.
lacks_event <- !vapply(
  struct_subset,
  function(s) "eventname" %in% colnames(s),
  logical(1)
)
for (i in which(lacks_event)) {
  donor_event <- struct_subset[[1]]$eventname
  if (is.null(donor_event) || length(donor_event) != nrow(struct_subset[[i]])) {
    stop("Cannot fill 'eventname' for ", struct_inc[i],
         ": the first structure has no 'eventname' column with a matching ",
         "number of rows.", call. = FALSE)
  }
  struct_subset[[i]]$eventname <- donor_event
}
# Merge all structures pairwise; all = TRUE keeps rows found in either table
merged_df <- Reduce(
  function(x, y) merge(x, y, by = key_cols, all = TRUE),
  struct_subset
)
# Drop rows that are exact duplicates
merged_df <- unique(merged_df)
# Merge common variables
common_elems <- read.delim2("acspsw03.txt", header = TRUE)
common_elems_sub <- common_elems[common]
merged_df <- merge(merged_df, common_elems_sub, by = key_cols, all = TRUE)
# Merge ACS weighting (taken from the same file as the common variables)
acsw <- common_elems[c('subjectkey','eventname','acs_raked_propensity_score')]
merged_df <- merge(merged_df, acsw, by = key_cols, all = TRUE)
# Merge site. There is no all = TRUE here, so only rows with a match in both
# tables are kept.
site_id <- read.delim2("abcd_lt01.txt", header = TRUE)
site_id <- site_id[c('subjectkey','eventname','site_id_l')]
merged_df <- merge(merged_df, site_id, by = key_cols)
###YOU NOW HAVE YOUR VARIABLES OF INTEREST IN merged_df###
#OPTIONAL: Make instances of 555,777,888,999 = NA
# NOTE(review): the replacement is applied to every column, so any genuine
# value equal to one of these codes is blanked as well.
merged_df[merged_df == 555] <- NA
merged_df[merged_df == 777] <- NA
merged_df[merged_df == 888] <- NA
merged_df[merged_df == 999] <- NA
#OPTIONAL: Subset by timepoint of interest. Change to whatever you're interested in.
merged_df_t1 <- subset(merged_df,eventname == 'baseline_year_1_arm_1')
#Optional: Apply Exclusion Criteria
#Can count changes in observations to determine # excluded for methods
merged_df_t1_incl <- subset(merged_df_t1, iqc_t1_ok_ser > 0)
merged_df_t1_incl <- subset(merged_df_t1_incl, fsqc_qc > 0)
merged_df_t1_incl <- subset(merged_df_t1_incl, mrif_score > 0)
#Excluding KSADS dx..
# The KSADS columns are selected by the names listed in `ksads`, not by fixed
# column positions, so the result does not depend on the order in which the
# structures were merged.
ksads_present <- intersect(ksads, colnames(merged_df_t1_incl))
if (length(ksads_present) == 0) {
  stop("None of the KSADS elements are present in the merged data.",
       call. = FALSE)
}
if (length(ksads_present) < length(ksads)) {
  warning("KSADS elements missing from the merged data: ",
          paste(setdiff(ksads, ksads_present), collapse = ", "),
          call. = FALSE)
}
ksads_cols <- as.data.frame(merged_df_t1_incl)[, ksads_present, drop = FALSE]
merged_df_t1_incl[, "ksads_max"] <- apply(ksads_cols, 1, max, na.rm = TRUE) #If any dx, max = 1
merged_df_t1_incl <- subset(merged_df_t1_incl, ksads_max == 0) #Exclude any dx
merged_df_t1_incl <- subset(merged_df_t1_incl, famhx_ss_momdad_vs_p == 0)
# Drop the columns that were only needed to apply the exclusions
clean_df <- merged_df_t1_incl
clean_df[,exclude] <- list(NULL)
clean_df$ksads_max <- list(NULL)
clean_df[,ksads] <- list(NULL)
#Remove if missing ICV
clean_df <- drop_na(clean_df,smri_vol_scs_intracranialv)
#Data cleaned and ready to use!!
write.csv(clean_df,'C:/Users/mmatt/Desktop/Projects/psychopathology-risk/PsyRisk/Revisions/ABCD_ParRiskData.csv')