-
Notifications
You must be signed in to change notification settings - Fork 0
/
00-BIOMAP.qmd
138 lines (111 loc) · 4.33 KB
/
00-BIOMAP.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
---
title: "Biomap federated analysis"
author: "Tu Hu"
date: "06/07/2022"
output: html_document
---
```{r}
library(dplyr)
library(gprofiler2)
```
```{r}
# Load the raw count matrix. The code below relies on it having a `gene_name`
# column plus one column per sample.
count_matrix <- readr::read_csv("data/geo/count_matrix.csv")

# Convert gene names to target identifiers. gconvert() is called with its
# default organism and target namespace, so the identifier type in `target`
# is whatever those defaults produce.
gene_id <-
  count_matrix$gene_name %>%
  gprofiler2::gconvert() %>%
  as_tibble()

# Keep only gene names that map to exactly one target, so the join below can
# never duplicate a count row. Counting rows per group replaces the earlier
# nest()/unnest() round trip, which called tidyr::unnest() without `cols`
# (deprecated, emits a warning) and left the result grouped.
gene_id_filt <-
  gene_id %>%
  group_by(input) %>%
  filter(n() == 1) %>%
  ungroup() %>%
  dplyr::select(input, target)

count_matrix_filt <-
  count_matrix %>%
  # inner_join keeps exactly the rows whose gene_name has a unique mapping
  # (same result as filtering on %in% and then left-joining).
  inner_join(gene_id_filt, by = c("gene_name" = "input")) %>%
  # gene_id first, then every column whose name contains both "_" and "LS"
  # (tidyselect's contains() ignores case by default). gene_name and all
  # other columns are dropped.
  dplyr::select(gene_id = target, contains("_") & contains("LS")) %>%
  # dplyr::select(gene_id, starts_with("01")) %>%
  # Several gene names may share one target; keep the first row per gene_id.
  distinct(gene_id, .keep_all = TRUE)
# readr::write_csv(count_matrix_filt, "data/biomap_count_matrix_filt_all_visit.csv")
```
```{r}
# Metadata sheet stored alongside the GEO count matrix.
phenotype <-
  openxlsx::read.xlsx("data/geo/geo_meta.xlsx") %>%
  as_tibble()

# Supplementary table S1 of the manuscript.
meta_data_manuscript <-
  openxlsx::read.xlsx("data/supplementary/table_s1.xlsx") %>%
  as_tibble()

# One metadata row per sample column of the filtered count matrix, in the same
# order as those columns ([-1] drops gene_id).
# The sample names are taken from `count_matrix_filt`: the previously
# referenced `count_matrix_filt_bl` is never created in this document, so the
# chunk failed in a fresh session.
phenotype <-
  tibble(title = colnames(count_matrix_filt)[-1]) %>%
  left_join(phenotype, by = "title") %>%
  dplyr::select(
    id = title,
    baseline_characteristics_sex = gender,
    subject, skin_type,
    scorad, easi_total_score, visit, date_visit, biopsy_area
  ) %>%
  # NOTE(review): the join keys are inferred from the column names shared with
  # table S1; spell them out with `by =` once confirmed.
  left_join(meta_data_manuscript) %>%
  rename(
    # baseline_characteristics_age = age,
    baseline_disease_activity_SCORAD = scorad,
    baseline_disease_activity_EASI = easi_total_score
  ) %>%
  mutate(
    baseline_characteristics_sex = factor(baseline_characteristics_sex),
    baseline_characteristics_drug_washout = factor("no")
  )

# Age and age of onset per subject, taken from the visit "01" rows only.
# `age` and `age_onset` are not among the columns selected above, so they come
# from the table S1 join.
age_age_onset <-
  phenotype %>%
  filter(visit == "01") %>%
  dplyr::select(subject, age, age_onset) %>%
  distinct() %>%
  arrange(subject) %>%
  mutate(
    # "<1" is recoded to "0" so the column can be converted to numeric; any
    # other non-numeric entry becomes NA in as.numeric().
    baseline_characteristics_onset_age = case_when(
      age_onset == "<1" ~ "0",
      TRUE ~ age_onset
    ),
    baseline_characteristics_age = age,
    baseline_characteristics_onset_age =
      as.numeric(baseline_characteristics_onset_age)
  ) %>%
  dplyr::select(-age_onset)

# Attach the per-subject ages to every sample row and derive the onset flags.
# Subjects without a visit "01" row get NA for the age columns and the flags.
phenotype_age_onset <-
  phenotype %>%
  left_join(age_age_onset, by = "subject") %>%
  mutate(
    baseline_characteristics_onset_early_ad =
      ifelse(baseline_characteristics_onset_age <= 2, "yes", "no"),
    baseline_characteristics_onset_late_ad =
      ifelse(baseline_characteristics_onset_age > 18, "yes", "no")
  ) %>%
  # Only id, visit and the baseline_* columns are exported.
  dplyr::select(id, visit, contains("baseline"))
```
```{r}
# file.copy("data/biomap_phenotype_bl.csv", "../adpso_endotypes_federated/data/biomap_phenotype_bl.csv", overwrite = T)
# file.copy("data/biomap_count_matrix_filt_bl.csv", "../adpso_endotypes_federated/data/biomap_count_matrix_filt_bl.csv", overwrite = T)

# gene_id plus every count column whose name starts with `prefix`.
counts_for_visit <- function(prefix) {
  count_matrix_filt %>%
    select(gene_id, starts_with(prefix))
}

# Metadata rows belonging to one visit, without the `visit` column itself.
meta_for_visit <- function(visit_code) {
  phenotype_age_onset %>%
    filter(visit == visit_code) %>%
    select(-visit)
}

# Visit 1
count_v1 <- counts_for_visit("01_")
meta_v1 <- meta_for_visit("01")
readr::write_csv(count_v1, "data/biomap_count_v1.csv")
readr::write_csv(meta_v1, "data/biomap_meta_v1.csv")

# Visit 2
count_v2 <- counts_for_visit("02_")
meta_v2 <- meta_for_visit("02")
readr::write_csv(count_v2, "data/biomap_count_v2.csv")
readr::write_csv(meta_v2, "data/biomap_meta_v2.csv")

# Visit 3
count_v3 <- counts_for_visit("03_")
meta_v3 <- meta_for_visit("03")
readr::write_csv(count_v3, "data/biomap_count_v3.csv")
readr::write_csv(meta_v3, "data/biomap_meta_v3.csv")

# Visit 4
count_v4 <- counts_for_visit("04_")
meta_v4 <- meta_for_visit("04")
readr::write_csv(count_v4, "data/biomap_count_v4.csv")
readr::write_csv(meta_v4, "data/biomap_meta_v4.csv")

# Visit 5
count_v5 <- counts_for_visit("05_")
meta_v5 <- meta_for_visit("05")
readr::write_csv(count_v5, "data/biomap_count_v5.csv")
readr::write_csv(meta_v5, "data/biomap_meta_v5.csv")

# All visits together: the full filtered count matrix and every metadata row.
count_all_visit <- count_matrix_filt
meta_all_visit <- select(phenotype_age_onset, -visit)
readr::write_csv(count_all_visit, "data/biomap_count_all_visit.csv")
readr::write_csv(meta_all_visit, "data/biomap_meta_all_visit.csv")
```
```{bash}
# Copy every file matching data/biomap* into the federated-analysis project's
# input directory. This includes the data/biomap_count_*.csv and
# data/biomap_meta_*.csv files written by the R chunk above.
# NOTE(review): the glob also picks up any other data/biomap* files already on
# disk, and the destination directory is presumably expected to exist - confirm.
cp data/biomap* ../adpso_endotypes_federated/input
```