-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconvert_vcf_matrix.R
122 lines (80 loc) · 2.65 KB
/
convert_vcf_matrix.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
### This script was used in Karbstein et al. (2021, https://onlinelibrary.wiley.com/doi/10.1111/mec.15919) to convert the *.vcf file, after import into MS Excel, into a file suitable for genetic heterozygosity calculations
## "We transformed information about reference and alternative alleles of the *.vcf file (diploid SNP calls, four allowed alleles per locus). Homozygous sites were assigned a value of 0, and heterozygous sites a value of 1. Then, we calculated the ratio of heterozygous sites to all sites per individual (Table S3)."
# Load openxlsx, installing it from CRAN first when it is not available.
# The argument is spelled out as `repos` (the original `repo` only worked
# through partial argument matching).
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx", repos = "http://cran.rstudio.com/")
}
library(openxlsx)

# Read the workbook "all_het_sites.xlsx" from the current working directory.
dat <- read.xlsx("all_het_sites.xlsx")

# Quick sanity check of what was read: column structure and first rows.
str(dat)
head(dat)
# Recode genotype strings: homozygous calls ("0/0", "1/1", "2/2", "3/3")
# become "0" and heterozygous calls (two different alleles) become "1".
#
# The substitutions are applied one after another, in exactly the order
# listed below, to every column of `dat`. "1/2" and "1/3" appear twice on
# purpose so that the sequence of passes stays unchanged.
gt_patterns <- c(
  "0/0", "1/1",
  "0/1", "1/2", "1/3", "0/2", "0/3", "1/2", "1/3", "2/3",
  "1/0", "2/0", "3/0",
  "2/2", "3/3",
  "2/1", "3/2", "3/1"
)
gt_codes <- c(
  "0", "0",
  "1", "1", "1", "1", "1", "1", "1", "1",
  "1", "1", "1",
  "0", "0",
  "1", "1", "1"
)

for (k in seq_along(gt_patterns)) {
  # gsub() receives each column as its `x` argument; the data frame is
  # rebuilt after every pass.
  dat <- data.frame(
    lapply(dat, gsub, pattern = gt_patterns[k], replacement = gt_codes[k])
  )
}
# Show the class of every column after the recoding step.
sapply(dat, class)

# Keep columns 10 through 313 only.
# NOTE(review): presumably these are the per-sample genotype columns that
# follow the leading VCF annotation columns - confirm against the workbook.
dat_only_values <- dat[10:313]

# Per-column summary of the retained columns, saved next to the values.
dat_stat <- summary(dat_only_values)
write.csv(x = dat_stat, file = "dat_stat.csv")
write.csv(x = dat_only_values, file = "dat_only_values.csv")
# Load dplyr and magrittr, installing them from CRAN first when missing.
# They provide mutate_if() and the %<>% assignment pipe used below.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr", repos = "http://cran.rstudio.com/")
}
library(dplyr)
if (!requireNamespace("magrittr", quietly = TRUE)) {
  install.packages("magrittr", repos = "http://cran.rstudio.com/")
}
library(magrittr)

# Convert the recoded columns to numeric values.
# as.numeric() applied directly to a factor returns the internal level codes
# (1, 2, ...) rather than the labels, so the conversion goes through
# as.character() first. Character columns are converted as well; without
# this colSums() below fails because its input is not numeric.
# Entries that are not numbers become NA (with a coercion warning).
dat_only_values %<>% mutate_if(
  function(x) is.factor(x) || is.character(x),
  function(x) as.numeric(as.character(x))
)

# Confirm the conversion: column classes and overall structure.
sapply(dat_only_values, class)
str(dat_only_values)

# Per-column sum of the 0/1 codes, ignoring NA entries.
# col_sums is only kept in the session; it is not written to disk here.
col_sums <- colSums(dat_only_values, na.rm = TRUE)

# Overwrite the earlier export with the numeric version of the table.
write.csv(dat_only_values, "dat_only_values.csv")