-
Notifications
You must be signed in to change notification settings - Fork 3
/
make_database.R
30 lines (24 loc) · 962 Bytes
/
make_database.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
## make sqlite database from allh.txt
## One-time setup: install the packages used below (presumably the
## dependencies of read.csv.sql -- confirm before trimming this list).
install.packages('proto')
install.packages('DBI')
install.packages('chron')
install.packages('RSQLite')
install.packages('RSQLite.extfuns')
library(devtools)
## install_github() takes the repository as one "user/repo" string. The old
## two-argument (repo, username) form is no longer accepted: the second
## positional argument is now interpreted as a git ref.
install_github('alyssafrazee/read.csv.sql')
library(read.csv.sql)

## Build dat.db: create (or truncate) the database file, then load the
## tab-separated allh.txt into a table named `github` in the main schema.
dbfile <- 'dat.db'
## cat() with no content and append left at its default just writes an
## empty file, so any previous dat.db is discarded.
cat(file=dbfile)
## `statement` is reused later in this script for the anonymised database.
statement <- 'create table main.github as select * from file'
read.csv.sql('allh.txt', sql=statement, dbname=dbfile, sep='\t')
## Strip last names so the data can be released publicly (anonymity).
## First load the raw tab-separated table; quote and comment parsing are
## switched off so quote characters and '#' inside fields stay ordinary data.
allh <- read.table(
    file = 'allh.txt',
    header = TRUE,
    sep = '\t',
    quote = '',
    comment.char = ''
)
## Split each string in `x` on `pattern` and return the piece(s) at `slot`.
##
## x:       character vector to split.
## pattern: delimiter, passed to strsplit() as its `split` argument.
## slot:    positive index (or indices) of the piece(s) to keep; default 1.
## ...:     further arguments forwarded to strsplit(), e.g. fixed=TRUE.
##
## Returns a character vector with one entry per element of `x` (a matrix
## with one column per element of `x` when several slots are requested).
## A string with fewer pieces than `slot` yields NA, as does the empty
## string, which strsplit() splits into zero pieces.
ss <- function(x, pattern, slot=1, ...){
    pieces <- strsplit(x, pattern, ...)
    ## vapply() instead of sapply(): the result is always character, even
    ## for zero-length `x`, where sapply() would return an empty list.
    vapply(pieces, function(parts) parts[slot], character(length(slot)))
}
## Keep only the first space-delimited token of each owner name.
owner_name <- as.character(allh$owner_name)
first_name <- ss(owner_name, pattern=' ', slot=1)
## An empty name splits into zero tokens, so ss() returns NA for it and
## write.table() would then emit the literal text "NA" as that owner's
## name. Keep names that were blank in the input blank in the output.
first_name[is.na(first_name) & !is.na(owner_name)] <- ''
allh$owner_name <- first_name
## Write the anonymised table, then load it into a second, freshly
## created database using the same SQL statement as for dat.db.
write.table(allh, file='allh_anon.txt', quote=FALSE, row.names=FALSE,
    col.names=TRUE, sep='\t')
## cat() with no content creates (or truncates) the database file.
cat(file='dat_anon.db')
read.csv.sql('allh_anon.txt', sql=statement, dbname='dat_anon.db', sep='\t')