-
Notifications
You must be signed in to change notification settings - Fork 0
/
season_results_xml.R
115 lines (67 loc) · 2.49 KB
/
season_results_xml.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
## Cody Crunkilton
## Misc Projects - Debate
## Tabroom XML extraction
## note: functions sourced in.
setwd("C:/Users/Cody/Dropbox/debate_r")
source("C:/Users/Cody/Dropbox/functions.R")
library(xml2)
library(tidyverse)
library(readxl)
# downloading XML ---------------------------------------------------------
# load("tournaments_xml.Rdata") ## load the data directly
# Load the results spreadsheet and add a `tourn_id` column: the text of `id`
# with everything up to and including "tourn_id=" stripped, and everything
# from "&result_id=" onwards stripped. Rows are then sorted by `major`,
# descending.
urls <- read_excel("debateresults_2018_2019.xlsx") %>%
  mutate(
    tourn_id = str_remove(
      str_remove(id, ".*tourn_id="),
      "&result_id=.*"
    )
  ) %>%
  arrange(desc(major))
## pulling the results
# Download the published-results XML for every row of `urls`.
#
# The list is preallocated so that slot i always belongs to row i of `urls`;
# a tournament that never downloads keeps a NULL in its slot instead of
# silently shortening the list.
xml_list <- vector("list", nrow(urls))
for (i in seq_len(nrow(urls))) {
  tourn_url <- paste0(
    "http://www.tabroom.com/api/tourn_published.mhtml?tourn_id=",
    urls$tourn_id[i]
  )
  x <- NULL
  attempt <- 0
  # Retry budget is unchanged from the original condition (`attempt <= 10`).
  while (is.null(x) && attempt <= 10) {
    attempt <- attempt + 1
    x <- tryCatch(
      read_xml(tourn_url),
      error = function(e) {
        # Report which tournament/attempt failed, then signal "retry" via NULL.
        message(
          "attempt ", attempt, " failed for tourn_id ", urls$tourn_id[i],
          ": ", conditionMessage(e)
        )
        NULL
      }
    )
    # Pause after every request, whether it succeeded or not.
    Sys.sleep(10)
  }
  if (is.null(x)) {
    warning(
      "giving up on tourn_id ", urls$tourn_id[i], " (row ", i, ")",
      call. = FALSE
    )
  }
  # `xml_list[[i]] <- NULL` would not store anything; assigning list(x) with
  # single brackets keeps the slot even when x is NULL.
  xml_list[i] <- list(x)
  # if(exists(xml_list[[i]])) {
  # paste("success! completed", i)
  # } ## tell me if it worked
}
#save(xml_list, file = "tournaments_xml_updated.Rdata")
# extracting --------------------------------------------------------------
# Single-argument adapter around one_df() so it can be passed to lapply().
# Neither `one_df` nor `words` is defined in this file; presumably both come
# from the sourced functions.R -- TODO confirm.
one_df_forlapply <- function(x) one_df(x, words)
# Remove every LABEL node found via the "//LABEL" XPath query on `x`
# (the "label" column made map break further down the pipeline).
# Returns whatever xml_remove() returns for the matched nodes.
remove_label <- function(x) {
  label_xpath <- paste0("//", "LABEL")
  label_nodes <- xml_find_all(x, label_xpath)
  xml_remove(label_nodes)
}
## NOTE: this is NOT an independent copy. xml2 documents are handles to
## external pointers, so `try2` and `xml_list` refer to the same underlying
## documents; removing LABEL nodes through `try2` also changes what
## `xml_list` sees (which is why the extraction below works on `xml_list`).
try2 <- xml_list
## removing the label -- called only for its side effect, so use walk()
## instead of building (and printing) a throwaway list with lapply()
walk(try2, remove_label)
## removing D8 (element 43), which is a problem for some reason.
## See "testing for where the error happened" to explore
season22 <- lapply(xml_list[-43], FUN = one_df_forlapply)
## stack the per-tournament results into one table
season222 <- do.call(
  rbind,
  lapply(season22, data.frame, stringsAsFactors = FALSE)
) %>%
  as_tibble() # as.tibble() is deprecated in favour of as_tibble()
write_csv(season222, "season_2019.csv")
# testing for where the error happened ------------------------------------
# NOTE(review): the original loop iterated over `try3`, which is never defined
# in this file; `try2` is the list every other line in this section inspects,
# so it is used here -- confirm that was the intent.
# Print the extracted data frame for each tournament in turn; the first one
# that errors is the culprit.
for (i in seq_along(try2)) {
  print(one_df(try2[[i]], words))
}
length(try2)
one_df(try2[[47]], words) ## prob w/ 43
make_all(try2[[42]], words)
# Walk the entries of `words` from the 4th onward for element 43.
# seq_along(words)[-(1:3)] is 4..length(words), and is empty (instead of
# counting down like 4:length(words)) when `words` has fewer than 4 entries.
for (i in seq_along(words)[-(1:3)]) {
  print(extract_child(try2[[43]], words[i]))
}
extract_child(try2[[43]], words[7])
extract_child(try2[[43]], "JUDGE")
xml_object <- try2[[43]]
# Inline version of the JUDGE extraction: one named list per JUDGE node
# (child names lower-cased, child text passed through type.convert), then
# row-bound by map_df().
try2[[42]] %>%
  xml_find_all(paste0("//", "JUDGE")) %>%
  map(xml_children) %>%
  map_df(
    ~ map(
      setNames(xml_text(.), xml_name(.) %>% tolower),
      type.convert,
      as.is = TRUE
    )
  ) # this is the line where the error happens.