Skip to content

Commit

Permalink
Replaced RCurl with httr, more unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
colindouglas committed Dec 19, 2019
1 parent cb14eb9 commit 6d26323
Show file tree
Hide file tree
Showing 13 changed files with 57 additions and 38 deletions.
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ Description: A collection of tools to import and structure the (currently) singl
URL: http://github.com/colindouglas/retrosheet
Depends: R (>= 2.10)
License: GPL (>= 2)
Imports: RCurl (>= 1.95-4.5),
XML (>= 3.98-1.1),
Imports: XML (>= 3.98-1.1),
data.table (>= 1.9.4),
stringi (>= 0.4-1),
httr
httr (>= 1.4.1)
Note: NOTICE regarding the transfer of data from Retrosheet:
The information used here was obtained free of charge from
and is copyrighted by Retrosheet. Interested parties may
Expand Down
5 changes: 1 addition & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,16 @@ export(getParkIDs)
export(getPartialGamelog)
export(getRetrosheet)
export(getTeamIDs)
importFrom(RCurl,getCurlHandle)
importFrom(RCurl,getURL)
importFrom(RCurl,url.exists)
importFrom(XML,free)
importFrom(XML,htmlParse)
importFrom(XML,xpathSApply)
importFrom(data.table,fread)
importFrom(data.table,setnames)
importFrom(httr,GET)
importFrom(httr,content)
importFrom(httr,http_error)
importFrom(httr,write_disk)
importFrom(stats,setNames)
importFrom(stringi,stri_split_fixed)
importFrom(utils,download.file)
importFrom(utils,read.csv)
importFrom(utils,unzip)
14 changes: 7 additions & 7 deletions R/getFileNames.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
#'
#' @examples getFileNames()
#'
#' @importFrom RCurl getCurlHandle
#' @importFrom RCurl getURL
#' @importFrom httr GET content
#' @importFrom XML htmlParse
#' @importFrom XML xpathSApply
#' @importFrom XML free
Expand All @@ -22,19 +21,20 @@

getFileNames <- function() {
path <- c(event = "game.htm", gamelog = "gamelogs/index.html",
schedule = "schedule/index.html")
full <- sprintf("http://www.retrosheet.org/%s", path)
curl <- getCurlHandle()
schedule = "schedule/index.html")
full <- sprintf("https://www.retrosheet.org/%s", path)
#curl <- getCurlHandle()
docs <- lapply(full, function(x) {
content <- getURL(x, curl = curl)
#content <- getURL(x, curl = curl)
content <- httr::content(httr::GET(x))
htmlParse(content, asText = TRUE)
})
o <- function(pat, doc) {
fnames <- xpathSApply(doc,
path = "(//pre//a | //b/a)/@href", fun = basename)
grep(pat, fnames, value = TRUE)
}
part <- sprintf(c("%seve.zip", "gl%s.zip", "%ssked.txt"), "\\d+")
part <- sprintf(c("%seve.zip", "gl%s.zip", "%sSKED.ZIP"), "\\d+")
res <- setNames(Map(o, part, docs), names(path))
lapply(docs, free)
res
Expand Down
7 changes: 4 additions & 3 deletions R/getPartialGamelog.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
#' subset of \code{gamelogFields}, and \strong{not} the entire vector.
#' @param date One of either NULL (the default), or a single four-digit
#' character string identifying the date 'mmdd'
#' @param ... further arguments passed to \code{\link[utils]{download.file}}
#'
#' @importFrom data.table fread
#' @importFrom data.table setnames
#' @importFrom httr GET write_disk
#'
#' @export
#'
Expand All @@ -31,7 +31,7 @@
#' ## Get Homerun and RBI info for August 25, 2012 - with park ID
#' getPartialGamelog(glFields=f, date = "20120825")
#'
getPartialGamelog <- function(year, glFields, date = NULL, ...) {
getPartialGamelog <- function(year, glFields, date = NULL) {

## check 'glFields' against package variable 'retrosheetFields$gamelog'
if(identical(glFields, retrosheetFields$gamelog)) {
Expand All @@ -49,7 +49,8 @@ getPartialGamelog <- function(year, glFields, date = NULL, ...) {
## download the file
tmp <- tempfile()
on.exit(unlink(tmp))
download.file(full, destfile = tmp, ...)
#download.file(full, destfile = tmp, ...)
GET(full, write_disk(tmp, overwrite=TRUE))

## extract the text file
fname <- unzip(tmp, list = TRUE)$Name
Expand Down
5 changes: 2 additions & 3 deletions R/getRetrosheet.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#' @param stringsAsFactors logical. The \code{stringsAsFactors} argument as
#' used in \code{\link[base]{data.frame}}. Currently applicable to types "game" and "schedule".
#' @param cache character. Path to locale cache of retrosheet data. If file doesn't exist, cache for later. If the file exists, used cache instead of web version
#' @param ... further arguments passed to \code{\link[utils]{download.file}}.
#'
#' @return The following return values are possible for the given \code{type}
#' \itemize{
Expand Down Expand Up @@ -50,10 +49,10 @@
#' @importFrom httr http_error GET write_disk
#' @importFrom stringi stri_split_fixed
#' @importFrom stats setNames
#' @importFrom utils download.file read.csv unzip
#' @importFrom utils read.csv unzip
#' @export

getRetrosheet <- function(type, year, team, schedSplit = NULL, stringsAsFactors = FALSE, cache = NA, ...) {
getRetrosheet <- function(type, year, team, schedSplit = NULL, stringsAsFactors = FALSE, cache = NA) {

type <- match.arg(type, c("game", "play", "roster", "schedule"))

Expand Down
11 changes: 5 additions & 6 deletions R/getTeamIDs.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,27 @@
#' \code{team} argument of \code{getRetrosheet("play", year, team)}.
#'
#' @param year A single valid four-digit numeric year.
#' @param quiet logical. Passed to \code{\link[utils]{download.file}}
#' @param ... further arguments passed to \code{\link[utils]{download.file}}.
#'
#' @return If the file exists, a named vector of IDs for the given year.
#' Otherwise \code{NA}.
#'
#' @details All currently available years can be retrieved with
#' \code{type.convert(substr(getFileNames()$event, 1L, 4L))}
#'
#' @importFrom RCurl url.exists
#' @importFrom httr http_error GET write_disk
#' @importFrom data.table fread
#' @export
#'
#' @examples getTeamIDs(2010)
#'
getTeamIDs <- function(year, quiet = TRUE, ...) {
getTeamIDs <- function(year, quiet = TRUE) {
stopifnot(is.numeric(year), length(year) == 1L)
path <- sprintf("http://www.retrosheet.org/events/%deve.zip", year)
if (url.exists(path)) {
if (!http_error(path)) {
tmp <- tempfile()
on.exit(unlink(tmp))
download.file(path, destfile = tmp, quiet = quiet, ...)
#download.file(path, destfile = tmp, quiet = quiet, ...)
GET(path, write_disk(tmp, overwrite=TRUE))
} else {
available <- grep(year, getFileNames()$event)
if(!length(available)) {
Expand Down
4 changes: 1 addition & 3 deletions man/getPartialGamelog.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 1 addition & 4 deletions man/getRetrosheet.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 1 addition & 5 deletions man/getTeamIDs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions tests/testthat/test-getFileNames.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
test_that("getFileNames() returns the right result", {
result <- getFileNames()

expect_equal(length(result), 3)
expect_equal(length(result[[1]]), 102)
expect_equal(length(result[[2]]), 149)
expect_equal(length(result[[3]]), 143)
})
6 changes: 6 additions & 0 deletions tests/testthat/test-getParkIDs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
test_that("getParkIDs returns the right results", {
park_ids <- getParkIDs()

expect_equal(ncol(park_ids), 2)
expect_equal(nrow(park_ids), 254)
})
13 changes: 13 additions & 0 deletions tests/testthat/test-getPartialGamelog.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
test_that("getPartialGamelog examples all work", {
f <- grep("HR|RBI|Park", gamelogFields, value = TRUE)

example_1 <- getPartialGamelog(2012, glFields = f)
example_2 <- getPartialGamelog(glFields=f, date = "20120825")

expect_equal(nrow(example_1), 2430)
expect_equal(ncol(example_1), 8)


expect_equal(nrow(example_2), 14)
expect_equal(ncol(example_2), 8)
})
6 changes: 6 additions & 0 deletions tests/testthat/test-getTeamIDs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
test_that("getTeamIDs returns the right number of teams", {

expect_equal(length(getTeamIDs(2012)), 30)

expect_equal(length(getTeamIDs(1954)), 16)
})

0 comments on commit 6d26323

Please sign in to comment.