Skip to content

Commit e1a30b0

Browse files
authored
feat: add excel serializer/parser (#975)
1 parent 2ba8a26 commit e1a30b0

9 files changed

+142
-1
lines changed

DESCRIPTION

+4-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,10 @@ Suggests:
5454
rapidoc,
5555
sf,
5656
ragg,
57-
svglite
57+
svglite,
58+
readxl,
59+
writexl,
60+
utils
5861
RoxygenNote: 7.3.2
5962
Collate:
6063
'async.R'

NAMESPACE

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export(include_rmd)
2727
export(is_plumber)
2828
export(options_plumber)
2929
export(parser_csv)
30+
export(parser_excel)
3031
export(parser_feather)
3132
export(parser_form)
3233
export(parser_geojson)
@@ -80,6 +81,7 @@ export(serializer_cat)
8081
export(serializer_content_type)
8182
export(serializer_csv)
8283
export(serializer_device)
84+
export(serializer_excel)
8385
export(serializer_feather)
8486
export(serializer_format)
8587
export(serializer_geojson)

NEWS.md

+6
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@
66
* `parse_rds()`, `parse_feather()`, and `parse_parquet()` no longer write data to disk during parsing (@thomasp85, #942)
77
* Returning error messages are now turned off by default rather than being turned on if running interactively and turned off if not (@thomasp85, #962)
88

9+
* New serializers
10+
* `serializer_excel()`: Return an object serialized by `writexl::write_xlsx` (@r2evans, #973).
11+
12+
* New request body parsers
13+
* `parser_excel()`: Parse request body as an excel workbook using `readxl::read_excel` (@r2evans, #973). This defaults to loading the first worksheet only; use `@parser excel list(sheet=NA)` to import all worksheets. This always returns a list of data frames, even when only one worksheet is read.
14+
915
# plumber 1.2.2
1016

1117
* Allow to set plumber options using environment variables `?options_plumber`. (@meztez #934)

R/parse-body.R

+36
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,41 @@ parser_parquet <- function(...) {
510510
}
511511
}
512512

513+
# readxl's default behavior is to read only one worksheet at a time; in order for an endpoint to
514+
# read multiple worksheets, its documentation suggests iterating over the discovered sheet names (cf.
515+
# https://readxl.tidyverse.org/articles/readxl-workflows.html#iterate-over-multiple-worksheets-in-a-workbook);
516+
# for this reason, this parser detects an NA in the 'sheet=' argument and replaces it with all
517+
# worksheet names found in the workbook
518+
519+
#' @describeIn parsers excel parser. See [readxl::read_excel()] for more details. (Defaults to reading in the first worksheet only, use `@parser excel list(sheet=NA)` to read in all worksheets.)
#' @param sheet Sheet to read. Either a string (the name of a sheet), or an
#'   integer (the position of the sheet). Defaults to the first sheet. To read all
#'   sheets, use `NA`.
#' @export
parser_excel <- function(..., sheet = NULL) {
  # readxl is an optional dependency, so fail early with a clean message
  # (call. = FALSE keeps the internal call out of the error, matching the
  # guard used by serializer_excel()).
  if (!requireNamespace("readxl", quietly = TRUE)) {
    stop("`readxl` must be installed for `parser_excel` to work", call. = FALSE)
  }

  # `...` below refers to the arguments given to parser_excel(); they are
  # forwarded unchanged to readxl::read_excel() for every sheet that is read.
  parse_fn <- parser_read_file(function(tmpfile) {
    if (is.null(sheet)) {
      # we have to hard-code this since lapply won't iterate if NULL
      sheet <- 1L
    } else if (anyNA(sheet)) {
      # any NA in `sheet` means "read every worksheet in the workbook"
      sheet <- readxl::excel_sheets(tmpfile)
    }

    # Sheets selected by name produce a named list; sheets selected by
    # position produce an unnamed list.
    if (is.character(sheet)) names(sheet) <- sheet

    # The result is always a list with one element per requested sheet, even
    # when a single sheet is read.
    # NOTE(review): every warning raised while reading is suppressed here.
    suppressWarnings(
      lapply(sheet, function(sht) {
        readxl::read_excel(path = tmpfile, sheet = sht, ...)
      })
    )
  })

  # Arguments passed at parse time other than `value` are ignored.
  function(value, ...) {
    parse_fn(value)
  }
}
547+
513548
#' @describeIn parsers Octet stream parser. Returns the raw content.
514549
#' @export
515550
parser_octet <- function() {
@@ -588,6 +623,7 @@ register_parsers_onLoad <- function() {
588623
register_parser("rds", parser_rds, fixed = "application/rds")
589624
register_parser("feather", parser_feather, fixed = c("application/vnd.apache.arrow.file", "application/feather"))
590625
register_parser("parquet", parser_parquet, fixed = "application/vnd.apache.parquet")
626+
register_parser("excel", parser_excel, fixed = c("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "application/vnd.ms-excel"))
591627
register_parser("text", parser_text, fixed = "text/plain", regex = "^text/")
592628
register_parser("tsv", parser_tsv, fixed = c("application/tab-separated-values", "text/tab-separated-values"))
593629
# yaml types: https://stackoverflow.com/a/38000954/591574

R/serializer.R

+17
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,22 @@ serializer_parquet <- function(type = "application/vnd.apache.parquet") {
306306
)
307307
}
308308

309+
#' @describeIn serializers excel serializer. See also: [writexl::write_xlsx()]
#' @export
serializer_excel <- function(..., type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet") {
  # writexl is an optional dependency; the message names the serializer as it
  # is registered ("excel") rather than the backing package.
  if (!requireNamespace("writexl", quietly = TRUE)) {
    stop(
      "The writexl package is not available but is required in order to use the excel serializer",
      call. = FALSE
    )
  }

  # The value is written to a temporary ".xlsx" file by writexl; `...` holds
  # the extra arguments given to serializer_excel() and is forwarded to
  # writexl::write_xlsx() on every call.
  serializer_write_file(
    fileext = ".xlsx",
    type = type,
    write_fn = function(val, tmpfile) {
      writexl::write_xlsx(x = val, path = tmpfile, ...)
    }
  )
}
309325

310326
#' @describeIn serializers YAML serializer. See also: [yaml::as.yaml()]
311327
#' @export
@@ -693,6 +709,7 @@ add_serializers_onLoad <- function() {
693709
register_serializer("tsv", serializer_tsv)
694710
register_serializer("feather", serializer_feather)
695711
register_serializer("parquet", serializer_parquet)
712+
register_serializer("excel", serializer_excel)
696713
register_serializer("yaml", serializer_yaml)
697714
register_serializer("geojson", serializer_geojson)
698715

man/parsers.Rd

+9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/serializers.Rd

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-parse-body.R

+26
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,32 @@ test_that("Test parquet parser", {
134134
expect_equal(parsed, r_object)
135135
})
136136

137+
test_that("Test excel parser", {
  skip_if_not_installed("readxl")
  skip_if_not_installed("writexl")

  tmp <- tempfile(fileext = ".xlsx")
  # unlink() stays silent if the file was never created (e.g. when
  # write_xlsx() fails and the test is skipped), whereas file.remove() warns.
  on.exit(unlink(tmp), add = TRUE)

  # note: factors will fail the round-trip test
  r_object <- data.frame(
    chr = LETTERS[1:3],
    int = 1:3,
    num = pi + 1:3,
    lgl = c(TRUE, FALSE, NA)
  )
  res <- try(writexl::write_xlsx(r_object, tmp), silent = TRUE)
  skip_if(
    inherits(res, "try-error"),
    "writexl::write_xlsx() isn't working."
  )

  # Read the complete workbook; a fixed byte count would silently truncate a
  # larger file and hand the parser a corrupt archive.
  val <- readBin(tmp, "raw", file.size(tmp))

  parsed <- parse_body(
    val,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    make_parser("excel")
  )
  # the parser returns a list of sheets; convert the first from tibble to data.frame
  parsed <- as.data.frame(parsed[[1]], stringsAsFactors = FALSE)

  expect_equal(parsed, r_object)
})
162+
137163
test_that("Test geojson parser", {
138164
skip_if_not_installed("geojsonsf")
139165
skip_if_not_installed("sf")
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
context("excel serializer")
2+
3+
test_that("excel serializes properly", {
  skip_if_not_installed("writexl")

  # Serialize a tiny one-row frame and inspect the resulting response.
  input <- data.frame(a = 1, b = 2, c = "hi")
  serialize <- serializer_excel()
  response <- serialize(input, data.frame(), PlumberResponse$new(), stop)

  expect_equal(response$status, 200L)
  expect_equal(
    response$headers$`Content-Type`,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
  )

  # An xlsx workbook is a zip archive, so the body must begin with the zip
  # file signature (see https://en.wikipedia.org/wiki/List_of_file_signatures).
  zip_signature <- as.raw(c(0x50, 0x4b, 0x03, 0x04))
  expect_equal(response$body[1:4], zip_signature)

  # Write the body to disk and confirm the archive lists the workbook part.
  archive <- tempfile()
  on.exit(unlink(archive), add = TRUE)
  writeBin(response$body, archive)

  listing <- expect_silent(utils::unzip(archive, list = TRUE))
  expect_s3_class(listing, "data.frame")
  expect_true("xl/workbook.xml" %in% listing$Name)
})
22+
23+
test_that("Errors call error handler", {
  skip_if_not_installed("writexl")

  # Count how many times the serializer hands control to the error handler.
  n_errors <- 0
  count_error <- function(req, res, err) {
    n_errors <<- n_errors + 1
  }

  expect_equal(n_errors, 0)

  # An expression object is passed as the value to serialize; the test expects
  # exactly one call to the error handler as a result.
  serializer_excel()(
    parse(text = "hi"),
    data.frame(),
    PlumberResponse$new("csv"),
    errorHandler = count_error
  )

  expect_equal(n_errors, 1)
})

0 commit comments

Comments
 (0)