Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added naStrings option to 'null_to_na()' and predecessors #318

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ Suggests:
rmarkdown,
R.rsp,
sp
RoxygenNote: 7.0.2
RoxygenNote: 7.1.0
14 changes: 9 additions & 5 deletions R/fromJSON.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#' @param factor how to encode factor objects: must be one of 'string' or 'integer'
#' @param complex how to encode complex numbers: must be one of 'string' or 'list'
#' @param raw how to encode raw objects: must be one of 'base64', 'hex' or 'mongo'
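#' @param naStrings character vector of strings to interpret as missing or special values when 'simplifyVector' is 'TRUE'. "NaN", "Inf" and "-Inf" are parsed to the corresponding numeric values; any other listed string becomes NA. Conversion only happens when every string in the vector is listed. Defaults to c("NA", "NaN", "Inf", "-Inf")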
#' @param null how to encode NULL values within a list: must be one of 'null' or 'list'
#' @param na how to print NA values: must be one of 'null' or 'string'. Defaults are class specific
#' @param auto_unbox automatically \code{\link{unbox}} all atomic vectors of length 1. It is usually safer to avoid this and instead use the \code{\link{unbox}} function to unbox individual elements.
Expand Down Expand Up @@ -75,7 +76,8 @@
#' identical(data3, flatten(data2))
#' }
fromJSON <- function(txt, simplifyVector = TRUE, simplifyDataFrame = simplifyVector,
simplifyMatrix = simplifyVector, flatten = FALSE, ...) {
simplifyMatrix = simplifyVector, flatten = FALSE,
naStrings = c("NA", "NaN", "Inf", "-Inf"), ...) {

# check type
if (!is.character(txt) && !inherits(txt, "connection")) {
Expand All @@ -98,11 +100,13 @@ fromJSON <- function(txt, simplifyVector = TRUE, simplifyDataFrame = simplifyVec

# call the actual function (with deprecated arguments)
parse_and_simplify(txt = txt, simplifyVector = simplifyVector, simplifyDataFrame = simplifyDataFrame,
simplifyMatrix = simplifyMatrix, flatten = flatten, ...)
simplifyMatrix = simplifyMatrix, flatten = flatten, naStrings = naStrings, ...)
}

parse_and_simplify <- function(txt, simplifyVector = TRUE, simplifyDataFrame = simplifyVector,
simplifyMatrix = simplifyVector, flatten = FALSE, unicode = TRUE, validate = TRUE, bigint_as_char = FALSE, ...){
parse_and_simplify <- function(txt, simplifyVector = TRUE,
simplifyDataFrame = simplifyVector, simplifyMatrix = simplifyVector,
flatten = FALSE, unicode = TRUE, validate = TRUE, bigint_as_char = FALSE,
naStrings = c("NA", "NaN", "Inf", "-Inf"), ...){

if(!missing(unicode)){
message("Argument unicode has been deprecated. YAJL always parses unicode.")
Expand All @@ -118,7 +122,7 @@ parse_and_simplify <- function(txt, simplifyVector = TRUE, simplifyDataFrame = s
# post processing
if (any(isTRUE(simplifyVector), isTRUE(simplifyDataFrame), isTRUE(simplifyMatrix))) {
return(simplify(obj, simplifyVector = simplifyVector, simplifyDataFrame = simplifyDataFrame,
simplifyMatrix = simplifyMatrix, flatten = flatten, ...))
simplifyMatrix = simplifyMatrix, flatten = flatten, naStrings = naStrings, ...))
} else {
return(obj)
}
Expand Down
4 changes: 2 additions & 2 deletions R/list_to_vec.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
list_to_vec <- function(x) {
list_to_vec <- function(x, naStrings = c("NA", "NaN", "Inf", "-Inf")) {
isdates <- is_datelist(x)
out <- unlist(null_to_na(x), recursive = FALSE, use.names = FALSE)
out <- unlist(null_to_na(x, naStrings), recursive = FALSE, use.names = FALSE)
if(isdates && is.numeric(out)){
structure(out, class = c("POSIXct", "POSIXt"))
} else{
Expand Down
4 changes: 2 additions & 2 deletions R/null_to_na.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#' @useDynLib jsonlite C_null_to_na
null_to_na <- function(x) {
.Call(C_null_to_na, x)
null_to_na <- function(x, naStrings) {
.Call(C_null_to_na, x, naStrings)
}

#' @useDynLib jsonlite C_is_datelist
Expand Down
10 changes: 6 additions & 4 deletions R/simplify.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
simplify <- function(x, simplifyVector = TRUE, simplifyDataFrame = TRUE, simplifyMatrix = TRUE,
simplifyDate = simplifyVector, homoList = TRUE, flatten = FALSE, columnmajor = FALSE,
simplifySubMatrix = simplifyMatrix) {
simplifySubMatrix = simplifyMatrix, naStrings = c("NA", "NaN", "Inf", "-Inf")) {

#This includes '[]' and '{}')
if (!is.list(x) || !length(x)) {
Expand All @@ -9,7 +9,8 @@ simplify <- function(x, simplifyVector = TRUE, simplifyDataFrame = TRUE, simplif

# list can be a dataframe recordlist
if (isTRUE(simplifyDataFrame) && is.recordlist(x)) {
mydf <- simplifyDataFrame(x, flatten = flatten, simplifyMatrix = simplifySubMatrix)
mydf <- simplifyDataFrame(x, flatten = flatten,
simplifyMatrix = simplifySubMatrix, naStrings = naStrings)
if(isTRUE(simplifyDate) && is.data.frame(mydf) && is.datelist(mydf)){
return(parse_date(mydf[["$date"]]))
}
Expand All @@ -18,12 +19,13 @@ simplify <- function(x, simplifyVector = TRUE, simplifyDataFrame = TRUE, simplif

# or a scalar list (atomic vector)
if (isTRUE(simplifyVector) && is.null(names(x)) && is.scalarlist(x)) {
return(list_to_vec(x))
return(list_to_vec(x, naStrings = naStrings))
}

# apply recursively
out <- lapply(x, simplify, simplifyVector = simplifyVector, simplifyDataFrame = simplifyDataFrame,
simplifyMatrix = simplifySubMatrix, columnmajor = columnmajor, flatten = flatten)
simplifyMatrix = simplifySubMatrix, columnmajor = columnmajor, flatten = flatten,
naStrings = naStrings)

# fix for mongo style dates turning into scalars *after* simplifying
# only happens when simplifyDataframe=FALSE
Expand Down
5 changes: 3 additions & 2 deletions R/simplifyDataFrame.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
simplifyDataFrame <- function(recordlist, columns, flatten, simplifyMatrix) {
simplifyDataFrame <- function(recordlist, columns, flatten, simplifyMatrix, naStrings) {

# no records at all
if (!length(recordlist)) {
Expand Down Expand Up @@ -27,7 +27,8 @@ simplifyDataFrame <- function(recordlist, columns, flatten, simplifyMatrix) {

# simplify vectors and nested data frames
columnlist <- lapply(columnlist, simplify, simplifyVector = TRUE, simplifyDataFrame = TRUE,
simplifyMatrix = FALSE, simplifySubMatrix = simplifyMatrix, flatten = flatten)
simplifyMatrix = FALSE, simplifySubMatrix = simplifyMatrix, flatten = flatten,
naStrings = naStrings)

# check that all elements have equal length
columnlengths <- unlist(vapply(columnlist, function(z) {
Expand Down
1 change: 1 addition & 0 deletions jsonlite.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@ StripTrailingWhitespace: Yes

BuildType: Package
PackageInstallArgs: --no-multiarch --with-keep.source --install-tests
PackageRoxygenize: rd,collate,namespace
3 changes: 3 additions & 0 deletions man/fromJSON.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 26 additions & 9 deletions src/null_to_na.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,37 @@

/*
This function takes a list and replaces all NULL values by NA.
In addition, it will parse strings "NA" "NaN" "Inf" and "-Inf",
unless there is at least one non-na string element in the list.
In addition, it will replace strings matched by 'naStrings'
(defaults to "NA" "NaN" "Inf" and "-Inf") with NA, unless there is
at least one non-na string element in the list.
In that case converting to real values has no point because
unlist() will coerce them back into a string anyway.
*/

SEXP C_null_to_na(SEXP x) {
SEXP C_null_to_na(SEXP x, SEXP naStrings) {
int len = length(x);
if(len == 0) return x;

//null always turns into NA
int len_naStrings = length(naStrings);
bool looks_like_na_string = false;
bool looks_like_character_vector = false;

for (int i=0; i<len; i++) {
if(VECTOR_ELT(x, i) == R_NilValue) {
//null always turns into NA
SET_VECTOR_ELT(x, i, ScalarLogical(NA_LOGICAL));
} else if(!looks_like_character_vector && TYPEOF(VECTOR_ELT(x, i)) == STRSXP){
if((strcmp("NA", CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0) ||
(strcmp("NaN", CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0) ||
(strcmp("Inf", CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0) ||
(strcmp("-Inf", CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0)) continue;
looks_like_character_vector = true;
looks_like_na_string = false;
for (int j=0; j < len_naStrings; j++) {
if(!looks_like_na_string &&
strcmp(CHAR(STRING_ELT(naStrings, j)),
CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0) {
looks_like_na_string = true;
}
}
if(!looks_like_na_string) {
looks_like_character_vector = true;
}
}
}

Expand All @@ -51,6 +61,13 @@ SEXP C_null_to_na(SEXP x) {
SET_VECTOR_ELT(x, i, ScalarReal(R_NegInf));
continue;
}
for (int j=0; j < len_naStrings; j++) {
if(strcmp(CHAR(STRING_ELT(naStrings, j)),
CHAR(STRING_ELT(VECTOR_ELT(x, i), 0))) == 0) {
SET_VECTOR_ELT(x, i, ScalarLogical(NA_LOGICAL));
break;
}
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/register.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ extern SEXP C_escape_chars(SEXP);
extern SEXP C_is_datelist(SEXP);
extern SEXP C_is_recordlist(SEXP);
extern SEXP C_is_scalarlist(SEXP);
extern SEXP C_null_to_na(SEXP);
extern SEXP C_null_to_na(SEXP, SEXP);
extern SEXP C_row_collapse_array(SEXP, SEXP);
extern SEXP C_row_collapse_object(SEXP, SEXP, SEXP);
extern SEXP C_transpose_list(SEXP, SEXP);
Expand All @@ -37,7 +37,7 @@ static const R_CallMethodDef CallEntries[] = {
{"C_is_datelist", (DL_FUNC) &C_is_datelist, 1},
{"C_is_recordlist", (DL_FUNC) &C_is_recordlist, 1},
{"C_is_scalarlist", (DL_FUNC) &C_is_scalarlist, 1},
{"C_null_to_na", (DL_FUNC) &C_null_to_na, 1},
{"C_null_to_na", (DL_FUNC) &C_null_to_na, 2},
{"C_row_collapse_array", (DL_FUNC) &C_row_collapse_array, 2},
{"C_row_collapse_object", (DL_FUNC) &C_row_collapse_object, 3},
{"C_transpose_list", (DL_FUNC) &C_transpose_list, 2},
Expand Down
33 changes: 33 additions & 0 deletions tests/testthat/test-fromJSON-custom-na-strings.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
context("fromJSON custom NA strings")

# Fixtures shared by the tests below. Each vector is serialised with toJSON()
# and parsed back with fromJSON(), so every expectation describes a round trip.
mixed <- c("FOO", "NA", NA, "NaN")
single_na <- "NA"
only_na <- c("NA", "NaN", "Inf", "-Inf")
mixed_na <- c("NA", "Inf", ".")
# c() coerces this to character: c(NA, "Inf", "NA", ".")
mixed_text_and_real_na <- c(NA, Inf, "NA", ".")

test_that("fromJSON keeps its default NA string handling", {
  # A vector containing an ordinary string is left as character.
  expect_equal(fromJSON(toJSON(mixed)), mixed)
  expect_equal(fromJSON(toJSON(mixed_na)), mixed_na)
  expect_equal(
    fromJSON(toJSON(mixed_text_and_real_na)),
    c(NA, "Inf", "NA", ".")
  )

  # A vector made up only of the default NA strings is converted.
  expect_equal(fromJSON(toJSON(single_na)), NA)
  expect_equal(fromJSON(toJSON(only_na)), c(NA, NaN, Inf, -Inf))
})

test_that("fromJSON honours custom NA strings", {
  # An empty string or NULL matches none of the fixtures, so nothing is
  # converted and the strings come back unchanged.
  expect_equal(fromJSON(toJSON(mixed), naStrings = ""), mixed)
  expect_equal(fromJSON(toJSON(single_na), naStrings = ""), single_na)
  expect_equal(fromJSON(toJSON(single_na), naStrings = NULL), single_na)
  expect_equal(fromJSON(toJSON(only_na), naStrings = ""), only_na)
  expect_equal(fromJSON(toJSON(only_na), naStrings = NULL), only_na)

  # "-Inf" is not listed here, so the vector still counts as character.
  expect_equal(
    fromJSON(toJSON(only_na), naStrings = c("NA", "NaN", "Inf")),
    only_na
  )

  # Passing the default set explicitly behaves like the default.
  expect_equal(
    fromJSON(toJSON(only_na), naStrings = c("NA", "NaN", "Inf", "-Inf")),
    c(NA, NaN, Inf, -Inf)
  )

  # A user-supplied string (".") is turned into NA alongside the built-ins.
  expect_equal(
    fromJSON(toJSON(mixed_na), naStrings = c("NA", ".", "Inf")),
    c(NA, Inf, NA)
  )
  expect_equal(
    fromJSON(toJSON(mixed_text_and_real_na), naStrings = c("NA", ".", "Inf")),
    c(NA, Inf, NA, NA)
  )
})