diff --git a/NEWS.md b/NEWS.md index f3fbfb85..ba5d2afc 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,6 +4,9 @@ (@teunbrand, #369). * Training on factor data no longer sorts the range after multiple training passes (#383) +* Attempt to make the sort behavior of the range consistent for character + vectors during training. Mixing of character and factor data will still lead + to different results depending on the training order. # scales 1.2.1 diff --git a/R/scale-discrete.R b/R/scale-discrete.R index 4ead4e64..38ec0c36 100644 --- a/R/scale-discrete.R +++ b/R/scale-discrete.R @@ -37,11 +37,16 @@ train_discrete <- function(new, existing = NULL, drop = FALSE, na.rm = FALSE) { } discrete_range <- function(old, new, drop = FALSE, na.rm = FALSE) { + is_factor <- is.factor(new) || is.factor(old) new <- clevels(new, drop = drop, na.rm = na.rm) if (is.null(old)) { return(new) } - if (!is.character(old)) old <- clevels(old, na.rm = na.rm) + if (!is.character(old)) { + old <- clevels(old, na.rm = na.rm) + } else { + old <- sort(old, na.last = if (na.rm) NA else TRUE) + } new_levels <- setdiff(new, as.character(old)) @@ -53,10 +58,10 @@ discrete_range <- function(old, new, drop = FALSE, na.rm = FALSE) { # Avoid sorting levels when dealing with factors to mimick behaviour of # clevels() - if (is.factor(new)) { + if (is_factor) { return(range) } - sort(range) + sort(range, na.last = if (na.rm) NA else TRUE) } clevels <- function(x, drop = FALSE, na.rm = FALSE) { diff --git a/tests/testthat/test-range.R b/tests/testthat/test-range.R index be6f0ef8..77d16b84 100644 --- a/tests/testthat/test-range.R +++ b/tests/testthat/test-range.R @@ -44,4 +44,5 @@ test_that("factor discrete ranges stay in order", { expect_equal(discrete_range(f, f), letters[3:1]) expect_equal(discrete_range(f, "c"), letters[3:1]) expect_equal(discrete_range(f, c("a", "b", "c")), letters[3:1]) + expect_equal(discrete_range(f, c("a", "b", "c", NA), na.rm = FALSE), letters[3:1]) })