diff --git a/NEWS.md b/NEWS.md index 5be8da7..3965289 100644 --- a/NEWS.md +++ b/NEWS.md @@ -11,6 +11,8 @@ editor_options: **Bugfixes:** - `position_quasirandom()` default `dodge.width` is now `NULL` instead of 0 (#79) +- `geom_beeswarm()` now works with infinite values, e.g. values of 0 on log + scales (#87) - A few stray references to the deprecated `groupOnX` argument are removed. ## v0.7.1 diff --git a/R/position-beeswarm.R b/R/position-beeswarm.R index dd55003..b755a6d 100644 --- a/R/position-beeswarm.R +++ b/R/position-beeswarm.R @@ -1,7 +1,7 @@ #' An internal function to calculate new positions for geom_beeswarm -#' +#' #' @family position adjustments -#' @param data A data.frame containing plotting data in columns x and y. +#' @param data A data.frame containing plotting data in columns x and y. #' Usually obtained from data processed by ggplot2. #' @param yLim.expand y data limits plus a small expansion using `grDevices::extendrange` #' @param xRange x axis scale range @@ -9,60 +9,60 @@ #' @param method Method for arranging points (see Details below) #' @param cex Scaling for adjusting point spacing (see [beeswarm::swarmx()]). #' Values between 1 (default) and 3 tend to work best. -#' @param side Direction to perform jittering: 0: both directions; +#' @param side Direction to perform jittering: 0: both directions; #' 1: to the right or upwards; -1: to the left or downwards. #' @param priority Method used to perform point layout (see Details below) -#' @param fast Use compiled version of swarm algorithm? This option is ignored +#' @param fast Use compiled version of swarm algorithm? This option is ignored #' for all methods expect `"swarm"` and `"compactswarm"`. #' @param corral `string`. Method used to adjust points that would be placed to #' wide horizontally, default is `"none"`. See details below. #' @param corral.width `numeric`. Width of the corral, default is `0.9`. 
-#' -#' @details -#' **method:** specifies the algorithm used to avoid overlapping points. The +#' +#' @details +#' **method:** specifies the algorithm used to avoid overlapping points. The #' default `"swarm"` method places points in increasing order. If a point would #' overlap with an existing point, it is shifted sideways (along the group axis) #' by a minimal amount sufficient to avoid overlap. -#' -#' While the `"swarm"` method places points in a predetermined -#' order, the `"compactswarm"` method uses a greedy strategy to determine which -#' point will be placed next. This often leads to a more tightly-packed layout. -#' The strategy is very simple: on each iteration, a point that can be placed as -#' close as possible to the non-data axis is chosen and placed. If there are two +#' +#' While the `"swarm"` method places points in a predetermined +#' order, the `"compactswarm"` method uses a greedy strategy to determine which +#' point will be placed next. This often leads to a more tightly-packed layout. +#' The strategy is very simple: on each iteration, a point that can be placed as +#' close as possible to the non-data axis is chosen and placed. If there are two #' or more equally good points, `priority` is used to break ties. -#' +#' #' The other 3 methods first discretise the values along the data axis, in order -#' to create more efficient packing. The `"square"` method places points on a -#' square grid, whereas `"hex"` uses a hexagonal grid. `"centre"`/`"center"` -#' uses a square grid to produce a symmetric swarm. The number of break points -#' for discretisation is determined by a combination of the available plotting +#' to create more efficient packing. The `"square"` method places points on a +#' square grid, whereas `"hex"` uses a hexagonal grid. `"centre"`/`"center"` +#' uses a square grid to produce a symmetric swarm. 
The number of break points +#' for discretisation is determined by a combination of the available plotting #' area and the `cex` argument. -#' -#' **priority:** controls the order in which points are placed, which generally -#' has a noticeable effect on the plot appearance. `"ascending"` gives the -#' 'traditional' beeswarm plot. `"descending"` is the opposite. `"density"` -#' prioritizes points with higher local density. `"random"` places points in a +#' +#' **priority:** controls the order in which points are placed, which generally +#' has a noticeable effect on the plot appearance. `"ascending"` gives the +#' 'traditional' beeswarm plot. `"descending"` is the opposite. `"density"` +#' prioritizes points with higher local density. `"random"` places points in a #' random order. `"none"` places points in the order provided. -#' +#' #' **corral:** By default, swarms from different groups are not prevented from -#' overlapping, i.e. `"corral = "none"`. Thus, datasets that are very large or -#' unevenly distributed may produce ugly overlapping beeswarms. To control +#' overlapping, i.e. `corral = "none"`. Thus, datasets that are very large or +#' unevenly distributed may produce ugly overlapping beeswarms. To control #' runaway points one can use the following methods. `"gutter"` collects runaway #' points along the boundary between groups. `"wrap"` implement periodic boundaries. #' `"random"` places runaway points randomly in the region. `"omit"` omits runaway #' points. 
-#' +#' #' @keywords internal #' @importFrom beeswarm swarmx -#' @seealso [geom_beeswarm()], [position_quasirandom()], +#' @seealso [geom_beeswarm()], [position_quasirandom()], #' [beeswarm::swarmx()] offset_beeswarm <- function( - data, + data, yLim.expand, xRange, yRange, method = "swarm", - cex = 1, + cex = 1, side = 0L, priority = "ascending", fast = TRUE, @@ -71,19 +71,42 @@ offset_beeswarm <- function( ) { if (method %in% c("swarm", "compactswarm")) { ## SWARM METHODS - + # Determine point size as per `ggbeeswarm` CRAN version 0.6.0 - + # divisor is a magic number to get a reasonable baseline # better option would be to figure out point size in user coords x.size <- xRange / 100 y.size <- yRange / 100 - + compact <- method == "compactswarm" - + + # Use this value for y when no values are infinite + y.value <- data$y + + mask.infinite <- is.infinite(data$y) + if (any(mask.infinite)) { + mask.na <- is.na(data$y) + mask.infinite.pos <- mask.infinite & data$y > 0 + mask.infinite.neg <- mask.infinite & data$y < 0 + if (all(mask.infinite | mask.na)) { + # set the infinity values to 1 and -1 (or NA for NA values) + data$y.inf <- sign(data$y) + } else { + range.non.inf <- range(data$y[!mask.na & !mask.infinite]) + # if the range of non-infinite values is 0, make the shift nonzero + shift.outside <- max(diff(range.non.inf), 100) + # Place the infinite values far outside of the range of the data + data$y.inf <- data$y + data$y.inf[mask.infinite.pos] <- range.non.inf[2] + shift.outside + data$y.inf[mask.infinite.neg] <- range.non.inf[1] - shift.outside + } + # Use this value for y when some values are infinite + y.value <- data$y.inf + } x.offset <- beeswarm::swarmx( - x = rep(0, length(data$y)), - y = data$y, + x = rep(0, length(data$y)), + y = y.value, xsize = x.size, ysize = y.size, cex = cex, side = side, priority = priority, @@ -92,26 +115,24 @@ offset_beeswarm <- function( } else { ## NON-SWARM METHODS # Determine point size as per `ggbeeswarm` CRAN version 0.6.0 
- + # divisor is a magic number to get a reasonable baseline # better option would be to figure out point size in user coords x.size <- xRange / 100 * cex y.size <- yRange / 100 * cex - + # Hex method specific step if (method == "hex") y.size <- y.size * sqrt(3) / 2 # Determine positions along the y axis breaks <- seq(yLim.expand[1], yLim.expand[2] + y.size, by = y.size) - + mids <- (utils::head(breaks, -1) + utils::tail(breaks, -1)) / 2 # include.lowest = T to account for cases where all y values are the same, # which otherwise would result in NAs. Fixes issue #85. y.index <- sapply(data$y, cut, breaks = breaks, include.lowest=T, labels = FALSE) - y.pos <- sapply(y.index, function(a) mids[a]) - - + y.pos <- sapply(y.index, function(a) mids[a]) if (any(data$y != y.pos)) { cli::cli_warn(c( @@ -120,22 +141,21 @@ offset_beeswarm <- function( ), .frequency = "once", .frequency_id = "beeswarm_method_data_axis_warn") } data$y <- y.pos - + # Determine positions along the x axis x.index <- determine_pos(y.index, method, side) - - x.offset <- x.index * x.size + x.offset <- x.index * x.size } - + ## CORRAL RUNAWAY POINTS if (corral != "none") { corral.low <- (side - 1) * corral.width / 2 corral.high <- (side + 1) * corral.width / 2 - + if (corral == "gutter") { x.offset <- sapply( - x.offset, + x.offset, function(zz) pmin(corral.high, pmax(corral.low, zz)) ) } @@ -143,98 +163,98 @@ offset_beeswarm <- function( if (side == -1L) { # special case with side=-1: reverse the corral to avoid artefacts at zero x.offset <- sapply( - x.offset, + x.offset, function(zz) corral.high - ((corral.high - zz) %% corral.width) ) } else { x.offset <- sapply( - x.offset, + x.offset, function(zz) ((zz - corral.low) %% corral.width) + corral.low ) } } if (corral == 'random') { x.offset <- sapply( - x.offset, + x.offset, function(zz) ifelse( - zz > corral.high | zz < corral.low, - yes = stats::runif(length(zz), corral.low, corral.high), + zz > corral.high | zz < corral.low, + yes = 
stats::runif(length(zz), corral.low, corral.high), no = zz ) ) } if (corral == 'omit') { x.offset <- sapply( - x.offset, + x.offset, function(zz) ifelse( - zz > corral.high | zz < corral.low, - yes = NA, + zz > corral.high | zz < corral.low, + yes = NA, no = zz ) ) } } - + data$x <- data$x + x.offset - return(data) + data } #' Arrange points using the `\link[beeswarm]` package. -#' +#' #' @family position adjustments #' @param method Method for arranging points (see Details below) #' @param cex Scaling for adjusting point spacing (see [beeswarm::swarmx()]). #' Values between 1 (default) and 3 tend to work best. -#' @param side Direction to perform jittering: 0: both directions; +#' @param side Direction to perform jittering: 0: both directions; #' 1: to the right or upwards; -1: to the left or downwards. #' @param priority Method used to perform point layout (see Details below) -#' @param fast Use compiled version of swarm algorithm? This option is ignored +#' @param fast Use compiled version of swarm algorithm? This option is ignored #' for all methods expect `"swarm"` and `"compactswarm"`. -#' @param dodge.width Amount by which points from different aesthetic groups +#' @param dodge.width Amount by which points from different aesthetic groups #' will be dodged. This requires that one of the aesthetics is a factor. #' @param corral `string`. Method used to adjust points that would be placed to #' wide horizontally, default is `"none"`. See details below. #' @param corral.width `numeric`. Width of the corral, default is `0.9`. #' @param groupOnX `r lifecycle::badge("deprecated")` No longer needed. -#' @details -#' **method:** specifies the algorithm used to avoid overlapping points. The +#' @details +#' **method:** specifies the algorithm used to avoid overlapping points. The #' default `"swarm"` method places points in increasing order. 
If a point would #' overlap with an existing point, it is shifted sideways (along the group axis) #' by a minimal amount sufficient to avoid overlap. -#' -#' While the `"swarm"` method places points in a predetermined -#' order, the `"compactswarm"` method uses a greedy strategy to determine which -#' point will be placed next. This often leads to a more tightly-packed layout. -#' The strategy is very simple: on each iteration, a point that can be placed as -#' close as possible to the non-data axis is chosen and placed. If there are two +#' +#' While the `"swarm"` method places points in a predetermined +#' order, the `"compactswarm"` method uses a greedy strategy to determine which +#' point will be placed next. This often leads to a more tightly-packed layout. +#' The strategy is very simple: on each iteration, a point that can be placed as +#' close as possible to the non-data axis is chosen and placed. If there are two #' or more equally good points, `priority` is used to break ties. -#' +#' #' The other 3 methods first discretise the values along the data axis, in order -#' to create more efficient packing. The `"square"` method places points on a -#' square grid, whereas `"hex"` uses a hexagonal grid. `"centre"`/`"center"` -#' uses a square grid to produce a symmetric swarm. The number of break points -#' for discretisation is determined by a combination of the available plotting +#' to create more efficient packing. The `"square"` method places points on a +#' square grid, whereas `"hex"` uses a hexagonal grid. `"centre"`/`"center"` +#' uses a square grid to produce a symmetric swarm. The number of break points +#' for discretisation is determined by a combination of the available plotting #' area and the `cex` argument. -#' -#' **priority:** controls the order in which points are placed, which generally -#' has a noticeable effect on the plot appearance. `"ascending"` gives the -#' 'traditional' beeswarm plot. `"descending"` is the opposite. 
`"density"` -#' prioritizes points with higher local density. `"random"` places points in a +#' +#' **priority:** controls the order in which points are placed, which generally +#' has a noticeable effect on the plot appearance. `"ascending"` gives the +#' 'traditional' beeswarm plot. `"descending"` is the opposite. `"density"` +#' prioritizes points with higher local density. `"random"` places points in a #' random order. `"none"` places points in the order provided. -#' +#' #' **corral:** By default, swarms from different groups are not prevented from -#' overlapping, i.e. `"corral = "none"`. Thus, datasets that are very large or -#' unevenly distributed may produce ugly overlapping beeswarms. To control +#' overlapping, i.e. `corral = "none"`. Thus, datasets that are very large or +#' unevenly distributed may produce ugly overlapping beeswarms. To control #' runaway points one can use the following methods. `"gutter"` collects runaway #' points along the boundary between groups. `"wrap"` implement periodic boundaries. #' `"random"` places runaway points randomly in the region. `"omit"` omits runaway #' points. -#' -#' +#' +#' #' @export #' @importFrom beeswarm swarmx -#' @seealso [geom_beeswarm()], [position_quasirandom()], +#' @seealso [geom_beeswarm()], [position_quasirandom()], #' [beeswarm::swarmx()] position_beeswarm <- function( method = "swarm", @@ -242,21 +262,21 @@ position_beeswarm <- function( side = 0L, priority = "ascending", fast = TRUE, - groupOnX = NULL, + groupOnX = NULL, dodge.width = 0, corral = "none", corral.width = 0.2 ) { - + if (!missing(groupOnX)) { lifecycle::deprecate_soft( - when = "0.7.1", what = "position_beeswarm(groupOnX)", + when = "0.7.1", what = "position_beeswarm(groupOnX)", details='ggplot2 now handles this case automatically.' 
) - } + } if (method == "centre") method <- "center" - - ggproto(NULL, PositionBeeswarm, + + ggproto(NULL, PositionBeeswarm, method = method, cex = cex, side = side, @@ -268,15 +288,15 @@ position_beeswarm <- function( ) } -PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, +PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, required_aes = c('x', 'y'), setup_params = function(self, data) { flipped_aes <- has_flipped_aes(data) data <- flip_data(data, flipped_aes) - + # get y range of data and extend it a little yLim.expand <- grDevices::extendrange(data$y, f = 0.01) - + list( # groupOnX = self$groupOnX, deprecated method = self$method, @@ -288,12 +308,12 @@ PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, corral = self$corral, corral.width = self$corral.width, yLim.expand = yLim.expand, - flipped_aes = flipped_aes + flipped_aes = flipped_aes ) }, compute_panel = function(data, params, scales) { data <- flip_data(data, params$flipped_aes) - + # get plot limits if (params$flipped_aes) { xRange <- get_range(scales$y) @@ -302,7 +322,7 @@ PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, xRange <- get_range(scales$x) yRange <- get_range(scales$y) } - + data <- ggplot2:::collide( data, params$dodge.width, @@ -310,14 +330,14 @@ PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, strategy = ggplot2:::pos_dodge, check.width = FALSE ) - + # split data.frame into list of data.frames if(!is.null(params$dodge.width)) { data <- split(data, data$group) } else { data <- split(data, data$x) } - + # perform swarming separately for each data.frame data <- lapply( data, @@ -333,10 +353,10 @@ PositionBeeswarm <- ggplot2::ggproto("PositionBeeswarm", Position, corral = params$corral, corral.width = params$corral.width ) - + # recombine list of data.frames into one data <- Reduce(rbind, data) - + flip_data(data, params$flipped_aes) } ) @@ -344,7 +364,7 @@ PositionBeeswarm <- 
ggplot2::ggproto("PositionBeeswarm", Position, get_range <- function(scales) { if (is.null(scales$limits)) lim <- scales$range$range else lim <- scales$get_limits() - + if (inherits(scales, "ScaleContinuous")) { out <- diff(lim) } else if (inherits(scales, "ScaleDiscrete")) { @@ -352,17 +372,17 @@ get_range <- function(scales) { } else { stop("Unknown scale type") } - + if (out == 0) out <- 1 out } determine_pos <- function(v, method, side) { - # if(length(stats::na.omit(v)) == 0) + # if(length(stats::na.omit(v)) == 0) # return(v) - + v.s <- lapply(split(v, v), seq_along) - + if(method %in% c("center", "square") && side == -1) v.s <- lapply(v.s, function(a) a - max(a)) else if(method %in% c("center", "square") && side == 1)