move example into vignette and add simple example
dblodgett-usgs committed Nov 19, 2024
1 parent 07fe428 commit 8fce8f5
Showing 3 changed files with 372 additions and 359 deletions.
264 changes: 75 additions & 189 deletions R/calculate_area_intersection_weights.R
@@ -1,4 +1,4 @@
#' Area Weighted Intersection (areal implementation)
#' Area Weighted Intersection
#' @description Returns the fractional percent of each
#' feature in x that is covered by each intersecting feature
#' in y. These can be used as the weights in an area-weighted
@@ -11,209 +11,96 @@
#'
#' @param x sf data.frame source features including one geometry column and one identifier column
#' @param y sf data.frame target features including one geometry column and one identifier column
#' @param normalize logical return normalized weights or not. See details and examples.
#' @param normalize logical return normalized weights or not.
#'
#' Normalized weights express the fraction of **target** polygons covered by
#' a portion of each **source** polygon. They are normalized in that the area
#' of each **source** polygon has already been factored into the weight.
#'
#' Un-normalized weights express the fraction of **source** polygons covered by
#' a portion of each **target** polygon. This is a more general form that requires
#' knowledge of the area of each **source** polygon to derive area-weighted
#' statistics from **source** to **target**.
#'
#' See details and examples for more regarding this distinction.
#'
#' @param allow_lonlat boolean If FALSE (the default) lon/lat target features are not allowed.
#' Intersections in lon/lat are generally not valid and problematic at the international date line.
#'
#' @return data.frame containing the fraction of each feature in x that is
#' covered by each feature in y.
#'
#' @details
#'
#' Two versions of weights are available:
#'
#' If `normalize = FALSE` and a polygon from x (source) is entirely within a polygon in y
#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target)
#' and 50% in another, there will be two rows, one for each x/y pair of features, with w = 0.5
#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that
#' feature.
#'
#' For `normalize = FALSE`, the area weighted mean calculation must include the area of each
#' x (source) polygon, as in:
#'
#' > *in this case, `area` is the area of source polygons and the operation is grouped
#' by target polygon id.*
#'
#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)`
#'
#' If `normalize = TRUE`, weights are divided by the total intersection area with each target
#' polygon such that weights sum to 1 per TARGET polygon wherever source polygons overlap it.
#'
#' For `normalize = TRUE`, no area term is required in the area weighted mean calculation,
#' as in:
#'
#' > `sum( (val * w), na.rm = TRUE ) / sum(w)`
#'
#' See examples for an illustration of these two modes.
#'
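#' As a small worked illustration with hypothetical numbers: suppose two source
#' polygons with values 10 and 20 and areas 4 and 1 each have half their area inside
#' a single target polygon, so the intersection areas are 2 and 0.5. With
#' `normalize = FALSE`, w = 0.5 for both and the target value is
#' (10 * 0.5 * 4 + 20 * 0.5 * 1) / (0.5 * 4 + 0.5 * 1) = 30 / 2.5 = 12. With
#' `normalize = TRUE`, w = 2 / 2.5 = 0.8 and 0.5 / 2.5 = 0.2, and the target value is
#' (10 * 0.8 + 20 * 0.2) / (0.8 + 0.2) = 12, with no area term needed.
#'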
#' @examples
#'
#' library(dplyr)
#' library(sf)
#' library(ncdfgeom)
#'
#' g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))
#'
#' a1 = sf::st_polygon(g) * 0.8
#' a2 = a1 + c(1, 2)
#' a3 = a1 + c(-1, 2)
#'
#' b1 = sf::st_polygon(g)
#' b2 = b1 + 2
#' b3 = b1 + c(-0.2, 2)
#' b4 = b1 + c(2.2, 0)
#'
#' a = sf::st_sfc(a1,a2,a3)
#'
#' b = sf::st_sfc(b1, b2, b3, b4)
#'
#' plot(c(a,b), border = NA)
#' plot(a, border = 'darkgreen', add = TRUE)
#' plot(b, border = 'red', add = TRUE)
#'
#' a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3)))
#' b <- sf::st_sf(b, data.frame(idb = c(7, 8, 9, 10)))
#'
#' text(sapply(sf::st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4),
#' sapply(sf::st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3),
#' a$ida, col = "darkgreen")
#'
#' text(sapply(sf::st_geometry(b), \(x) mean(x[[1]][,1]) + 0.4),
#' sapply(sf::st_geometry(b), \(x) mean(x[[1]][,2])),
#' b$idb, col = "red")
#'
#' sf::st_agr(a) <- sf::st_agr(b) <- "constant"
#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
#'
#' calculate_area_intersection_weights(a, b, normalize = FALSE)
#'
#' # NOTE: normalize = FALSE so weights sum to 1 per source polygon
#' # when source is fully within target.
#'
#' calculate_area_intersection_weights(a, b, normalize = TRUE)
#'
#' # NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap
#' # is ignored as if it does not exist.
#'
#' calculate_area_intersection_weights(b, a, normalize = FALSE)
#'
#' # NOTE: normalize = FALSE so weights never sum to 1 since no source is fully
#' # within target.
#'
#' calculate_area_intersection_weights(b, a, normalize = TRUE)
#'
#' # NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap
#' # is ignored as if it does not exist.
#'
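#' # as a quick check, the weight sums described in the notes above can be
#' # confirmed directly from the returned data.frames:
#' a_b_check <- calculate_area_intersection_weights(a, b, normalize = FALSE)
#' summarize(group_by(a_b_check, ida), w = sum(w)) # 1 only where `a` is fully covered
#'
#' b_a_check <- calculate_area_intersection_weights(b, a, normalize = TRUE)
#' summarize(group_by(b_a_check, ida), w = sum(w)) # 1 per target `a` with any overlap
#'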
#' # a more typical arrangement of polygons
#'
#' g <- list(rbind(c(-1,-1), c(1,-1), c(1,1),
#' c(-1,1), c(-1,-1)))
#'
#' a1 = st_polygon(g) * 0.75 + c(-.25, -.25)
#' a2 = a1 + 1.5
#' a3 = a1 + c(0, 1.5)
#' a4 = a1 + c(1.5, 0)
#'
#' b1 = st_polygon(g)
#' b2 = b1 + 2
#' b3 = b1 + c(0, 2)
#' b4 = b1 + c(2, 0)
#'
#' a = st_sfc(a1,a2, a3, a4)
#' b = st_sfc(b1, b2, b3, b4)
#'
#' b <- st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
#' a <- st_sf(a, data.frame(ida = c(6, 7, 8, 9)))
#'
#' plot(st_geometry(b), border = 'red', lwd = 3)
#' plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE)
#'
#' text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4),
#' sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3),
#' a$ida, col = "darkgreen")
#'
#' text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4),
#' sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5),
#' b$idb, col = "red")
#'
#' st_agr(a) <- st_agr(b) <- "constant"
#' st_crs(b) <- st_crs(a) <- st_crs(5070)
#'
#' a$val <- c(1, 2, 3, 4)
#' a$a_areasqkm <- 1.5 ^ 2
#'
#' plot(a["val"], reset = FALSE)
#' plot(st_geometry(b), border = 'red', lwd = 3, add = TRUE, reset = FALSE)
#' plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE)
#'
#' text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4),
#' sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3),
#' a$ida, col = "darkgreen")
#'
#' text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4),
#' sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5),
#' b$idb, col = "red")
#'
#' # say we have data from `a` that we want sampled to `b`.
#' # this gives the percent of each `a` that intersects each `b`
#'
#' (a_b <- calculate_area_intersection_weights(
#' select(a, ida), select(b, idb), normalize = FALSE))
#'
#' # NOTE: `w` sums to 1 per `a` in all cases
#'
#' summarize(group_by(a_b, ida), w = sum(w))
#'
#' # Since normalize is false, we apply weights like:
#' st_drop_geometry(a) |>
#' left_join(a_b, by = "ida") |>
#' mutate(a_areasqkm = 1.5 ^ 2) |> # add area of each polygon in `a`
#' group_by(idb) |> # group so we get one row per `b`
#' # now we calculate the value for each b with fraction of the area of each
#' # polygon in `a` per polygon in `b` with an equation like this:
#' summarize(
#' new_val = sum( (val * w * a_areasqkm), na.rm = TRUE ) / sum(w * a_areasqkm))
#'
#' # NOTE: `w` is the fraction of each polygon in `a` that intersects a polygon in `b`.
#' # Each `w` must be multiplied by the area of the `a` polygon it belongs to so that
#' # the area-weighted mean is computed correctly.
#'
#' # we can go in reverse if we had data from b that we want sampled to a
#' library(sf)
#'
#' (b_a <- calculate_area_intersection_weights(
#' select(b, idb), select(a, ida), normalize = FALSE))
#' source <- st_sf(source_id = c(1, 2),
#' val = c(10, 20),
#' geom = st_as_sfc(c(
#' "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))",
#' "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))")))
#'
#' # NOTE: `w` sums to 1 per `b` (source) only where `b` is fully covered by `a` (target).
#' source$area <- as.numeric(st_area(source))
#'
#' summarize(group_by(b_a, idb), w = sum(w))
#' target <- st_sf(target_id = "a",
#' geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))"))
#'
#' # Now let's look at what happens if we set normalize = TRUE. Here we
#' # get `a` as source and `b` as target but normalize the weights so
#' # the area of a is built into `w`.
#' plot(source['val'], reset = FALSE)
#' plot(st_geometry(target), add = TRUE)
#'
#' (a_b <- calculate_area_intersection_weights(
#' select(a, ida), select(b, idb), normalize = TRUE))
#' (w <-
#' calculate_area_intersection_weights(source[c("source_id", "geom")],
#' target[c("target_id", "geom")],
#' normalize = FALSE, allow_lonlat = TRUE))
#'
#' # NOTE: if we summarize by `b` (target), `w` sums to 1, whereas above, with
#' # normalize = FALSE, `w` summed to 1 per `a` (source).
#' (res <-
#' merge(st_drop_geometry(source), w, by = "source_id"))
#'
#' summarize(group_by(a_b, idb), w = sum(w))
#' sum(res$val * res$w * res$area) / sum(res$w * res$area)
#'
#' # Since normalize is true, we apply weights like:
#' st_drop_geometry(a) |>
#' left_join(a_b, by = "ida") |>
#' group_by(idb) |> # group so we get one row per `b`
#' # now we weight by the percent of each polygon in `b` per polygon in `a`
#' summarize(new_val = sum( (val * w), na.rm = TRUE ))
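#'
#' # when target polygons are not fully covered by source polygons, dividing by the
#' # weight sum, as in the documented formula, is the safer form of this calculation:
#' st_drop_geometry(a) |>
#'   left_join(a_b, by = "ida") |>
#'   group_by(idb) |>
#'   summarize(new_val = sum( (val * w), na.rm = TRUE ) / sum(w, na.rm = TRUE))
#'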
#' (w <-
#' calculate_area_intersection_weights(source[c("source_id", "geom")],
#' target[c("target_id", "geom")],
#' normalize = TRUE, allow_lonlat = TRUE))
#' (res <-
#' merge(st_drop_geometry(source), w, by = "source_id"))
#'
#' # NOTE: `w` is the fraction of the polygon from `a` overlapping the polygon from `b`.
#' # The area of `a` is built into the weight, so we just sum the weight times the value per polygon.
#' sum(res$val * res$w) / sum(res$w)
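#'
#' # as a cross-check using the objects above, the normalized and un-normalized
#' # forms give the same area-weighted mean for this target:
#' w_u <- calculate_area_intersection_weights(source[c("source_id", "geom")],
#'                                            target[c("target_id", "geom")],
#'                                            normalize = FALSE, allow_lonlat = TRUE)
#' res_u <- merge(st_drop_geometry(source), w_u, by = "source_id")
#' all.equal(sum(res_u$val * res_u$w * res_u$area) / sum(res_u$w * res_u$area),
#'           sum(res$val * res$w) / sum(res$w))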
#'
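#' # rather than setting allow_lonlat = TRUE, lon/lat inputs can be projected to an
#' # equal-area CRS before computing weights. This sketch treats the coordinates above
#' # as WGS84 degrees and uses EPSG:5070 (CONUS Albers) purely as an illustration.
#' source_proj <- st_transform(st_set_crs(source, 4326), 5070)
#' target_proj <- st_transform(st_set_crs(target, 4326), 5070)
#' calculate_area_intersection_weights(source_proj[c("source_id", "geom")],
#'                                     target_proj[c("target_id", "geom")],
#'                                     normalize = TRUE)
#'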
#' @export
#' @importFrom sf st_intersection st_set_geometry st_area st_crs st_drop_geometry
#' @importFrom dplyr mutate group_by right_join select ungroup left_join mutate

calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = FALSE) {

if(missing(normalize)) {
warning("Required input normalize is missing, defaulting to FALSE.")
normalize <- FALSE
@@ -251,27 +138,26 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =
int <- areal::aw_intersect(y,
source = x,
areaVar = "area_intersection")
int <- areal::aw_total(int, source = x,
id = "varx", # the unique id in the "source" x
areaVar = "area_intersection",
totalVar = "totalArea_x",
type = "extensive",
weight = "total")
int <- areal::aw_weight(int, areaVar = "area_intersection",
totalVar = "totalArea_x",
areaWeight = "areaWeight_x_y")

int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")

if(!normalize) {

if(normalize) {
int <- areal::aw_total(int,
source = x,
id = "varx", # the unique id in the "source" x
areaVar = "area_intersection",
totalVar = "totalArea_x",
type = "extensive",
weight = "total")

# for normalized, we return the percent of each target covered by each source
int <- areal::aw_intersect(y,
source = x,
areaVar = "area_intersection")
int <- areal::aw_weight(int, areaVar = "area_intersection",
totalVar = "totalArea_x",
areaWeight = "areaWeight_x_y")

} else {

# for normalized, we sum the intersection area by the total target intersection area
int <- ungroup(mutate(group_by(int, vary),
                      totalArea_y = sum(.data$area_intersection)))

int <- areal::aw_weight(int,
areaVar = "area_intersection",
@@ -283,7 +169,7 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =
int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")

int <- select(int, varx, vary, w = "areaWeight_x_y")

names(int) <- c(id_x, id_y, "w")

return(dplyr::as_tibble(int))
