diff --git a/R/calculate_area_intersection_weights.R b/R/calculate_area_intersection_weights.R index 8c64688..30d1529 100644 --- a/R/calculate_area_intersection_weights.R +++ b/R/calculate_area_intersection_weights.R @@ -1,4 +1,4 @@ -#' Area Weighted Intersection (areal implementation) +#' Area Weighted Intersection #' @description Returns the fractional percent of each #' feature in x that is covered by each intersecting feature #' in y. These can be used as the weights in an area-weighted #' @@ -11,9 +11,22 @@ #' #' @param x sf data.frame source features including one geometry column and one identifier column #' @param y sf data.frame target features including one geometry column and one identifier column -#' @param normalize logical return normalized weights or not. See details and examples. +#' @param normalize logical return normalized weights or not. +#' +#' Normalized weights express the fraction of **target** polygons covered by +#' a portion of each **source** polygon. They are normalized in that the area +#' of each **source** polygon has already been factored into the weight. +#' +#' Un-normalized weights express the fraction of **source** polygons covered by +#' a portion of each **target** polygon. This is a more general form that requires +#' knowledge of the area of each **source** polygon to derive area-weighted +#' statistics from **source** to **target**. +#' +#' See details and examples for more regarding this distinction. +#' #' @param allow_lonlat boolean If FALSE (the default) lon/lat target features are not allowed. #' Intersections in lon/lat are generally not valid and problematic at the international date line. +#' #' @return data.frame containing fraction of each feature in x that is #' covered by each feature in y. #' @@ -21,199 +34,73 @@ #' #' Two versions of weights are available: #' -#' If `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y -#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target) -#' and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5 -#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that -#' feature. -#' -#' For `normalize = FALSE` the area weighted mean calculation must include the area of each -#' x (source) polygon as in: -#' -#' > *in this case, `area` is the area of source polygons and you would do this operation grouped -#' by target polygon id.* +#' With `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y +#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target) +#' and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5 +#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that +#' feature. +#' +#' For `normalize = FALSE` the area weighted mean calculation must include the area of each +#' x (source) polygon as in: +#' +#' > *in this case, `area` is the area of source polygons and you would do this operation grouped +#' by target polygon id.* #' -#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)` -#' +#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)` +#' #' If `normalize = TRUE`, weights are divided by the target polygon area such that weights #' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
#' -#' For `normalize = FALSE` the area weighted mean calculation no area is required -#' as in: +#' For `normalize = TRUE` no area is required in the area weighted mean calculation, +#' as in: #' -#' > `sum( (val * w), na.rm = TRUE ) / sum(w)` +#' > `sum( (val * w), na.rm = TRUE ) / sum(w)` #' -#' See examples for illustration of these two modes. +#' See examples for illustration of these two modes. #' #' @examples -#' -#' library(dplyr) -#' library(sf) -#' library(ncdfgeom) -#' -#' g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1))) -#' -#' a1 = sf::st_polygon(g) * 0.8 -#' a2 = a1 + c(1, 2) -#' a3 = a1 + c(-1, 2) -#' -#' b1 = sf::st_polygon(g) -#' b2 = b1 + 2 -#' b3 = b1 + c(-0.2, 2) -#' b4 = b1 + c(2.2, 0) -#' -#' a = sf::st_sfc(a1,a2,a3) -#' -#' b = sf::st_sfc(b1, b2, b3, b4) -#' -#' plot(c(a,b), border = NA) -#' plot(a, border = 'darkgreen', add = TRUE) -#' plot(b, border = 'red', add = TRUE) -#' -#' a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3))) -#' b <- sf::st_sf(b, data.frame(idb = c(7, 8, 9, 10))) -#' -#' text(sapply(sf::st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), -#' sapply(sf::st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), -#' a$ida, col = "darkgreen") -#' -#' text(sapply(sf::st_geometry(b), \(x) mean(x[[1]][,1]) + 0.4), -#' sapply(sf::st_geometry(b), \(x) mean(x[[1]][,2])), -#' b$idb, col = "red") #' -#' sf::st_agr(a) <- sf::st_agr(b) <- "constant" -#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070) -#' -#' calculate_area_intersection_weights(a, b, normalize = FALSE) -#' -#' # NOTE: normalize = FALSE so weights sum to 1 per source polygon -#' # when source is fully within target. -#' -#' calculate_area_intersection_weights(a, b, normalize = TRUE) -#' -#' # NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap -#' # is ignored as if it does not exist. -#' -#' calculate_area_intersection_weights(b, a, normalize = FALSE) -#' -#' # NOTE: normalize = FALSE so weights never sum to 1 since no source is fully -#' # within target. -#' -#' calculate_area_intersection_weights(b, a, normalize = TRUE) -#' -#' # NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap -#' # is ignored as if it does not exist.
-#' -#' # a more typical arrangement of polygons -#' -#' g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), -#' c(-1,1), c(-1,-1))) -#' -#' a1 = st_polygon(g) * 0.75 + c(-.25, -.25) -#' a2 = a1 + 1.5 -#' a3 = a1 + c(0, 1.5) -#' a4 = a1 + c(1.5, 0) -#' -#' b1 = st_polygon(g) -#' b2 = b1 + 2 -#' b3 = b1 + c(0, 2) -#' b4 = b1 + c(2, 0) -#' -#' a = st_sfc(a1,a2, a3, a4) -#' b = st_sfc(b1, b2, b3, b4) -#' -#' b <- st_sf(b, data.frame(idb = c(1, 2, 3, 4))) -#' a <- st_sf(a, data.frame(ida = c(6, 7, 8, 9))) -#' -#' plot(st_geometry(b), border = 'red', lwd = 3) -#' plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE) -#' -#' text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), -#' sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), -#' a$ida, col = "darkgreen") -#' -#' text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4), -#' sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5), -#' b$idb, col = "red") -#' -#' st_agr(a) <- st_agr(b) <- "constant" -#' st_crs(b) <- st_crs(a) <- st_crs(5070) -#' -#' a$val <- c(1, 2, 3, 4) -#' a$a_areasqkm <- 1.5 ^ 2 -#' -#' plot(a["val"], reset = FALSE) -#' plot(st_geometry(b), border = 'red', lwd = 3, add = TRUE, reset = FALSE) -#' plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE) -#' -#' text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), -#' sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), -#' a$ida, col = "darkgreen") -#' -#' text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4), -#' sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5), -#' b$idb, col = "red") -#' -#' # say we have data from `a` that we want sampled to `b`. -#' # this gives the percent of each `a` that intersects each `b` -#' -#' (a_b <- calculate_area_intersection_weights( -#' select(a, ida), select(b, idb), normalize = FALSE)) -#' -#' # NOTE: `w` sums to 1 per `a` in all cases -#' -#' summarize(group_by(a_b, ida), w = sum(w)) -#' -#' # Since normalize is false, we apply weights like: -#' st_drop_geometry(a) |> -#' left_join(a_b, by = "ida") |> -#' mutate(a_areasqkm = 1.5 ^ 2) |> # add area of each polygon in `a` -#' group_by(idb) |> # group so we get one row per `b` -#' # now we calculate the value for each b with fraction of the area of each -#' # polygon in `a` per polygon in `b` with an equation like this: -#' summarize( -#' new_val = sum( (val * w * a_areasqkm), na.rm = TRUE ) / sum(w * a_areasqkm)) -#' -#' # NOTE: `w` is the fraction of the polygon in a. We need to multiply w by the -#' # unique area of the polygon it is associated with to get the weighted mean weight. -#' -#' # we can go in reverse if we had data from b that we want sampled to a +#' library(sf) #' -#' (b_a <- calculate_area_intersection_weights( -#' select(b, idb), select(a, ida), normalize = FALSE)) +#' source <- st_sf(source_id = c(1, 2), +#' val = c(10, 20), +#' geom = st_as_sfc(c( +#' "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))", +#' "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))"))) #' -#' # NOTE: `w` sums to 1 per `b` (source) only where `b` is fully covered by `a` (target). +#' source$area <- as.numeric(st_area(source)) #' -#' summarize(group_by(b_a, idb), w = sum(w)) +#' target <- st_sf(target_id = "a", +#' geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))")) #' -#' # Now let's look at what happens if we set normalize = TRUE. Here we -#' # get `a` as source and `b` as target but normalize the weights so -#' # the area of a is built into `w`. 
+#' plot(source['val'], reset = FALSE) +#' plot(st_geometry(target), add = TRUE) #' -#' (a_b <- calculate_area_intersection_weights( -#' select(a, ida), select(b, idb), normalize = TRUE)) +#' (w <- +#' calculate_area_intersection_weights(source[c("source_id", "geom")], +#' target[c("target_id", "geom")], +#' normalize = FALSE, allow_lonlat = TRUE)) #' -#' # NOTE: if we summarize by `b` (target) `w` sums to 1 where above, with -#' # normalize = FALSE, `w` summed to one per `a` (source). +#' (res <- +#' merge(st_drop_geometry(source), w, by = "source_id")) #' -#' summarize(group_by(a_b, idb), w = sum(w)) +#' sum(res$val * res$w * res$area) / sum(res$w * res$area) #' -#' # Since normalize is false, we apply weights like: -#' st_drop_geometry(a) |> -#' left_join(a_b, by = "ida") |> -#' group_by(idb) |> # group so we get one row per `b` -#' # now we weight by the percent of each polygon in `b` per polygon in `a` -#' summarize(new_val = sum( (val * w), na.rm = TRUE )) +#' (w <- +#' calculate_area_intersection_weights(source[c("source_id", "geom")], +#' target[c("target_id", "geom")], +#' normalize = TRUE, allow_lonlat = TRUE)) +#' (res <- +#' merge(st_drop_geometry(source), w, by = "source_id")) #' -#' # NOTE: `w` is the fraction of the polygon from `a` overlapping the polygon from `b`. -#' # The area of `a` is built into the weight so we just sum the weith times value oer polygon. +#' sum(res$val * res$w) / sum(res$w) #' #' @export #' @importFrom sf st_intersection st_set_geometry st_area st_crs st_drop_geometry #' @importFrom dplyr mutate group_by right_join select ungroup left_join mutate calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = FALSE) { - + if(missing(normalize)) { warning("Required input normalize is missing, defaulting to FALSE.") normalize <- FALSE @@ -251,27 +138,26 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = int <- areal::aw_intersect(y, source = x, areaVar = "area_intersection") - int <- areal::aw_total(int, source = x, - id = "varx", # the unique id in the "source" x - areaVar = "area_intersection", - totalVar = "totalArea_x", - type = "extensive", - weight = "total") - int <- areal::aw_weight(int, areaVar = "area_intersection", - totalVar = "totalArea_x", - areaWeight = "areaWeight_x_y") - - int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx") + + if(!normalize) { - if(normalize) { + int <- areal::aw_total(int, + source = x, + id = "varx", # the unique id in the "source" x + areaVar = "area_intersection", + totalVar = "totalArea_x", + type = "extensive", + weight = "total") - # for normalized, we return the percent of each target covered by each source - int <- areal::aw_intersect(y, - source = x, - areaVar = "area_intersection") + int <- areal::aw_weight(int, areaVar = "area_intersection", + totalVar = "totalArea_x", + areaWeight = "areaWeight_x_y") + + } else { # for normalized, we sum the intersection area by the total target intersection area - int <- ungroup(mutate(group_by(int, vary), totalArea_y = sum(.data$area_intersection))) + int <- ungroup(mutate(group_by(int, vary), + totalArea_y = sum(.data$area_intersection))) int <- areal::aw_weight(int, areaVar = "area_intersection", @@ -283,7 +169,7 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx") int <- select(int, varx, vary, w = "areaWeight_x_y") - + names(int) <- c(id_x, id_y, "w") return(dplyr::as_tibble(int)) diff --git 
a/man/calculate_area_intersection_weights.Rd b/man/calculate_area_intersection_weights.Rd index 2bbe991..d8a1862 100644 --- a/man/calculate_area_intersection_weights.Rd +++ b/man/calculate_area_intersection_weights.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/calculate_area_intersection_weights.R \name{calculate_area_intersection_weights} \alias{calculate_area_intersection_weights} -\title{Area Weighted Intersection (areal implementation)} +\title{Area Weighted Intersection} \usage{ calculate_area_intersection_weights(x, y, normalize, allow_lonlat = FALSE) } @@ -11,7 +11,18 @@ calculate_area_intersection_weights(x, y, normalize, allow_lonlat = FALSE) \item{y}{sf data.frame target features including one geometry column and one identifier column} -\item{normalize}{logical return normalized weights or not. See details and examples.} +\item{normalize}{logical return normalized weights or not. + +Normalized weights express the fraction of **target** polygons covered by +a portion of each **source** polygon. They are normalized in that the area +of each **source** polygon has already been factored into the weight. + +Un-normalized weights express the fraction of **source** polygons covered by +a portion of each **target** polygon. This is a more general form that requires +knowledge of the area of each **source** polygon to derive area-weighted +statistics from **source** to **target**. + +See details and examples for more regarding this distinction.} \item{allow_lonlat}{boolean If FALSE (the default) lon/lat target features are not allowed. Intersections in lon/lat are generally not valid and problematic at the international date line.} @@ -34,191 +45,65 @@ from the \href{https://chris-prener.github.io/areal/}{areal package}. \details{ Two versions of weights are available: -If `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y - (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target) - and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5 - in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that - feature. - - For `normalize = FALSE` the area weighted mean calculation must include the area of each - x (source) polygon as in: - - > *in this case, `area` is the area of source polygons and you would do this operation grouped - by target polygon id.* - - > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)` - +With `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y +(target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target) +and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5 +in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that +feature. + +For `normalize = FALSE` the area weighted mean calculation must include the area of each +x (source) polygon as in: + +> *in this case, `area` is the area of source polygons and you would do this operation grouped +by target polygon id.* + +> `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)` + If `normalize = TRUE`, weights are divided by the target polygon area such that weights sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
- For `normalize = FALSE` the area weighted mean calculation no area is required - as in: +For `normalize = TRUE` no area is required in the area weighted mean calculation, +as in: - > `sum( (val * w), na.rm = TRUE ) / sum(w)` +> `sum( (val * w), na.rm = TRUE ) / sum(w)` - See examples for illustration of these two modes. +See examples for illustration of these two modes. } \examples{ -library(dplyr) library(sf) -library(ncdfgeom) - -g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1))) - -a1 = sf::st_polygon(g) * 0.8 -a2 = a1 + c(1, 2) -a3 = a1 + c(-1, 2) - -b1 = sf::st_polygon(g) -b2 = b1 + 2 -b3 = b1 + c(-0.2, 2) -b4 = b1 + c(2.2, 0) - -a = sf::st_sfc(a1,a2,a3) - -b = sf::st_sfc(b1, b2, b3, b4) - -plot(c(a,b), border = NA) -plot(a, border = 'darkgreen', add = TRUE) -plot(b, border = 'red', add = TRUE) - -a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3))) -b <- sf::st_sf(b, data.frame(idb = c(7, 8, 9, 10))) - -text(sapply(sf::st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), - sapply(sf::st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), - a$ida, col = "darkgreen") - -text(sapply(sf::st_geometry(b), \(x) mean(x[[1]][,1]) + 0.4), - sapply(sf::st_geometry(b), \(x) mean(x[[1]][,2])), - b$idb, col = "red") - -sf::st_agr(a) <- sf::st_agr(b) <- "constant" -sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070) - -calculate_area_intersection_weights(a, b, normalize = FALSE) - -# NOTE: normalize = FALSE so weights sum to 1 per source polygon -# when source is fully within target. - -calculate_area_intersection_weights(a, b, normalize = TRUE) - -# NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap -# is ignored as if it does not exist. - -calculate_area_intersection_weights(b, a, normalize = FALSE) - -# NOTE: normalize = FALSE so weights never sum to 1 since no source is fully -# within target. - -calculate_area_intersection_weights(b, a, normalize = TRUE) - -# NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap -# is ignored as if it does not exist. - -# a more typical arrangement of polygons - -g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), - c(-1,1), c(-1,-1))) - -a1 = st_polygon(g) * 0.75 + c(-.25, -.25) -a2 = a1 + 1.5 -a3 = a1 + c(0, 1.5) -a4 = a1 + c(1.5, 0) - -b1 = st_polygon(g) -b2 = b1 + 2 -b3 = b1 + c(0, 2) -b4 = b1 + c(2, 0) - -a = st_sfc(a1,a2, a3, a4) -b = st_sfc(b1, b2, b3, b4) - -b <- st_sf(b, data.frame(idb = c(1, 2, 3, 4))) -a <- st_sf(a, data.frame(ida = c(6, 7, 8, 9))) - -plot(st_geometry(b), border = 'red', lwd = 3) -plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE) - -text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), - sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), - a$ida, col = "darkgreen") - -text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4), - sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5), - b$idb, col = "red") - -st_agr(a) <- st_agr(b) <- "constant" -st_crs(b) <- st_crs(a) <- st_crs(5070) - -a$val <- c(1, 2, 3, 4) -a$a_areasqkm <- 1.5 ^ 2 - -plot(a["val"], reset = FALSE) -plot(st_geometry(b), border = 'red', lwd = 3, add = TRUE, reset = FALSE) -plot(st_geometry(a), border = 'darkgreen', lwd = 3, add = TRUE) - -text(sapply(st_geometry(a), \(x) mean(x[[1]][,1]) + 0.4), - sapply(st_geometry(a), \(x) mean(x[[1]][,2]) + 0.3), - a$ida, col = "darkgreen") - -text(sapply(st_geometry(b), \(x) mean(x[[1]][,1]) - 0.4), - sapply(st_geometry(b), \(x) mean(x[[1]][,2]) - 0.5), - b$idb, col = "red") - -# say we have data from `a` that we want sampled to `b`.
-# this gives the percent of each `a` that intersects each `b` - -(a_b <- calculate_area_intersection_weights( - select(a, ida), select(b, idb), normalize = FALSE)) - -# NOTE: `w` sums to 1 per `a` in all cases - -summarize(group_by(a_b, ida), w = sum(w)) - -# Since normalize is false, we apply weights like: -st_drop_geometry(a) |> - left_join(a_b, by = "ida") |> - mutate(a_areasqkm = 1.5 ^ 2) |> # add area of each polygon in `a` - group_by(idb) |> # group so we get one row per `b` - # now we calculate the value for each b with fraction of the area of each - # polygon in `a` per polygon in `b` with an equation like this: - summarize( - new_val = sum( (val * w * a_areasqkm), na.rm = TRUE ) / sum(w * a_areasqkm)) - -# NOTE: `w` is the fraction of the polygon in a. We need to multiply w by the -# unique area of the polygon it is associated with to get the weighted mean weight. - -# we can go in reverse if we had data from b that we want sampled to a -(b_a <- calculate_area_intersection_weights( - select(b, idb), select(a, ida), normalize = FALSE)) +source <- st_sf(source_id = c(1, 2), + val = c(10, 20), + geom = st_as_sfc(c( + "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))", + "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))"))) -# NOTE: `w` sums to 1 per `b` (source) only where `b` is fully covered by `a` (target). +source$area <- as.numeric(st_area(source)) -summarize(group_by(b_a, idb), w = sum(w)) +target <- st_sf(target_id = "a", + geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))")) -# Now let's look at what happens if we set normalize = TRUE. Here we -# get `a` as source and `b` as target but normalize the weights so -# the area of a is built into `w`. +plot(source['val'], reset = FALSE) +plot(st_geometry(target), add = TRUE) -(a_b <- calculate_area_intersection_weights( - select(a, ida), select(b, idb), normalize = TRUE)) +(w <- +calculate_area_intersection_weights(source[c("source_id", "geom")], + target[c("target_id", "geom")], + normalize = FALSE, allow_lonlat = TRUE)) -# NOTE: if we summarize by `b` (target) `w` sums to 1 where above, with -# normalize = FALSE, `w` summed to one per `a` (source). +(res <- +merge(st_drop_geometry(source), w, by = "source_id")) -summarize(group_by(a_b, idb), w = sum(w)) +sum(res$val * res$w * res$area) / sum(res$w * res$area) -# Since normalize is false, we apply weights like: -st_drop_geometry(a) |> - left_join(a_b, by = "ida") |> - group_by(idb) |> # group so we get one row per `b` - # now we weight by the percent of each polygon in `b` per polygon in `a` - summarize(new_val = sum( (val * w), na.rm = TRUE )) +(w <- +calculate_area_intersection_weights(source[c("source_id", "geom")], + target[c("target_id", "geom")], + normalize = TRUE, allow_lonlat = TRUE)) +(res <- +merge(st_drop_geometry(source), w, by = "source_id")) -# NOTE: `w` is the fraction of the polygon from `a` overlapping the polygon from `b`. -# The area of `a` is built into the weight so we just sum the weith times value oer polygon. 
+sum(res$val * res$w) / sum(res$w) } diff --git a/vignettes/polygon_intersection.Rmd b/vignettes/polygon_intersection.Rmd index ef4dae6..8af8b12 100644 --- a/vignettes/polygon_intersection.Rmd +++ b/vignettes/polygon_intersection.Rmd @@ -84,6 +84,248 @@ sum(is.na(weights$gdptools_wght)) # look at cases where gptools has NA and ncdfgeom does not weights[is.na(weights$gdptools_wght),] +``` +The following example illustrates the nuances between normalized and non-normalized area weights and shows more specifically how area-weighted intersection calculations can be accomplished. + +The set of polygons is contrived but useful for the sake of demonstration. +```{r} + +library(dplyr) +library(sf) +library(ncdfgeom) + +g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1))) + +blue1 = sf::st_polygon(g) * 0.8 +blue2 = blue1 + c(1, 2) +blue3 = blue1 * 1.2 + c(-1, 2) + +pink1 = sf::st_polygon(g) +pink2 = pink1 + 2 +pink3 = pink1 + c(-0.2, 2) +pink4 = pink1 + c(2.2, 0) + +blue = sf::st_sfc(blue1, blue2, blue3) + +pink = sf::st_sfc(pink1, pink2, pink3, pink4) + +plot(c(blue,pink), border = NA) +plot(blue, border = "#648fff", add = TRUE) +plot(pink, border = "#dc267f", add = TRUE) + +blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3))) +pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10))) + +text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4), + sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3), + blue$idblue, col = "#648fff") + +text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4), + sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])), + pink$idpink, col = "#dc267f") + +sf::st_agr(blue) <- sf::st_agr(pink) <- "constant" +sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070) +``` +```{r} + +(blue_pink_norm_false <- +calculate_area_intersection_weights(blue, pink, normalize = FALSE)) +``` + +NOTE: normalize = FALSE so weights sum to 1 per source polygon only when a source polygon is fully covered by the target. The non-intersecting portion is not included. + +The following breaks down how to use these weights for one target polygon. + +```{r} +blue$val = c(30, 10, 20) +blue$area <- as.numeric(sf::st_area(blue)) + +(result <- st_drop_geometry(blue) |> + left_join(blue_pink_norm_false, by = "idblue")) +``` + +To calculate the value for pink-9, we would do: + +```{r} + +((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) / ((0.375 * 2.56) + (0.604167 * 3.6864)) + +``` + +This says that 0.375 of blue-2 covers pink-9 and 0.604 of blue-3 covers pink-9. +Since we are using area as the weighting method, we multiply the fraction of each +source polygon by its area and by the value of interest. +We sum the contributions from blue-2 and blue-3 to pink-9 and divide by the sum +of the combined area weights. Note that because part of pink-9 receives no +contribution, that missing area does not appear in the calculation. The +intersecting areas are 0.96 and 2.23, meaning that we are missing +4 - 0.96 - 2.23 = 0.81, and could rewrite the value for pink-9 as: + +```{r} +((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864) + (NA * 1 * 0.81)) / + ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864)) +``` + +This evaluates to NA, which is why we usually drop NA terms in this operation!
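+ +As a quick check of the arithmetic above, we can recompute the pink-9 value from vectors holding the same numbers (a minimal sketch; the weights and areas are the rounded values from the worked example above): + +```{r} +val <- c(10, 20) # values of blue-2 and blue-3 +w <- c(0.375, 0.604167) # non-normalized weights of blue-2 and blue-3 for pink-9 +area <- c(2.56, 3.6864) # areas of blue-2 and blue-3 + +# area-weighted mean over the source polygons that intersect pink-9 +sum(val * w * area, na.rm = TRUE) / sum(w * area) +```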
+ +The above can be accomplished with: + +```{r} +(result <- result |> + group_by(idpink) |> # group so we get one row per target + # now we calculate the value for each `pink` with the fraction of the area of each + # polygon in `blue` per polygon in `pink` with an equation like this: + summarize( + new_val = sum( (val * w * area), na.rm = TRUE ) / sum(w * area))) +``` + +Now let's do the same thing but with `normalize = TRUE`. + +```{r} + +(blue_pink_norm_true <- +calculate_area_intersection_weights(select(blue, idblue), pink, normalize = TRUE)) + +``` + +NOTE: normalize = TRUE so weights sum to 1 per target polygon. Non-overlap is ignored as if it does not exist. + +The following breaks down how to use these weights for one target polygon. + +```{r} +(result <- st_drop_geometry(blue) |> + left_join(blue_pink_norm_true, by = "idblue")) +``` + +To calculate the value for pink-9, we would do: + +```{r} +((10 * 0.3012) + (20 * 0.6988)) / ((0.3012) + (0.6988)) +``` + +This says that the portion of pink-9 that should get the value from blue-2 is 0.3 and the portion of pink-9 that should get the value from blue-3 is 0.7. In this form, our weights are transformed to include the relative area of the source polygons. + +As shown above as well, the calculation can be accomplished with: + +```{r} +(result <- result |> + group_by(idpink) |> # group so we get one row per target + # now we calculate the value for each `pink` with the fraction of the area of each + # polygon in `blue` per polygon in `pink` with an equation like this: + summarize( + new_val = sum( (val * w), na.rm = TRUE ))) +``` + +We can look at a more typical arrangement of polygons and consider this in a different way. + +```{r, echo=FALSE} + +g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1))) + +blue1 = st_polygon(g) * 0.75 + c(-.25, -.25) +blue2 = blue1 + 1.5 +blue3 = blue1 + c(0, 1.5) +blue4 = blue1 + c(1.5, 0) + +pink1 = st_polygon(g) +pink2 = pink1 + 2 +pink3 = pink1 + c(0, 2) +pink4 = pink1 + c(2, 0) + +blue = st_sfc(blue1, blue2, blue3, blue4) +pink = st_sfc(pink1, pink2, pink3, pink4) + +pink <- st_sf(pink, data.frame(idpink = c(1, 2, 3, 4))) +blue <- st_sf(blue, data.frame(idblue = c(6, 7, 8, 9))) + +plot(st_geometry(pink), border = "#dc267f", lwd = 3) +plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE) + +text(sapply(st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4), + sapply(st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3), + blue$idblue, col = "#648fff") + +text(sapply(st_geometry(pink), \(x) mean(x[[1]][,1]) - 0.4), + sapply(st_geometry(pink), \(x) mean(x[[1]][,2]) - 0.5), + pink$idpink, col = "#dc267f") + +st_agr(blue) <- st_agr(pink) <- "constant" +st_crs(pink) <- st_crs(blue) <- st_crs(5070) +``` +Let's also look at the values. +```{r, echo = FALSE} +blue$val <- c(1, 2, 3, 4) +blue$a_areasqkm <- 1.5 ^ 2 + +plot(blue["val"], reset = FALSE, pal = heat.colors) +plot(st_geometry(pink), border = "#dc267f", lwd = 3, add = TRUE, reset = FALSE) +plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE) + +text(sapply(st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4), + sapply(st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3), + blue$idblue, col = "#648fff") + +text(sapply(st_geometry(pink), \(x) mean(x[[1]][,1]) - 0.4), + sapply(st_geometry(pink), \(x) mean(x[[1]][,2]) - 0.5), + pink$idpink, col = "#dc267f") +``` + +```{r} +# say we have data from `blue` that we want sampled to `pink`.
+# this gives the percent of each `blue` that intersects each `pink` + +(blue_pink <- calculate_area_intersection_weights( + select(blue, idblue), select(pink, idpink), normalize = FALSE)) + +# NOTE: `w` sums to 1 per `blue` in all cases + +summarize(group_by(blue_pink, idblue), w = sum(w)) + +# Since normalize is false, we apply weights like: +st_drop_geometry(blue) |> + left_join(blue_pink, by = "idblue") |> + mutate(a_areasqkm = 1.5 ^ 2) |> # add area of each polygon in `blue` + group_by(idpink) |> # group so we get one row per `pink` + # now we calculate the value for each `pink` with the fraction of the area of each + # polygon in `blue` per polygon in `pink` with an equation like this: + summarize( + new_val = sum( (val * w * a_areasqkm), na.rm = TRUE ) / sum(w * a_areasqkm)) + +# NOTE: `w` is the fraction of the polygon in `blue`. We need to multiply w by the +# unique area of the polygon it is associated with to get the weight used in the +# area-weighted mean. + +# we can go in reverse if we had data from `pink` that we want sampled to `blue` + +(pink_blue <- calculate_area_intersection_weights( + select(pink, idpink), select(blue, idblue), normalize = FALSE)) + +# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by `blue` (target). + +summarize(group_by(pink_blue, idpink), w = sum(w)) + +# Now let's look at what happens if we set normalize = TRUE. Here we +# get `blue` as source and `pink` as target but normalize the weights so +# the area of `blue` is built into `w`. + +(blue_pink <- calculate_area_intersection_weights( + select(blue, idblue), select(pink, idpink), normalize = TRUE)) + +# NOTE: if we summarize by `pink` (target), `w` sums to 1 where above, with +# normalize = FALSE, `w` summed to 1 per `blue` (source). + +summarize(group_by(blue_pink, idpink), w = sum(w)) + +# Since normalize is true, we apply weights like: +st_drop_geometry(blue) |> + left_join(blue_pink, by = "idblue") |> + group_by(idpink) |> # group so we get one row per `pink` + # now we weight by the percent of each polygon in `pink` covered by each polygon in `blue` + summarize(new_val = sum( (val * w), na.rm = TRUE )) + +# NOTE: `w` is the fraction of the polygon from `blue` overlapping the polygon from `pink`. +# The area of `blue` is built into the weight so we just sum the weight times value per polygon. + ``` ```{r cleanup, echo=FALSE}