diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml
index 098027b..24e464f 100644
--- a/.github/workflows/R-CMD-check.yml
+++ b/.github/workflows/R-CMD-check.yml
@@ -10,59 +10,37 @@ name: R-CMD-check
 jobs:
   R-CMD-check:
-    runs-on: ubuntu-20.04
+    runs-on: ${{ matrix.config.os }}
     env:
       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
       _R_CHECK_TESTS_NLINES_: 0
+      NOT_CRAN: true
+    strategy:
+      fail-fast: false
+      matrix:
+        config:
+          - {os: macos-latest, r: 'release'}
+          - {os: windows-latest, r: 'release'}
+          - {os: ubuntu-latest, r: 'release'}
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
 
-      - uses: r-lib/actions/setup-r@v2
-
-      - name: Query dependencies
-        run: |
-          install.packages('remotes')
-          saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
-          writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
-        shell: Rscript {0}
+      - uses: r-lib/actions/setup-pandoc@v2
 
-      - name: Cache R packages
-        if: runner.os != 'Windows'
-        uses: actions/cache@v2
+      - uses: r-lib/actions/setup-r@v2
         with:
-          path: ${{ env.R_LIBS_USER }}
-          key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
-          restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
-
-      - name: Install system dependencies
-        if: runner.os == 'Linux'
-        env:
-          RHUB_PLATFORM: linux-x86_64-ubuntu-gcc
-        run: |
-          Rscript -e "remotes::install_github('r-hub/sysreqs')"
-          sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))")
-          sudo -s eval "$sysreqs"
-
-      - name: Install dependencies
-        run: |
-          install.packages(c("remotes"))
-          remotes::install_deps(dependencies = TRUE)
-          remotes::install_cran("rcmdcheck")
-          remotes::install_cran("covr")
-        shell: Rscript {0}
-
-      - name: Check
-        run: rcmdcheck::rcmdcheck(args = c("--ignore-vignettes", "--no-manual"), build_args = c("--no-build-vignettes"), error_on = "error", check_dir = "check")
-        shell: Rscript {0}
+          r-version: ${{ matrix.config.r }}
+          use-public-rspm: true
 
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::rcmdcheck, any::covr
+          needs: check, coverage
 
-      - name: Upload check results
-        if: failure()
-        uses: actions/upload-artifact@v3
+      - uses: r-lib/actions/check-r-package@v2
         with:
-          name: results
-          path: check
+          upload-snapshots: true
 
       - name: Test coverage
         run: covr::codecov()
-        shell: Rscript {0}
+        shell: Rscript {0}
\ No newline at end of file
diff --git a/DESCRIPTION b/DESCRIPTION
index e85d7b7..195865f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: ncdfgeom
 Type: Package
 Title: 'NetCDF' Geometry and Time Series
-Version: 1.2.0
+Version: 1.3.0
 Authors@R: c(person("David", "Blodgett", role = c("aut", "cre"),
                     email = "dblodgett@usgs.gov"),
              person("Luke", "Winslow", role = "ctb"))
@@ -15,4 +15,4 @@ Suggests: testthat, knitr, rmarkdown, pkgdown, jsonlite, zip, ncdf4, nhdplusTool
 License: CC0
 Encoding: UTF-8
 VignetteBuilder: knitr
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
diff --git a/R/calculate_area_intersection_weights.R b/R/calculate_area_intersection_weights.R
index da47a38..a9529fe 100644
--- a/R/calculate_area_intersection_weights.R
+++ b/R/calculate_area_intersection_weights.R
@@ -1,9 +1,9 @@
-#' Area Weighted Intersection (areal implementation)
+#' Area Weighted Intersection
 #' @description Returns the fractional percent of each
 #' feature in x that is covered by each intersecting feature
 #' in y. These can be used as the weights in an area-weighted
-#' mean overlay analysis where x is the data source and area-
-#' weighted means are being generated for the target, y.
+#' mean overlay analysis where x is the data **source** and area-
+#' weighted means are being generated for the **target**, y.
 #'
 #' This function is a lightweight wrapper around the functions
 #' \link[areal]{aw_intersect} \link[areal]{aw_total} and \link[areal]{aw_weight}
 #'
@@ -11,9 +11,22 @@
 #'
 #' @param x sf data.frame source features including one geometry column and one identifier column
 #' @param y sf data.frame target features including one geometry column and one identifier column
-#' @param normalize logical return normalized weights or not. See details and examples.
+#' @param normalize logical return normalized weights or not.
+#'
+#' Normalized weights express the fraction of **target** polygons covered by
+#' a portion of each **source** polygon. They are normalized in that the area
+#' of each **source** polygon has already been factored into the weight.
+#'
+#' Un-normalized weights express the fraction of **source** polygons covered by
+#' a portion of each **target** polygon. This is a more general form that requires
+#' knowledge of the area of each **source** polygon to derive area-weighted
+#' statistics from **source** to **target**.
+#'
+#' See details and examples for more regarding this distinction.
+#'
 #' @param allow_lonlat boolean If FALSE (the default) lon/lat target features are not allowed.
 #' Intersections in lon/lat are generally not valid and problematic at the international date line.
+#'
 #' @return data.frame containing fraction of each feature in x that is
 #' covered by each feature in y.
 #'
@@ -21,120 +34,73 @@
 #'
 #' Two versions of weights are available:
 #'
-#' `normalize = FALSE`, if a polygon from x is entirely within a polygon in y,
-#' w will be 1. If a polygon from x is 50% in one polygon from y and 50% in another, there
-#' will be two rows, one for each x/y pair of features with w = 0.5 in each. Weights
-#' will sum to 1 per SOURCE polygon if the target polygons fully cover that feature.
-#' `normalize = TRUE`, weights are divided by the target polygon area such that weights
-#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
-#'
-#' @examples
-#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
-#'                                c(1,1), c(-1,1),
-#'                                c(-1,-1))))
-#' b2 = b1 + 2
-#' b3 = b1 + c(-0.2, 2)
-#' b4 = b1 + c(2.2, 0)
-#' b = sf::st_sfc(b1, b2, b3, b4)
-#' a1 = b1 * 0.8
-#' a2 = a1 + c(1, 2)
-#' a3 = a1 + c(-1, 2)
-#' a = sf::st_sfc(a1,a2,a3)
-#' plot(b, border = 'red')
-#' plot(a, border = 'green', add = TRUE)
-#'
-#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
-#'
-#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
-#' a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3)))
-#'
-#' sf::st_agr(a) <- sf::st_agr(b) <- "constant"
-#'
-#' calculate_area_intersection_weights(a, b, normalize = FALSE)
-#' calculate_area_intersection_weights(a, b, normalize = TRUE)
-#' calculate_area_intersection_weights(b, a, normalize = FALSE)
-#' calculate_area_intersection_weights(b, a, normalize = TRUE)
-#'
-#' #a more typical arrangement of polygons
-#'
-#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
-#'                                c(1,1), c(-1,1),
-#'                                c(-1,-1))))
-#' b2 = b1 + 2
-#' b3 = b1 + c(0, 2)
-#' b4 = b1 + c(2, 0)
-#' b = sf::st_sfc(b1, b2, b3, b4)
-#' a1 = b1 * 0.75 + c(-.25, -.25)
-#' a2 = a1 + 1.5
-#' a3 = a1 + c(0, 1.5)
-#' a4 = a1 + c(1.5, 0)
-#' a = sf::st_sfc(a1,a2,a3, a4)
-#' plot(b, border = 'red', lwd = 3)
-#' plot(a, border = 'green', add = TRUE)
-#'
-#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
-#'
-#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
-#' a <- sf::st_sf(a, data.frame(ida = c("a", "b", "c", "d")))
-#'
-#' sf::st_agr(a) <- sf::st_agr(b) <- "constant"
-#'
-#' # say we have data from `a` that we want sampled to `b`.
-#' # this gives the percent of each `a` that intersects each `b`
+#' `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y
+#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target)
+#' and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5
+#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that
+#' feature.
 #'
-#' (a_b <- calculate_area_intersection_weights(a, b, normalize = FALSE))
+#' For `normalize = FALSE` the area weighted mean calculation must include the area of each
+#' x (source) polygon as in:
 #'
-#' # note that `w` sums to 1 where `b` completely covers `a`.
+#' > *in this case, `area` is the area of source polygons and you would do this operation grouped
+#' by target polygon id.*
+#'
+#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)`
 #'
-#' dplyr::summarize(dplyr::group_by(a_b, ida), w = sum(w))
+#' If `normalize = TRUE`, weights are divided by the target polygon area such that weights
+#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
+#'
+#' For `normalize = TRUE`, no area is required in the area weighted mean calculation,
+#' as in:
 #'
-#' # We can apply these weights like...
-#' dplyr::tibble(ida = unique(a_b$ida),
-#'               val = c(1, 2, 3, 4)) |>
-#'   dplyr::left_join(a_b, by = "ida") |>
-#'   dplyr::mutate(val = ifelse(is.na(w), NA, val),
-#'                 areasqkm = 1.5 ^ 2) |> # area of each polygon in `a`
-#'   dplyr::group_by(idb) |> # group so we get one row per `b`
-#'   # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
-#'   dplyr::summarize(new_val = sum( (val * w * areasqkm), na.rm = TRUE ) / sum(w * areasqkm))
-#'
-#' # we can go in reverse if we had data from b that we want sampled to a
+#' > `sum( (val * w), na.rm = TRUE ) / sum(w)`
+#'
+#' See examples for illustration of these two modes.
+#'
+#' @examples
+#'
+#' library(sf)
 #'
-#' (b_a <- calculate_area_intersection_weights(b, a, normalize = FALSE))
+#' source <- st_sf(source_id = c(1, 2),
+#'                 val = c(10, 20),
+#'                 geom = st_as_sfc(c(
+#'                   "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))",
+#'                   "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))")))
 #'
-#' # note that `w` sums to 1 only where `a` complete covers `b`
+#' source$area <- as.numeric(st_area(source))
 #'
-#' dplyr::summarize(dplyr::group_by(b_a, idb), w = sum(w))
+#' target <- st_sf(target_id = "a",
+#'                 geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))"))
 #'
-#' # with `normalize = TRUE`, `w` will sum to 1 when the target
-#' # completely covers the source rather than when the source completely
-#' # covers the target.
+#' plot(source['val'], reset = FALSE)
+#' plot(st_geometry(target), add = TRUE)
 #'
-#' (a_b <- calculate_area_intersection_weights(a, b, normalize = TRUE))
+#' (w <-
+#' calculate_area_intersection_weights(source[c("source_id", "geom")],
+#'                                     target[c("target_id", "geom")],
+#'                                     normalize = FALSE, allow_lonlat = TRUE))
 #'
-#' dplyr::summarize(dplyr::group_by(a_b, idb), w = sum(w))
+#' (res <-
+#' merge(st_drop_geometry(source), w, by = "source_id"))
 #'
-#' (b_a <- calculate_area_intersection_weights(b, a, normalize = TRUE))
+#' sum(res$val * res$w * res$area) / sum(res$w * res$area)
 #'
-#' dplyr::summarize(dplyr::group_by(b_a, ida), w = sum(w))
+#' (w <-
+#' calculate_area_intersection_weights(source[c("source_id", "geom")],
+#'                                     target[c("target_id", "geom")],
+#'                                     normalize = TRUE, allow_lonlat = TRUE))
+#' (res <-
+#' merge(st_drop_geometry(source), w, by = "source_id"))
 #'
-#' # We can apply these weights like...
-#' # Note that we don't need area in the normalized case
-#' dplyr::tibble(ida = unique(a_b$ida),
-#'               val = c(1, 2, 3, 4)) |>
-#'   dplyr::left_join(a_b, by = "ida") |>
-#'   dplyr::mutate(val = ifelse(is.na(w), NA, val)) |>
-#'   dplyr::group_by(idb) |> # group so we get one row per `b`
-#'   # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
-#'   dplyr::summarize(new_val = sum( (val * w), na.rm = TRUE ))
-
+#' sum(res$val * res$w) / sum(res$w)
 #'
 #' @export
 #' @importFrom sf st_intersection st_set_geometry st_area st_crs st_drop_geometry
 #' @importFrom dplyr mutate group_by right_join select ungroup left_join mutate
 calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = FALSE) {
-
+
   if(missing(normalize)) {
     warning("Required input normalize is missing, defaulting to FALSE.")
     normalize <- FALSE
@@ -172,27 +138,26 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =
   int <- areal::aw_intersect(y,
                              source = x,
                              areaVar = "area_intersection")
-  int <- areal::aw_total(int, source = x,
-                         id = "varx", # the unique id in the "source" x
-                         areaVar = "area_intersection",
-                         totalVar = "totalArea_x",
-                         type = "extensive",
-                         weight = "total")
-  int <- areal::aw_weight(int, areaVar = "area_intersection",
-                          totalVar = "totalArea_x",
-                          areaWeight = "areaWeight_x_y")
-
-  int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")
+
+  if(!normalize) {
 
-  if(normalize) {
+    int <- areal::aw_total(int,
+                           source = x,
+                           id = "varx", # the unique id in the "source" x
+                           areaVar = "area_intersection",
+                           totalVar = "totalArea_x",
+                           type = "extensive",
+                           weight = "total")
 
-    # for normalized, we return the percent of each target covered by each source
-    int <- areal::aw_intersect(y,
-                               source = x,
-                               areaVar = "area_intersection")
+    int <- areal::aw_weight(int, areaVar = "area_intersection",
+                            totalVar = "totalArea_x",
+                            areaWeight = "areaWeight_x_y")
 
-    # for normalized, we sum the intersection area by the total target intersection area
-    int <- ungroup(mutate(group_by(int, vary), totalArea_y = sum(area_intersection)))
+  } else {
+
+    # for normalized, we sum the intersection area by the total target area
+    int <- left_join(int, data.frame(vary = y$vary,
+                                     totalArea_y = as.numeric(sf::st_area(y))), by = "vary")
 
   int <- areal::aw_weight(int, areaVar = "area_intersection",
@@ -203,8 +168,8 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat =
   int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx")
 
-  int <- select(int, varx, vary, w = areaWeight_x_y)
-
+  int <- select(int, varx, vary, w = "areaWeight_x_y")
+
   names(int) <- c(id_x, id_y, "w")
 
   return(dplyr::as_tibble(int))
diff --git a/docs/404.html b/docs/404.html
index 16a73ca..21aaa2c 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -6,7 +6,7 @@
-(vars <- ncmeta::nc_vars(example_file))
+(vars <- ncmeta::nc_vars(example_file))
#> # A tibble: 5 × 5
#> id name type ndims natts
#> <int> <chr> <chr> <int> <int>
@@ -146,7 +149,7 @@ Load Spatial Data
= "station",
variables = vars$name) -> example_file
Now the NetCDF file looks like:
-#> netcdf file69986203fbb {
+#> netcdf file63806c813359 {
#> dimensions:
#> maxStrlen64 = 64 ;
#> station = 2 ;
@@ -681,7 +684,7 @@ Multiple MultiPolygons wi
diff --git a/docs/articles/index.html b/docs/articles/index.html
index a3104bb..05de0fe 100644
--- a/docs/articles/index.html
+++ b/docs/articles/index.html
@@ -1,5 +1,5 @@
-Articles • ncdfgeom
+Articles • ncdfgeom
@@ -17,7 +17,7 @@
@@ -87,7 +87,7 @@ All vignettes
diff --git a/docs/articles/ncdfgeom.html b/docs/articles/ncdfgeom.html
index 34de8e3..e5586d1 100644
--- a/docs/articles/ncdfgeom.html
+++ b/docs/articles/ncdfgeom.html
@@ -6,7 +6,7 @@
NetCDF-CF Geometry and Timeseries Tools for R • ncdfgeom
-
+
@@ -33,7 +33,7 @@
@@ -55,6 +55,9 @@
Reading and Writing NetCDF-CF Geometry
+
+ Area Weight Generation for Polygon Intersections
+
Reading and Writing Timeseries
@@ -538,7 +541,7 @@ Read Timeseries and Geometry f
diff --git a/docs/articles/ncdfgeom_files/figure-html/plot-1.png b/docs/articles/ncdfgeom_files/figure-html/plot-1.png
index c87f77b..c150975 100644
Binary files a/docs/articles/ncdfgeom_files/figure-html/plot-1.png and b/docs/articles/ncdfgeom_files/figure-html/plot-1.png differ
diff --git a/docs/articles/polygon_intersection.html b/docs/articles/polygon_intersection.html
index 154fab4..d81e3a8 100644
--- a/docs/articles/polygon_intersection.html
+++ b/docs/articles/polygon_intersection.html
@@ -6,7 +6,7 @@
Area Weight Generation for Polygon Intersections • ncdfgeom
-
+
@@ -33,7 +33,7 @@
@@ -103,81 +103,222 @@ Area Weight Generation for Polygon
It
is a comparison with the gdptools
python package
demonstration here.
-See calculate_area_intersection_weights()
for additional
-demonstration and info.
gdptools_weights <- read.csv(system.file("extdata/gdptools_prl_out.csv", package = "ncdfgeom"),
colClasses = c("character", "character", "numeric"))
-gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
+gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
gage_id <- "USGS-01482100"
-basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
-huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
-#> Spherical geometry (s2) switched off
-#> Spherical geometry (s2) switched on
-huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
-#> Spherical geometry (s2) switched off
-#> Spherical geometry (s2) switched on
+basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
+huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
+huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
org_par <- par(mar = c(0, 0, 0, 0))
-plot(sf::st_as_sfc(sf::st_bbox(huc12)))
-plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
-plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
-plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
-
-
-par(org_par)
+plot(sf::st_as_sfc(sf::st_bbox(huc12)))
+plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
+plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
+plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
+par(org_par)
weights <- ncdfgeom::calculate_area_intersection_weights(
- x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
- y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
+ x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
+ y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
normalize = TRUE
)
-#> Loading required namespace: areal
-weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))
+weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))
With weights calculated, we can do a little investigation into the
differences.
-
+
weights$diff <- weights$w - weights$gdptools_wght
# make sure nothing is way out of whack
max(weights$diff, na.rm = TRUE)
-#> [1] 0.0000009205585
# ensure the weights generally sum as we would expect.
sum(weights$gdptools_wght, na.rm = TRUE)
-#> [1] 25
sum(weights$w, na.rm = TRUE)
-#> [1] 25
length(unique(na.omit(weights$huc8)))
-#> [1] 25
# see how many NA values we have in each.
sum(is.na(weights$w))
-#> [1] 183
sum(is.na(weights$gdptools_wght))
-#> [1] 183
# look at cases where gdptools has NA and ncdfgeom does not
-weights[is.na(weights$gdptools_wght),]
-#> # A tibble: 183 × 5
-#> huc12 huc8 w gdptools_wght diff
-#> <chr> <chr> <dbl> <dbl> <dbl>
-#> 1 011000050101 NA NA NA NA
-#> 2 011000050201 NA NA NA NA
-#> 3 011000050202 NA NA NA NA
-#> 4 011000050203 NA NA NA NA
-#> 5 011000050305 NA NA NA NA
-#> 6 011000050501 NA NA NA NA
-#> 7 011000050504 NA NA NA NA
-#> 8 020200030604 NA NA NA NA
-#> 9 020200030801 NA NA NA NA
-#> 10 020200030802 NA NA NA NA
-#> # ℹ 173 more rows
+weights[is.na(weights$gdptools_wght) & !is.na(weights$w),]
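+# the reverse check, where ncdfgeom has NA and gdptools does not, is a
+# natural complement (a sketch, not part of the original comparison):
+weights[is.na(weights$w) & !is.na(weights$gdptools_wght),]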
+The following example illustrates the nuances between normalized and
+non-normalized area weights and shows more specifically how area weight
+intersection calculations can be accomplished.
+The set of polygons is contrived but useful for the sake of
+demonstration.
+
+
+library(dplyr)
+library(sf)
+library(ncdfgeom)
+
+g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))
+
+blue1 = sf::st_polygon(g) * 0.8
+blue2 = blue1 + c(1, 2)
+blue3 = blue1 * 1.2 + c(-1, 2)
+
+pink1 = sf::st_polygon(g)
+pink2 = pink1 + 2
+pink3 = pink1 + c(-0.2, 2)
+pink4 = pink1 + c(2.2, 0)
+
+blue = sf::st_sfc(blue1,blue2,blue3)
+
+pink = sf::st_sfc(pink1, pink2, pink3, pink4)
+
+plot(c(blue,pink), border = NA)
+plot(blue, border = "#648fff", add = TRUE)
+plot(pink, border = "#dc267f", add = TRUE)
+
+blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3)))
+pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10)))
+
+text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
+ sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
+ blue$idblue, col = "#648fff")
+
+text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4),
+ sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])),
+ pink$idpink, col = "#dc267f")
+
+sf::st_agr(blue) <- sf::st_agr(pink) <- "constant"
+sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070)
+
+
+(blue_pink_norm_false <-
+calculate_area_intersection_weights(blue, pink, normalize = FALSE))
+NOTE: normalize = FALSE so weights sum to 1 per source polygon only
+when a source polygon is fully covered by the target. The
+non-intersecting portion is not included.
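+As a quick check (a sketch using the blue_pink_norm_false object from
+above), summing w per source polygon shows which blue polygons are
+fully covered:
+
+# sums below 1 flag source polygons not fully covered by the target set
+summarize(group_by(blue_pink_norm_false, idblue), w = sum(w))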
+The following breaks down how to use these weights to compute a value
+for one target polygon.
+
+blue$val = c(30, 10, 20)
+blue$area <- as.numeric(sf::st_area(blue))
+
+(result <- st_drop_geometry(blue) |>
+ left_join(blue_pink_norm_false, by = "idblue"))
+To calculate the value for pink-9, we would do:
+
+
+((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) / ((0.375 * 2.56) + (0.604167 * 3.6864))
+This is saying that 0.375 of blue-2 covers pink-9 and 0.604 of blue-3
+covers pink-9. Since we are using area as the weighting method, we
+multiply the fraction of each source polygon by its area and the value
+we want to create an area weight for. We sum the contributions from
+blue-2 and blue-3 to pink-9 and divide by the sum of the combined area
+weights.
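+The same number can be computed from the result object directly; a
+minimal sketch, assuming the objects created above:
+
+pink9 <- result[result$idpink == 9, ]
+sum(pink9$val * pink9$w * pink9$area) / sum(pink9$w * pink9$area)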
+Note that because parts of pink-9 receive no contribution from any
+source polygon, that missing area does not appear. The intersecting areas
+are 0.96 and 2.23 meaning that we are missing
+4 - 0.96 - 2.23 = 0.81
+and could rewrite the value for pink-9 as:
+
+((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864) + (NA * 1 * 0.81)) /
+  ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864))
+This evaluates to NA, which is why we usually drop NA terms in this
+operation!
+In practice, the above can be accomplished with:
+
+(result <- result |>
+ group_by(idpink) |> # group so we get one row per target
+ # now we calculate the value for each `pink` with fraction of the area of each
+ # polygon in `blue` per polygon in `pink` with an equation like this:
+ summarize(
+ new_val = sum( (val * w * area) ) / sum(w * area)))
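+If some values were missing, the same calculation would mirror the
+formula in the function documentation and drop NA terms; a sketch,
+assuming the objects above:
+
+st_drop_geometry(blue) |>
+  left_join(blue_pink_norm_false, by = "idblue") |>
+  group_by(idpink) |>
+  summarize(new_val = sum(val * w * area, na.rm = TRUE) / sum(w * area))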
+Now let’s do the same thing but with
+normalize = TRUE.
+
+
+(blue_pink_norm_true <-
+calculate_area_intersection_weights(select(blue, idblue), pink, normalize = TRUE))
+NOTE: normalize = TRUE so weights sum to 1 per target polygon.
+Non-overlap is ignored as if it does not exist.
+The following breaks down how to use these weights to compute a value
+for one target polygon.
+
+(result <- st_drop_geometry(blue) |>
+ left_join(blue_pink_norm_true, by = "idblue"))
+To calculate the value for pink-9, we would do:
+
+((10 * 0.24) + (20 * 0.5568)) / (0.24 + (0.5568))
+This is saying that the portion of pink-9 that should get the value
+from blue-2 is 0.3 and the portion of pink-9 that should get the value
+from blue-3 is 0.7. In this form, our weights are transformed to
+include the relative area of the source polygons.
+As shown above as well, the calculation can be accomplished with:
+
+(result <- result |>
+ group_by(idpink) |> # group so we get one row per target
+ # now we calculate the value for each `pink` with fraction of the area of each
+ # polygon in `blue` per polygon in `pink` with an equation like this:
+ summarize(
+ new_val = sum( (val * w) ) / sum(w)))
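+To see the 0.3 and 0.7 portions explicitly, the normalized weights can
+be rescaled within each target; a sketch using the objects above:
+
+blue_pink_norm_true |>
+  group_by(idpink) |>
+  mutate(portion = w / sum(w)) |> # fraction of each target per source
+  ungroup()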
+We can look at a more typical arrangement of polygons and approach
+this in a different way.
+Let’s also look at the values.
+
+# say we have data from `blue` that we want sampled to `pink`.
+# this gives the percent of each `blue` that intersects each `pink`
+
+(blue_pink <- calculate_area_intersection_weights(
+ select(blue, idblue), select(pink, idpink), normalize = FALSE))
+
+# NOTE: `w` sums to 1 per `blue` in all cases
+
+summarize(group_by(blue_pink, idblue), w = sum(w))
+
+# Since normalize is false, we apply weights like:
+st_drop_geometry(blue) |>
+ left_join(blue_pink, by = "idblue") |>
+  group_by(idpink) |> # group so we get one row per `pink`
+  # now we calculate the value for each `pink` as the area-weighted mean of
+  # the `blue` values, using `w` times the `area` column added to `blue` above:
+  summarize(
+    new_val = sum( (val * w * area) ) / sum(w * area))
+
+# NOTE: `w` is the fraction of the polygon in `blue`. We need to multiply `w` by the
+# area of the polygon it is associated with to get the weight used in the weighted mean.
+
+# we can go in reverse if we had data from `pink` that we want sampled to `blue`
+
+(pink_blue <- calculate_area_intersection_weights(
+ select(pink, idpink), select(blue, idblue), normalize = FALSE))
+
+# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by `blue` (target).
+
+summarize(group_by(pink_blue, idpink), w = sum(w))
+
+# Now let's look at what happens if we set normalize = TRUE. Here we
+# get `blue` as source and `pink` as target but normalize the weights so
+# the area of `blue` is built into `w`.
+
+(blue_pink <- calculate_area_intersection_weights(
+ select(blue, idblue), select(pink, idpink), normalize = TRUE))
+
+# NOTE: if we summarize by `pink` (target) `w` sums to 1 only where there is full overlap.
+
+summarize(group_by(blue_pink, idpink), w = sum(w))
+
+# Since normalize is true, we apply weights like:
+st_drop_geometry(blue) |>
+ left_join(blue_pink, by = "idblue") |>
+ group_by(idpink) |> # group so we get one row per `pink`
+  # the area of `blue` is already built into `w`, so no area term is needed
+ summarize(new_val = sum( (val * w) ) / sum( w ))
+
+# NOTE: `w` is the fraction of the polygon from `pink` covered by the polygon from `blue`.
+# The area of `blue` is built into the weight so we just sum the weight times value per polygon.
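+As a final check (a sketch assuming the objects above), the two modes
+agree when each is applied with its matching equation, since both
+reduce to weighting by intersection area:
+
+v1 <- st_drop_geometry(blue) |>
+  left_join(blue_pink_norm_false, by = "idblue") |>
+  group_by(idpink) |>
+  summarize(v = sum(val * w * area) / sum(w * area))
+
+v2 <- st_drop_geometry(blue) |>
+  left_join(blue_pink_norm_true, by = "idblue") |>
+  group_by(idpink) |>
+  summarize(v = sum(val * w) / sum(w))
+
+all.equal(v1$v, v2$v)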
@@ -55,6 +55,9 @@
Reading and Writing NetCDF-CF Geometry
+
+ Area Weight Generation for Polygon Intersections
+
Reading and Writing Timeseries
@@ -163,7 +166,7 @@ Reading and Writing Timeseries
timeseries ID (which is stored as a string).
-ncmeta::nc_dims(nc_file)
+ncmeta::nc_dims(nc_file)
#> # A tibble: 3 × 4
#> id name length unlim
#> <int> <chr> <dbl> <lgl>
@@ -173,7 +176,7 @@ Reading and Writing Timeseries
The file has variables for latitude, longitude, altitude, timeseries
IDs, and a data variable.
-ncmeta::nc_vars(nc_file)
+ncmeta::nc_vars(nc_file)
#> # A tibble: 6 × 5
#> id name type ndims natts
#> <int> <chr> <chr> <int> <int>
@@ -186,7 +189,7 @@ Reading and Writing Timeseries
The primary dimensions in the file are of length, number of time
steps and number of time series.
-ncmeta::nc_dims(nc_file)
+ncmeta::nc_dims(nc_file)
#> # A tibble: 3 × 4
#> id name length unlim
#> <int> <chr> <dbl> <lgl>
@@ -289,7 +292,7 @@ Reading and Writing Timeseries
diff --git a/docs/authors.html b/docs/authors.html
index eda525b..97b12c3 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -1,5 +1,5 @@
-Authors and Citation • ncdfgeom
+Authors and Citation • ncdfgeom
@@ -17,7 +17,7 @@
@@ -63,7 +63,7 @@
diff --git a/docs/index.html b/docs/index.html
index 88a2c6d..b03fa50 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -6,7 +6,7 @@
NetCDF Geometry and Time Series • ncdfgeom
-
+
@@ -33,7 +33,7 @@
@@ -104,7 +104,7 @@ Installation
install.packages("ncdfgeom")
For the latest development version:
install.packages("remotes")
-remotes::install_github("DOI-USGS/ncdfgeom")
+remotes::install_github("DOI-USGS/ncdfgeom")
Contributing
@@ -174,7 +174,7 @@ Developers
diff --git a/docs/news/index.html b/docs/news/index.html
index 3d55034..7e90730 100644
--- a/docs/news/index.html
+++ b/docs/news/index.html
@@ -1,5 +1,5 @@
-Changelog • ncdfgeom
+Changelog • ncdfgeom
@@ -17,7 +17,7 @@
@@ -90,7 +90,7 @@ ncdfgeom 1.1.0
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index bc8ae0c..65f639f 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -1,10 +1,10 @@
-pandoc: 3.1.1
-pkgdown: 2.0.7
+pandoc: '3.5'
+pkgdown: 2.0.9
pkgdown_sha: ~
articles:
geometry: geometry.html
ncdfgeom: ncdfgeom.html
polygon_intersection: polygon_intersection.html
timeseries: timeseries.html
-last_built: 2024-02-05T14:38Z
+last_built: 2024-11-19T19:10Z
diff --git a/docs/reference/calculate_area_intersection_weights-1.png b/docs/reference/calculate_area_intersection_weights-1.png
index b183774..9afaaeb 100644
Binary files a/docs/reference/calculate_area_intersection_weights-1.png and b/docs/reference/calculate_area_intersection_weights-1.png differ
diff --git a/docs/reference/calculate_area_intersection_weights.html b/docs/reference/calculate_area_intersection_weights.html
index 9a6552d..8a66589 100644
--- a/docs/reference/calculate_area_intersection_weights.html
+++ b/docs/reference/calculate_area_intersection_weights.html
@@ -1,9 +1,9 @@
-Area Weighted Intersection (areal implementation) — calculate_area_intersection_weights • ncdfgeom
+Area Weighted Intersection — calculate_area_intersection_weights • ncdfgeom
-will be two rows, one for each x/y pair of features with w = 0.5 in each. Weights
-will sum to 1 per SOURCE polygon if the target polygons fully cover that feature.
-`normalize = TRUE`, weights are divided by the target polygon area such that weights
+
`normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y
+(target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target)
+and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5
+in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that
+feature.
+For `normalize = FALSE` the area weighted mean calculation must include the area of each
+x (source) polygon as in:
+> *in this case, `area` is the area of source polygons and you would do this operation grouped
+by target polygon id.*
+> `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)`
+If `normalize = TRUE`, weights are divided by the target polygon area such that weights
sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons.
+For `normalize = TRUE` no area is required in the area weighted mean calculation,
+as in:
+> `sum( (val * w), na.rm = TRUE ) / sum(w)`
+See examples for illustration of these two modes.
Examples
- b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
- c(1,1), c(-1,1),
- c(-1,-1))))
-b2 = b1 + 2
-b3 = b1 + c(-0.2, 2)
-b4 = b1 + c(2.2, 0)
-b = sf::st_sfc(b1, b2, b3, b4)
-a1 = b1 * 0.8
-a2 = a1 + c(1, 2)
-a3 = a1 + c(-1, 2)
-a = sf::st_sfc(a1,a2,a3)
-plot(b, border = 'red')
-plot(a, border = 'green', add = TRUE)
-
-
-sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
-
-b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
-a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3)))
-
-sf::st_agr(a) <- sf::st_agr(b) <- "constant"
-
-calculate_area_intersection_weights(a, b, normalize = FALSE)
-#> # A tibble: 4 × 3
-#> ida idb w
-#> <dbl> <dbl> <dbl>
-#> 1 1 1 1
-#> 2 2 2 0.5
-#> 3 2 3 0.375
-#> 4 3 3 0.625
-calculate_area_intersection_weights(a, b, normalize = TRUE)
-#> # A tibble: 4 × 3
-#> ida idb w
-#> <dbl> <dbl> <dbl>
-#> 1 1 1 1
-#> 2 2 2 1
-#> 3 2 3 0.375
-#> 4 3 3 0.625
-calculate_area_intersection_weights(b, a, normalize = FALSE)
-#> # A tibble: 5 × 3
-#> idb ida w
-#> <dbl> <dbl> <dbl>
-#> 1 1 1 0.64
-#> 2 2 2 0.32
-#> 3 3 2 0.24
-#> 4 3 3 0.4
-#> 5 4 NA NA
-calculate_area_intersection_weights(b, a, normalize = TRUE)
-#> # A tibble: 5 × 3
-#> idb ida w
-#> <dbl> <dbl> <dbl>
-#> 1 1 1 1
-#> 2 2 2 0.571
-#> 3 3 2 0.429
-#> 4 3 3 1
-#> 5 4 NA NA
-
-#a more typical arrangement of polygons
-
-b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1),
- c(1,1), c(-1,1),
- c(-1,-1))))
-b2 = b1 + 2
-b3 = b1 + c(0, 2)
-b4 = b1 + c(2, 0)
-b = sf::st_sfc(b1, b2, b3, b4)
-a1 = b1 * 0.75 + c(-.25, -.25)
-a2 = a1 + 1.5
-a3 = a1 + c(0, 1.5)
-a4 = a1 + c(1.5, 0)
-a = sf::st_sfc(a1,a2,a3, a4)
-plot(b, border = 'red', lwd = 3)
-plot(a, border = 'green', add = TRUE)
-
-
-sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070)
-
-b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4)))
-a <- sf::st_sf(a, data.frame(ida = c("a", "b", "c", "d")))
+
+library(sf)
+#> Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.3.1; sf_use_s2() is TRUE
-sf::st_agr(a) <- sf::st_agr(b) <- "constant"
+source <- st_sf(source_id = c(1, 2),
+ val = c(10, 20),
+ geom = st_as_sfc(c(
+ "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))",
+ "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))")))
-# say we have data from `a` that we want sampled to `b`.
-# this gives the percent of each `a` that intersects each `b`
+source$area <- as.numeric(st_area(source))
-(a_b <- calculate_area_intersection_weights(a, b, normalize = FALSE))
-#> # A tibble: 9 × 3
-#> ida idb w
-#> <chr> <dbl> <dbl>
-#> 1 a 1 1
-#> 2 b 1 0.111
-#> 3 c 1 0.333
-#> 4 d 1 0.333
-#> 5 b 2 0.444
-#> 6 b 3 0.222
-#> 7 c 3 0.667
-#> 8 b 4 0.222
-#> 9 d 4 0.667
+target <- st_sf(target_id = "a",
+ geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))"))
-# note that `w` sums to 1 where `b` completely covers `a`.
-
-dplyr::summarize(dplyr::group_by(a_b, ida), w = sum(w))
-#> # A tibble: 4 × 2
-#> ida w
-#> <chr> <dbl>
-#> 1 a 1
-#> 2 b 1
-#> 3 c 1
-#> 4 d 1
-
-# We can apply these weights like...
-dplyr::tibble(ida = unique(a_b$ida),
- val = c(1, 2, 3, 4)) |>
- dplyr::left_join(a_b, by = "ida") |>
- dplyr::mutate(val = ifelse(is.na(w), NA, val),
- areasqkm = 1.5 ^ 2) |> # area of each polygon in `a`
- dplyr::group_by(idb) |> # group so we get one row per `b`
- # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
- dplyr::summarize(new_val = sum( (val * w * areasqkm), na.rm = TRUE ) / sum(w * areasqkm))
-#> # A tibble: 4 × 2
-#> idb new_val
-#> <dbl> <dbl>
-#> 1 1 2
-#> 2 2 2
-#> 3 3 2.75
-#> 4 4 3.5
-
-# we can go in reverse if we had data from b that we want sampled to a
-
-(b_a <- calculate_area_intersection_weights(b, a, normalize = FALSE))
-#> # A tibble: 9 × 3
-#> idb ida w
-#> <dbl> <chr> <dbl>
-#> 1 1 a 0.562
-#> 2 1 b 0.0625
-#> 3 2 b 0.25
-#> 4 3 b 0.125
-#> 5 4 b 0.125
-#> 6 1 c 0.188
-#> 7 3 c 0.375
-#> 8 1 d 0.188
-#> 9 4 d 0.375
-
-# note that `w` sums to 1 only where `a` complete covers `b`
-
-dplyr::summarize(dplyr::group_by(b_a, idb), w = sum(w))
-#> # A tibble: 4 × 2
-#> idb w
-#> <dbl> <dbl>
-#> 1 1 1
-#> 2 2 0.25
-#> 3 3 0.5
-#> 4 4 0.5
-
-# with `normalize = TRUE`, `w` will sum to 1 when the target
-# completely covers the source rather than when the source completely
-# covers the target.
-
-(a_b <- calculate_area_intersection_weights(a, b, normalize = TRUE))
-#> # A tibble: 9 × 3
-#> ida idb w
-#> <chr> <dbl> <dbl>
-#> 1 a 1 0.562
-#> 2 b 1 0.0625
-#> 3 c 1 0.188
-#> 4 d 1 0.188
-#> 5 b 2 1
-#> 6 b 3 0.25
-#> 7 c 3 0.75
-#> 8 b 4 0.25
-#> 9 d 4 0.75
-
-dplyr::summarize(dplyr::group_by(a_b, idb), w = sum(w))
-#> # A tibble: 4 × 2
-#> idb w
-#> <dbl> <dbl>
-#> 1 1 1
-#> 2 2 1
-#> 3 3 1
-#> 4 4 1
-
-(b_a <- calculate_area_intersection_weights(b, a, normalize = TRUE))
-#> # A tibble: 9 × 3
-#> idb ida w
-#> <dbl> <chr> <dbl>
-#> 1 1 a 1
-#> 2 1 b 0.111
-#> 3 2 b 0.444
-#> 4 3 b 0.222
-#> 5 4 b 0.222
-#> 6 1 c 0.333
-#> 7 3 c 0.667
-#> 8 1 d 0.333
-#> 9 4 d 0.667
-
-dplyr::summarize(dplyr::group_by(b_a, ida), w = sum(w))
-#> # A tibble: 4 × 2
-#> ida w
-#> <chr> <dbl>
-#> 1 a 1
-#> 2 b 1
-#> 3 c 1
-#> 4 d 1
+plot(source['val'], reset = FALSE)
+plot(st_geometry(target), add = TRUE)
+
-# We can apply these weights like...
-# Note that we don't need area in the normalized case
-dplyr::tibble(ida = unique(a_b$ida),
- val = c(1, 2, 3, 4)) |>
- dplyr::left_join(a_b, by = "ida") |>
- dplyr::mutate(val = ifelse(is.na(w), NA, val)) |>
- dplyr::group_by(idb) |> # group so we get one row per `b`
- # now we weight by the percent of the area from each polygon in `b` per polygon in `a`
- dplyr::summarize(new_val = sum( (val * w), na.rm = TRUE ))
-#> # A tibble: 4 × 2
-#> idb new_val
-#> <dbl> <dbl>
-#> 1 1 2
-#> 2 2 2
-#> 3 3 2.75
-#> 4 4 3.5
+(w <-
+calculate_area_intersection_weights(source[c("source_id", "geom")],
+ target[c("target_id", "geom")],
+ normalize = FALSE, allow_lonlat = TRUE))
+#> # A tibble: 2 × 3
+#> source_id target_id w
+#> <dbl> <chr> <dbl>
+#> 1 1 a 0.375
+#> 2 2 a 0.604
+
+(res <-
+merge(st_drop_geometry(source), w, by = "source_id"))
+#> source_id val area target_id w
+#> 1 1 10 2.5600 a 0.3750000
+#> 2 2 20 3.6864 a 0.6041667
+
+sum(res$val * res$w * res$area) / sum(res$w * res$area)
+#> [1] 16.98795
+
+(w <-
+calculate_area_intersection_weights(source[c("source_id", "geom")],
+ target[c("target_id", "geom")],
+ normalize = TRUE, allow_lonlat = TRUE))
+#> # A tibble: 2 × 3
+#> source_id target_id w
+#> <dbl> <chr> <dbl>
+#> 1 1 a 0.24
+#> 2 2 a 0.557
+(res <-
+merge(st_drop_geometry(source), w, by = "source_id"))
+#> source_id val area target_id w
+#> 1 1 10 2.5600 a 0.2400
+#> 2 2 20 3.6864 a 0.5568
+
+sum(res$val * res$w) / sum(res$w)
+#> [1] 16.98795
@@ -360,7 +216,7 @@ Examples
diff --git a/docs/reference/create_cell_geometry.html b/docs/reference/create_cell_geometry.html
index 76df750..8dc5527 100644
--- a/docs/reference/create_cell_geometry.html
+++ b/docs/reference/create_cell_geometry.html
@@ -1,5 +1,5 @@
-Create Cell Geometry — create_cell_geometry • ncdfgeom
+Create Cell Geometry — create_cell_geometry • ncdfgeom
@@ -17,7 +17,7 @@
@@ -37,6 +37,9 @@