diff --git a/.github/workflows/R-CMD-check.yml b/.github/workflows/R-CMD-check.yml index 098027b..24e464f 100644 --- a/.github/workflows/R-CMD-check.yml +++ b/.github/workflows/R-CMD-check.yml @@ -10,59 +10,37 @@ name: R-CMD-check jobs: R-CMD-check: - runs-on: ubuntu-20.04 + runs-on: ${{ matrix.config.os }} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} _R_CHECK_TESTS_NLINES_: 0 + NOT_CRAN: true + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'release'} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: r-lib/actions/setup-r@v2 - - - name: Query dependencies - run: | - install.packages('remotes') - saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) - writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") - shell: Rscript {0} + - uses: r-lib/actions/setup-pandoc@v2 - - name: Cache R packages - if: runner.os != 'Windows' - uses: actions/cache@v2 + - uses: r-lib/actions/setup-r@v2 with: - path: ${{ env.R_LIBS_USER }} - key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} - restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- - - - name: Install system dependencies - if: runner.os == 'Linux' - env: - RHUB_PLATFORM: linux-x86_64-ubuntu-gcc - run: | - Rscript -e "remotes::install_github('r-hub/sysreqs')" - sysreqs=$(Rscript -e "cat(sysreqs::sysreq_commands('DESCRIPTION'))") - sudo -s eval "$sysreqs" - - - name: Install dependencies - run: | - install.packages(c("remotes")) - remotes::install_deps(dependencies = TRUE) - remotes::install_cran("rcmdcheck") - remotes::install_cran("covr") - shell: Rscript {0} - - - name: Check - run: rcmdcheck::rcmdcheck(args = c("--ignore-vignettes", "--no-manual"), build_args = c("--no-build-vignettes"), error_on = "error", check_dir = "check") - shell: Rscript {0} + r-version: ${{ matrix.config.r }} + use-public-rspm: true + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck, any::covr + needs: check, coverage - - name: Upload check results - if: failure() - uses: actions/upload-artifact@v3 + - uses: r-lib/actions/check-r-package@v2 with: - name: results - path: check + upload-snapshots: true - name: Test coverage run: covr::codecov() - shell: Rscript {0} + shell: Rscript {0} \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index e85d7b7..195865f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ncdfgeom Type: Package Title: 'NetCDF' Geometry and Time Series -Version: 1.2.0 +Version: 1.3.0 Authors@R: c(person("David", "Blodgett", role = c("aut", "cre"), email = "dblodgett@usgs.gov"), person("Luke", "Winslow", role = "ctb")) @@ -15,4 +15,4 @@ Suggests: testthat, knitr, rmarkdown, pkgdown, jsonlite, zip, ncdf4, nhdplusTool License: CC0 Encoding: UTF-8 VignetteBuilder: knitr -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/R/calculate_area_intersection_weights.R b/R/calculate_area_intersection_weights.R index da47a38..a9529fe 100644 --- a/R/calculate_area_intersection_weights.R +++ b/R/calculate_area_intersection_weights.R @@ -1,9 +1,9 @@ -#' Area Weighted Intersection (areal implementation) +#' Area Weighted Intersection #' @description Returns the fractional percent of each #' feature in x that is covered by each intersecting feature #' in y. 
These can be used as the weights in an area-weighted -#' mean overlay analysis where x is the data source and area- -#' weighted means are being generated for the target, y. +#' mean overlay analysis where x is the data **source** and area- +#' weighted means are being generated for the **target**, y. #' #' This function is a lightweight wrapper around the functions #' \link[areal]{aw_intersect} \link[areal]{aw_total} and \link[areal]{aw_weight} #' #' @param x sf data.frame source features including one geometry column and one identifier column #' @param y sf data.frame target features including one geometry column and one identifier column -#' @param normalize logical return normalized weights or not. See details and examples. +#' @param normalize logical return normalized weights or not. +#' +#' Normalized weights express the fraction of **target** polygons covered by +#' a portion of each **source** polygon. They are normalized in that the area +#' of each **source** polygon has already been factored into the weight. +#' +#' Un-normalized weights express the fraction of **source** polygons covered by +#' a portion of each **target** polygon. This is a more general form that requires +#' knowledge of the area of each **source** polygon to derive area-weighted +#' statistics from **source** to **target**. +#' +#' See details and examples for more regarding this distinction. +#' #' @param allow_lonlat boolean If FALSE (the default) lon/lat target features are not allowed. #' Intersections in lon/lat are generally not valid and problematic at the international date line. +#' #' @return data.frame containing fraction of each feature in x that is #' covered by each feature in y. #' @@ -21,120 +34,73 @@ #' #' Two versions of weights are available: #' -#' `normalize = FALSE`, if a polygon from x is entirely within a polygon in y, -#' w will be 1. If a polygon from x is 50% in one polygon from y and 50% in another, there -#' will be two rows, one for each x/y pair of features with w = 0.5 in each. Weights -#' will sum to 1 per SOURCE polygon if the target polygons fully cover that feature. -#' `normalize = TRUE`, weights are divided by the target polygon area such that weights -#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons. 
-#' -#' @examples -#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1), -#' c(1,1), c(-1,1), -#' c(-1,-1)))) -#' b2 = b1 + 2 -#' b3 = b1 + c(-0.2, 2) -#' b4 = b1 + c(2.2, 0) -#' b = sf::st_sfc(b1, b2, b3, b4) -#' a1 = b1 * 0.8 -#' a2 = a1 + c(1, 2) -#' a3 = a1 + c(-1, 2) -#' a = sf::st_sfc(a1,a2,a3) -#' plot(b, border = 'red') -#' plot(a, border = 'green', add = TRUE) -#' -#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070) -#' -#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4))) -#' a <- sf::st_sf(a, data.frame(ida = c(1, 2, 3))) -#' -#' sf::st_agr(a) <- sf::st_agr(b) <- "constant" -#' -#' calculate_area_intersection_weights(a, b, normalize = FALSE) -#' calculate_area_intersection_weights(a, b, normalize = TRUE) -#' calculate_area_intersection_weights(b, a, normalize = FALSE) -#' calculate_area_intersection_weights(b, a, normalize = TRUE) -#' -#' #a more typical arrangement of polygons -#' -#' b1 = sf::st_polygon(list(rbind(c(-1,-1), c(1,-1), -#' c(1,1), c(-1,1), -#' c(-1,-1)))) -#' b2 = b1 + 2 -#' b3 = b1 + c(0, 2) -#' b4 = b1 + c(2, 0) -#' b = sf::st_sfc(b1, b2, b3, b4) -#' a1 = b1 * 0.75 + c(-.25, -.25) -#' a2 = a1 + 1.5 -#' a3 = a1 + c(0, 1.5) -#' a4 = a1 + c(1.5, 0) -#' a = sf::st_sfc(a1,a2,a3, a4) -#' plot(b, border = 'red', lwd = 3) -#' plot(a, border = 'green', add = TRUE) -#' -#' sf::st_crs(b) <- sf::st_crs(a) <- sf::st_crs(5070) -#' -#' b <- sf::st_sf(b, data.frame(idb = c(1, 2, 3, 4))) -#' a <- sf::st_sf(a, data.frame(ida = c("a", "b", "c", "d"))) -#' -#' sf::st_agr(a) <- sf::st_agr(b) <- "constant" -#' -#' # say we have data from `a` that we want sampled to `b`. -#' # this gives the percent of each `a` that intersects each `b` +#' `normalize = FALSE`, if a polygon from x (source) is entirely within a polygon in y +#' (target), w will be 1. If a polygon from x (source) is 50% in one polygon from y (target) +#' and 50% in another, there will be two rows, one for each x/y pair of features with w = 0.5 +#' in each. Weights will sum to 1 per **SOURCE** polygon if the target polygons fully cover that +#' feature. #' -#' (a_b <- calculate_area_intersection_weights(a, b, normalize = FALSE)) +#' For `normalize = FALSE` the area weighted mean calculation must include the area of each +#' x (source) polygon as in: #' -#' # note that `w` sums to 1 where `b` completely covers `a`. +#' > *in this case, `area` is the area of source polygons and you would do this operation grouped +#' by target polygon id.* +#' +#' > `sum( (val * w * area), na.rm = TRUE ) / sum(w * area)` #' -#' dplyr::summarize(dplyr::group_by(a_b, ida), w = sum(w)) +#' If `normalize = TRUE`, weights are divided by the target polygon area such that weights +#' sum to 1 per TARGET polygon if the target polygon is fully covered by source polygons. +#' +#' For `normalize = TRUE` the area weighted mean calculation requires no area +#' as in: #' -#' # We can apply these weights like... 
-#' dplyr::tibble(ida = unique(a_b$ida), -#' val = c(1, 2, 3, 4)) |> -#' dplyr::left_join(a_b, by = "ida") |> -#' dplyr::mutate(val = ifelse(is.na(w), NA, val), -#' areasqkm = 1.5 ^ 2) |> # area of each polygon in `a` -#' dplyr::group_by(idb) |> # group so we get one row per `b` -#' # now we weight by the percent of the area from each polygon in `b` per polygon in `a` -#' dplyr::summarize(new_val = sum( (val * w * areasqkm), na.rm = TRUE ) / sum(w * areasqkm)) -#' -#' # we can go in reverse if we had data from b that we want sampled to a +#' > `sum( (val * w), na.rm = TRUE ) / sum(w)` +#' +#' See examples for illustration of these two modes. +#' +#' @examples +#' +#' library(sf) #' -#' (b_a <- calculate_area_intersection_weights(b, a, normalize = FALSE)) +#' source <- st_sf(source_id = c(1, 2), +#' val = c(10, 20), +#' geom = st_as_sfc(c( +#' "POLYGON ((0.2 1.2, 1.8 1.2, 1.8 2.8, 0.2 2.8, 0.2 1.2))", +#' "POLYGON ((-1.96 1.04, -0.04 1.04, -0.04 2.96, -1.96 2.96, -1.96 1.04))"))) #' -#' # note that `w` sums to 1 only where `a` complete covers `b` +#' source$area <- as.numeric(st_area(source)) #' -#' dplyr::summarize(dplyr::group_by(b_a, idb), w = sum(w)) +#' target <- st_sf(target_id = "a", +#' geom = st_as_sfc("POLYGON ((-1.2 1, 0.8 1, 0.8 3, -1.2 3, -1.2 1))")) #' -#' # with `normalize = TRUE`, `w` will sum to 1 when the target -#' # completely covers the source rather than when the source completely -#' # covers the target. +#' plot(source['val'], reset = FALSE) +#' plot(st_geometry(target), add = TRUE) #' -#' (a_b <- calculate_area_intersection_weights(a, b, normalize = TRUE)) +#' (w <- +#' calculate_area_intersection_weights(source[c("source_id", "geom")], +#' target[c("target_id", "geom")], +#' normalize = FALSE, allow_lonlat = TRUE)) #' -#' dplyr::summarize(dplyr::group_by(a_b, idb), w = sum(w)) +#' (res <- +#' merge(st_drop_geometry(source), w, by = "source_id")) #' -#' (b_a <- calculate_area_intersection_weights(b, a, normalize = TRUE)) +#' sum(res$val * res$w * res$area) / sum(res$w * res$area) #' -#' dplyr::summarize(dplyr::group_by(b_a, ida), w = sum(w)) +#' (w <- +#' calculate_area_intersection_weights(source[c("source_id", "geom")], +#' target[c("target_id", "geom")], +#' normalize = TRUE, allow_lonlat = TRUE)) +#' (res <- +#' merge(st_drop_geometry(source), w, by = "source_id")) #' -#' # We can apply these weights like... 
-#' # Note that we don't need area in the normalized case -#' dplyr::tibble(ida = unique(a_b$ida), -#' val = c(1, 2, 3, 4)) |> -#' dplyr::left_join(a_b, by = "ida") |> -#' dplyr::mutate(val = ifelse(is.na(w), NA, val)) |> -#' dplyr::group_by(idb) |> # group so we get one row per `b` -#' # now we weight by the percent of the area from each polygon in `b` per polygon in `a` -#' dplyr::summarize(new_val = sum( (val * w), na.rm = TRUE )) - +#' sum(res$val * res$w) / sum(res$w) #' #' @export #' @importFrom sf st_intersection st_set_geometry st_area st_crs st_drop_geometry #' @importFrom dplyr mutate group_by right_join select ungroup left_join mutate calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = FALSE) { - + if(missing(normalize)) { warning("Required input normalize is missing, defaulting to FALSE.") normalize <- FALSE @@ -172,27 +138,26 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = int <- areal::aw_intersect(y, source = x, areaVar = "area_intersection") - int <- areal::aw_total(int, source = x, - id = "varx", # the unique id in the "source" x - areaVar = "area_intersection", - totalVar = "totalArea_x", - type = "extensive", - weight = "total") - int <- areal::aw_weight(int, areaVar = "area_intersection", - totalVar = "totalArea_x", - areaWeight = "areaWeight_x_y") - - int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx") + + if(!normalize) { - if(normalize) { + int <- areal::aw_total(int, + source = x, + id = "varx", # the unique id in the "source" x + areaVar = "area_intersection", + totalVar = "totalArea_x", + type = "extensive", + weight = "total") - # for normalized, we return the percent of each target covered by each source - int <- areal::aw_intersect(y, - source = x, - areaVar = "area_intersection") + int <- areal::aw_weight(int, areaVar = "area_intersection", + totalVar = "totalArea_x", + areaWeight = "areaWeight_x_y") - # for normalized, we sum the intersection area by the total target intersection area - int <- ungroup(mutate(group_by(int, vary), totalArea_y = sum(area_intersection))) + } else { + + # for normalized, we sum the intersection area by the total target area + int <- left_join(int, data.frame(vary = y$vary, + totalArea_y = as.numeric(sf::st_area(y))), by = "vary") int <- areal::aw_weight(int, areaVar = "area_intersection", @@ -203,8 +168,8 @@ calculate_area_intersection_weights <- function(x, y, normalize, allow_lonlat = int <- right_join(st_drop_geometry(int), st_drop_geometry(x), by = "varx") - int <- select(int, varx, vary, w = areaWeight_x_y) - + int <- select(int, varx, vary, w = "areaWeight_x_y") + names(int) <- c(id_x, id_y, "w") return(dplyr::as_tibble(int)) diff --git a/docs/404.html b/docs/404.html index 16a73ca..21aaa2c 100644 --- a/docs/404.html +++ b/docs/404.html @@ -6,7 +6,7 @@ Page not found (404) • ncdfgeom - + @@ -32,7 +32,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -109,7 +109,7 @@

Page not found (404)

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/DISCLAIMER.html b/docs/DISCLAIMER.html index 424a601..db4cf1d 100644 --- a/docs/DISCLAIMER.html +++ b/docs/DISCLAIMER.html @@ -1,5 +1,5 @@ -Disclaimer • ncdfgeomDisclaimer • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -87,7 +87,7 @@

Disclaimer

-

Site built with pkgdown 2.0.7.

+

Site built with pkgdown 2.0.9.

diff --git a/docs/articles/geometry.html b/docs/articles/geometry.html index 05a7031..414736f 100644 --- a/docs/articles/geometry.html +++ b/docs/articles/geometry.html @@ -6,7 +6,7 @@ Reading and Writing NetCDF-CF Geometry • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -131,7 +134,7 @@

    Load Spatial Data
    -(vars <- ncmeta::nc_vars(example_file))
    +(vars <- ncmeta::nc_vars(example_file))
     #> # A tibble: 5 × 5
     #>      id name         type      ndims natts
     #>   <int> <chr>        <chr>     <int> <int>
    @@ -146,7 +149,7 @@ 

    Load Spatial Data= "station", variables = vars$name) -> example_file

    Now the NetCDF file looks like:

    -
    #> netcdf file69986203fbb {
    +
    #> netcdf file63806c813359 {
     #> dimensions:
     #>  maxStrlen64 = 64 ;
     #>  station = 2 ;
    @@ -681,7 +684,7 @@ 

    Multiple MultiPolygons wi

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/index.html b/docs/articles/index.html index a3104bb..05de0fe 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,5 +1,5 @@ -Articles • ncdfgeomArticles • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -87,7 +87,7 @@

    All vignettes

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/ncdfgeom.html b/docs/articles/ncdfgeom.html index 34de8e3..e5586d1 100644 --- a/docs/articles/ncdfgeom.html +++ b/docs/articles/ncdfgeom.html @@ -6,7 +6,7 @@ NetCDF-CF Geometry and Timeseries Tools for R • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -538,7 +541,7 @@

    Read Timeseries and Geometry f

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/articles/ncdfgeom_files/figure-html/plot-1.png b/docs/articles/ncdfgeom_files/figure-html/plot-1.png index c87f77b..c150975 100644 Binary files a/docs/articles/ncdfgeom_files/figure-html/plot-1.png and b/docs/articles/ncdfgeom_files/figure-html/plot-1.png differ diff --git a/docs/articles/polygon_intersection.html b/docs/articles/polygon_intersection.html index 154fab4..d81e3a8 100644 --- a/docs/articles/polygon_intersection.html +++ b/docs/articles/polygon_intersection.html @@ -6,7 +6,7 @@ Area Weight Generation for Polygon Intersections • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0 @@ -103,81 +103,222 @@

    Area Weight Generation for Polygon

This vignette is a comparison with the gdptools Python package demonstration here.

    -

    See calculate_area_intersection_weights() for additional -demonstration and info.

     
     gdptools_weights <- read.csv(system.file("extdata/gdptools_prl_out.csv", package = "ncdfgeom"), 
                                  colClasses = c("character", "character", "numeric"))
     
    -gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
    +gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)
     
     gage_id <- "USGS-01482100"
    -basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
    -huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
    -#> Spherical geometry (s2) switched off
    -#> Spherical geometry (s2) switched on
    -huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
    -#> Spherical geometry (s2) switched off
    -#> Spherical geometry (s2) switched on
    +basin <- nhdplusTools::get_nldi_basin(list(featureSource = "nwissite", featureId = gage_id))
    +huc08 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc8)), type = "huc08")
    +huc12 <- nhdplusTools::get_huc(id = na.omit(unique(gdptools_weights$huc12)), type = "huc12")
     
     org_par <- par(mar = c(0, 0, 0, 0))
    -plot(sf::st_as_sfc(sf::st_bbox(huc12)))
    -plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
    -plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
    -plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
    -

    -
    -par(org_par)
    +plot(sf::st_as_sfc(sf::st_bbox(huc12)))
    +plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
    +plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500), add = TRUE, lwd = 2)
    +plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500), add = TRUE, lwd = 0.2, border = "grey")
    +par(org_par)
     
     weights <- ncdfgeom::calculate_area_intersection_weights(
    -  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
    -  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
    +  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
    +  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
       normalize = TRUE
     )
    -#> Loading required namespace: areal
     
    -weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))
    +weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))

    With weights calculated, we can do a little investigation into the differences.

    -
    +
     
     weights$diff <- weights$w - weights$gdptools_wght
     
     # make sure nothing is way out of whack
     max(weights$diff, na.rm = TRUE)
    -#> [1] 0.0000009205585
     
     # ensure the weights generally sum as we would expect.
     sum(weights$gdptools_wght, na.rm = TRUE)
    -#> [1] 25
     sum(weights$w, na.rm = TRUE)
    -#> [1] 25
     length(unique(na.omit(weights$huc8)))
    -#> [1] 25
     
     # see how many NA values we have in each.
     sum(is.na(weights$w))
    -#> [1] 183
     sum(is.na(weights$gdptools_wght))
    -#> [1] 183
     
# look at cases where gdptools has NA and ncdfgeom does not
    -weights[is.na(weights$gdptools_wght),]
    -#> # A tibble: 183 × 5
    -#>    huc12        huc8      w gdptools_wght  diff
    -#>    <chr>        <chr> <dbl>         <dbl> <dbl>
    -#>  1 011000050101 NA       NA            NA    NA
    -#>  2 011000050201 NA       NA            NA    NA
    -#>  3 011000050202 NA       NA            NA    NA
    -#>  4 011000050203 NA       NA            NA    NA
    -#>  5 011000050305 NA       NA            NA    NA
    -#>  6 011000050501 NA       NA            NA    NA
    -#>  7 011000050504 NA       NA            NA    NA
    -#>  8 020200030604 NA       NA            NA    NA
    -#>  9 020200030801 NA       NA            NA    NA
    -#> 10 020200030802 NA       NA            NA    NA
    -#> # ℹ 173 more rows
    +weights[is.na(weights$gdptools_wght) & !is.na(weights$w),]
    +

    The following example illustrates the nuances between normalized and +non-normalized area weights and shows more specifically how area weight +intersection calculations can be accomplished.

    +

The set of polygons is contrived but useful for the sake of demonstration.

    +
    +
    +library(dplyr)
    +library(sf)
    +library(ncdfgeom)
    +
    +g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))
    +
    +blue1 = sf::st_polygon(g) * 0.8
    +blue2 = blue1 + c(1, 2)
    +blue3 = blue1 * 1.2 + c(-1, 2)
    +
    +pink1 = sf::st_polygon(g)
    +pink2 = pink1 + 2
    +pink3 = pink1 + c(-0.2, 2)
    +pink4 = pink1 + c(2.2, 0)
    +
    +blue = sf::st_sfc(blue1,blue2,blue3)
    +
    +pink = sf::st_sfc(pink1, pink2, pink3, pink4)
    +
    +plot(c(blue,pink), border = NA)
    +plot(blue, border = "#648fff", add = TRUE)
    +plot(pink, border = "#dc267f", add = TRUE)
    +
    +blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3)))
    +pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10)))
    +
    +text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
    +     sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
    +     blue$idblue, col = "#648fff")
    +
    +text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4),
    +     sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])),
    +     pink$idpink, col = "#dc267f")
    +
    +sf::st_agr(blue) <- sf::st_agr(pink) <- "constant"
    +sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070)
    +
    +
    +(blue_pink_norm_false <- 
    +calculate_area_intersection_weights(blue, pink, normalize = FALSE))
    +

    NOTE: normalize = FALSE so weights sum to 1 per source polygon only +when a source polygon is fully covered by the target. The +non-intersecting portion is not included.

    +
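As a quick check of that note (a sketch reusing `blue_pink_norm_false` and the dplyr functions loaded above), summing `w` by source polygon shows only blue-1, the one polygon fully covered by `pink`, reaching 1:
+# per-source sums: only fully-covered blue-1 reaches 1
+summarize(group_by(blue_pink_norm_false, idblue), w = sum(w))
+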

The following breaks down how to use these weights for one target polygon.

    +
    +blue$val = c(30, 10, 20)
    +blue$area <- as.numeric(sf::st_area(blue))
    +
    +(result <- st_drop_geometry(blue) |>
    +  left_join(blue_pink_norm_false, by = "idblue"))
    +

    To calculate the value for pink-9, we would do:

    +
    +
    +((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) / ((0.375 * 2.56) + (0.604167 * 3.6864))
    +

This is saying that 0.375 of blue-2 covers pink-9 and 0.6 of blue-3 covers pink-9. Since we are using area as the weighting method, we multiply the fraction of each source polygon by its area and by the value we want an area-weighted mean of. We sum the contributions from blue-2 and blue-3 to pink-9 and divide by the sum of the area weights.

    +
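For reference, each un-normalized weight is simply intersection area divided by source-polygon area. A sketch using the unrounded intersection areas implied by the geometry above:
+# un-normalized w = intersection area / source polygon area
+0.96   / 2.56    # blue-2 with pink-9: 0.375
+2.2272 / 3.6864  # blue-3 with pink-9: 0.604167
+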

Note that because there is no contribution to pink-9 over some parts of the polygon, that missing area does not appear in the calculation. The intersecting areas are 0.96 and 2.23, meaning that we are missing

    +

    4 - 0.96 - 2.23 = 0.81

    +

    and could rewrite the value for pink-9 as:

    +
+((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864) + (NA * 1 * 0.81)) / 
+  ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864))
    +

    Which evaluates to NA. This is why for this operation we usually drop +NA terms!

    +
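A minimal sketch of that dropping, using the three terms from the equation above: removing the NA term from both the numerator and the denominator recovers the mean over the covered area only.
+v <- c(10, 20, NA)  # blue-2, blue-3, and the uncovered remainder
+w <- c(0.375, 0.604167, 1)
+a <- c(2.56, 3.6864, 0.81)
+
+keep <- !is.na(v)   # drop NA terms from numerator and denominator alike
+sum(v[keep] * w[keep] * a[keep]) / sum(w[keep] * a[keep])
+#> [1] 16.98795
+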

    In practice, the above can be accomplished with:

    +
    +(result <- result |>
    +  group_by(idpink) |> # group so we get one row per target
    +  # now we calculate the value for each `pink` with fraction of the area of each
    +  # polygon in `blue` per polygon in `pink` with an equation like this:
    +  summarize(
    +    new_val = sum( (val * w * area) ) / sum(w * area)))
    +

Now let’s do the same thing but with normalize = TRUE.

    +
    +
    +(blue_pink_norm_true <-
    +calculate_area_intersection_weights(select(blue, idblue), pink, normalize = TRUE))
    +

    NOTE: normalize = TRUE so weights sum to 1 per target polygon. +Non-overlap is ignored as if it does not exist.

    +
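The same kind of check works here (a sketch reusing `blue_pink_norm_true`): summing `w` by target polygon shows how much of each `pink` the `blue` polygons cover; none reach 1 because no `pink` is fully covered:
+summarize(group_by(blue_pink_norm_true, idpink), w = sum(w))
+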

The following breaks down how to use these weights for one target polygon.

    +
    +(result <- st_drop_geometry(blue) |>
    +    left_join(blue_pink_norm_true, by = "idblue"))
    +

    To calculate the value for pink-9, we would do:

    +
    +((10 * 0.24) + (20 * 0.5568)) / (0.24 + (0.5568))
    +

This is saying that the portion of pink-9 that should get the value from blue-2 is about 0.3 and the portion of pink-9 that should get the value from blue-3 is about 0.7. In this form, our weights are transformed to include the relative area of the source polygons.

    +
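The two weight forms are related by simple arithmetic. A sketch using the pink-9 numbers from above (each `pink` polygon has area 4):
+# normalized w = un-normalized w * source area / target area
+(0.375    * 2.56)   / 4  # blue-2: 0.24
+(0.604167 * 3.6864) / 4  # blue-3: 0.5568
+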

    As shown above as well, the calculation can be accomplished with:

    +
    +(result <- result |>
    +    group_by(idpink) |> # group so we get one row per target
    +    # now we calculate the value for each `pink` with fraction of the area of each
    +    # polygon in `blue` per polygon in `pink` with an equation like this:
    +    summarize(
    +      new_val = sum( (val * w) ) / sum(w)))
    +

We can look at the same set of polygons a different way.

    +

    Let’s also look at the values.

    +
    +# say we have data from `blue` that we want sampled to `pink`.
    +# this gives the percent of each `blue` that intersects each `pink`
    +
    +(blue_pink <- calculate_area_intersection_weights(
    +  select(blue, idblue), select(pink, idpink), normalize = FALSE))
    +
+# NOTE: `w` sums to 1 per `blue` only where the `pink` polygons fully cover a `blue` polygon
    +
    +summarize(group_by(blue_pink, idblue), w = sum(w))
    +
    +# Since normalize is false, we apply weights like:
    +st_drop_geometry(blue) |>
    +  left_join(blue_pink, by = "idblue") |>
+  # `area`, computed above with sf::st_area(), is the area of each polygon in `blue`
+  group_by(idpink) |> # group so we get one row per `pink`
+  # now we calculate the value for each `pink` with the fraction of the area of each
+  # polygon in `blue` per polygon in `pink` with an equation like this:
+  summarize(
+    new_val = sum( (val * w * area) ) / sum(w * area))
    +
+# NOTE: `w` is the fraction of each polygon in `blue` that intersects a polygon in
+# `pink`. We multiply `w` by the area of the `blue` polygon it belongs to so each
+# contribution is weighted by the source polygon's area.
    +
    +# we can go in reverse if we had data from `pink` that we want sampled to `blue`
    +
    +(pink_blue <- calculate_area_intersection_weights(
    +  select(pink, idpink), select(blue, idblue), normalize = FALSE))
    +
    +# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by `blue` (target).
    +
    +summarize(group_by(pink_blue, idpink), w = sum(w))
    +
    +# Now let's look at what happens if we set normalize = TRUE. Here we
    +# get `blue` as source and `pink` as target but normalize the weights so
    +# the area of `blue` is built into `w`.
    +
    +(blue_pink <- calculate_area_intersection_weights(
    +  select(blue, idblue), select(pink, idpink), normalize = TRUE))
    +
    +# NOTE: if we summarize by `pink` (target) `w` sums to 1 only where there is full overlap.
    +
    +summarize(group_by(blue_pink, idpink), w = sum(w))
    +
+# Since normalize is true, we apply weights like:
    +st_drop_geometry(blue) |>
    +  left_join(blue_pink, by = "idblue") |>
    +  group_by(idpink) |> # group so we get one row per `pink`
+  # now we weight by the fraction of each polygon in `pink` covered by each polygon in `blue`
    +  summarize(new_val = sum( (val * w) ) / sum( w ))
    +
    +# NOTE: `w` is the fraction of the polygon from `blue` overlapping the polygon from `pink`.
+# The area of `blue` is built into the weight so we just sum the weight times value per polygon.
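As a final sanity check, here is a sketch (assuming `blue` and `pink` from above are still in scope) showing that the two modes produce the same area-weighted mean per target polygon:
+w_false <- calculate_area_intersection_weights(
+  select(blue, idblue), select(pink, idpink), normalize = FALSE)
+w_true  <- calculate_area_intersection_weights(
+  select(blue, idblue), select(pink, idpink), normalize = TRUE)
+
+v_false <- st_drop_geometry(blue) |>
+  left_join(w_false, by = "idblue") |>
+  group_by(idpink) |>
+  summarize(new_val = sum(val * w * area) / sum(w * area))
+
+v_true <- st_drop_geometry(blue) |>
+  left_join(w_true, by = "idblue") |>
+  group_by(idpink) |>
+  summarize(new_val = sum(val * w) / sum(w))
+
+all.equal(v_false$new_val, v_true$new_val)
+#> [1] TRUE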
    @@ -55,6 +55,9 @@
  • Reading and Writing NetCDF-CF Geometry
  • +
  • + Area Weight Generation for Polygon Intersections +
  • Reading and Writing Timeseries
  • @@ -163,7 +166,7 @@

    Reading and Writing Timeseries

    timeseries ID (which is stored as a string).
    -ncmeta::nc_dims(nc_file)
    +ncmeta::nc_dims(nc_file)
     #> # A tibble: 3 × 4
     #>      id name               length unlim
     #>   <int> <chr>               <dbl> <lgl>
    @@ -173,7 +176,7 @@ 

    Reading and Writing Timeseries

    The file has variables for latitude, longitude, altitude, timeseries IDs, and a data variable.

    -ncmeta::nc_vars(nc_file)
    +ncmeta::nc_vars(nc_file)
     #> # A tibble: 6 × 5
     #>      id name                                        type      ndims natts
     #>   <int> <chr>                                       <chr>     <int> <int>
    @@ -186,7 +189,7 @@ 

    Reading and Writing Timeseries

    The primary dimensions in the file are of length, number of time steps and number of time series.

    -ncmeta::nc_dims(nc_file)
    +ncmeta::nc_dims(nc_file)
     #> # A tibble: 3 × 4
     #>      id name               length unlim
     #>   <int> <chr>               <dbl> <lgl>
    @@ -289,7 +292,7 @@ 

    Reading and Writing Timeseries

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/authors.html b/docs/authors.html index eda525b..97b12c3 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,5 +1,5 @@ -Authors and Citation • ncdfgeomAuthors and Citation • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -63,7 +63,7 @@
    @@ -107,7 +107,7 @@

    Citation

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/index.html b/docs/index.html index 88a2c6d..b03fa50 100644 --- a/docs/index.html +++ b/docs/index.html @@ -6,7 +6,7 @@ NetCDF Geometry and Time Series • ncdfgeom - + @@ -33,7 +33,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -104,7 +104,7 @@

    Installationinstall.packages("ncdfgeom")

    For the latest development version:

    install.packages("remotes")
    -remotes::install_github("DOI-USGS/ncdfgeom")
    +remotes::install_github("DOI-USGS/ncdfgeom")

    Contributing @@ -174,7 +174,7 @@

    Developers

    -

    Site built with pkgdown 2.0.7.

    +

    Site built with pkgdown 2.0.9.

    diff --git a/docs/news/index.html b/docs/news/index.html index 3d55034..7e90730 100644 --- a/docs/news/index.html +++ b/docs/news/index.html @@ -1,5 +1,5 @@ -Changelog • ncdfgeomChangelog • ncdfgeom @@ -17,7 +17,7 @@ ncdfgeom - 1.2.0 + 1.3.0
    @@ -90,7 +90,7 @@