Merge pull request #68 from rsbivand/shp_to_gpkg

add GPKG and correct itemize curly brackets (CRAN NOTE)
Nowosad · May 31, 2024 · 80ab8ca · 80ab8ca
2 parents 1731631 + 481460a
commit 80ab8ca
Show file tree

Hide file tree

Showing 71 changed files with 652 additions and 682 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: spData
 Title: Datasets for Spatial Analysis
-Version: 2.3.0
+Version: 2.3.1
 Authors@R: c(person("Roger", "Bivand", role = "aut", email="[email protected]", comment = c(ORCID = "0000-0003-2392-6140")),
     person("Jakub", "Nowosad", role = c("aut", "cre"), email="[email protected]", comment = c(ORCID = "0000-0002-1057-3721")),
     person("Robin", "Lovelace", role = "aut", comment = c(ORCID = "0000-0001-5679-6536")),
@@ -23,9 +23,9 @@ Suggests:
     sf (>= 0.9-1),
     spDataLarge (>= 0.4.0),
     spdep,
-    spatialreg,
+    spatialreg
 License: CC0
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 LazyData: true
 URL: https://jakubnowosad.com/spData/
 BugReports: https://github.com/Nowosad/spData/issues

diff --git a/R/afcon.R b/R/afcon.R
@@ -6,11 +6,11 @@
 #' 
 #' @format This data frame contains the following columns:
 #' \itemize{
-#'         \item{x} {an easting in decimal degrees (taken as centroid of shapefile polygon)}
-#'         \item{y} {an northing in decimal degrees (taken as centroid of shapefile polygon)}
-#'         \item{totcon} {index of total conflict 1966-78}
-#'         \item{name} {country name}
-#'         \item{id} {country id number as in paper}
+#'         \item{x: an easting in decimal degrees (taken as centroid of shapefile polygon)}
+#'         \item{y: a northing in decimal degrees (taken as centroid of shapefile polygon)}
+#'         \item{totcon: index of total conflict 1966-78}
+#'         \item{name: country name}
+#'         \item{id: country id number as in paper}
 #' }
 #' 
 #' @source 

diff --git a/R/alaska.R b/R/alaska.R
@@ -7,13 +7,13 @@
 #' 
 #' @format Formal class 'sf' [package "sf"]; the data contains a data.frame with 1 obs. of 7 variables:
 #' \itemize{
-#'     \item{GEOID} {character vector of geographic identifiers}
-#'     \item{NAME} {character vector of state names}
-#'     \item{REGION} {character vector of region names}
-#'     \item{AREA} {area in square kilometers of units class}
-#'     \item{total_pop_10} {numerical vector of total population in 2010}
-#'     \item{total_pop_15} {numerical vector of total population in 2015}
-#'     \item{geometry} {sfc_MULTIPOLYGON}
+#'     \item{GEOID: character vector of geographic identifiers}
+#'     \item{NAME: character vector of state names}
+#'     \item{REGION: character vector of region names}
+#'     \item{AREA: area in square kilometers of units class}
+#'     \item{total_pop_10: numerical vector of total population in 2010}
+#'     \item{total_pop_15: numerical vector of total population in 2015}
+#'     \item{geometry: sfc_MULTIPOLYGON}
 #' }
 #' The object is in projected coordinates using Alaska Albers (EPSG:3467).
 #' 
@@ -30,4 +30,4 @@
 #' 
 #'   plot(alaska["total_pop_15"])
 #' }
-"alaska"
+"alaska"
diff --git a/R/auckland.R b/R/auckland.R
@@ -6,10 +6,10 @@
 #' 
 #' @format This data frame contains the following columns:
 #' \itemize{
-#'         \item{Easting} {a numeric vector of x coordinates in an unknown spatial reference system}
-#'         \item{Northing} {a numeric vector of y coordinates in an unknown spatial reference system}
-#'         \item{M77_85} {a numeric vector of counts of infant (under 5 years of age) deaths in Auckland, 1977-1985}
-#'         \item{Und5_81} {a numeric vector of population under 5 years of age at the 1981 Census}
+#'         \item{Easting: a numeric vector of x coordinates in an unknown spatial reference system}
+#'         \item{Northing: a numeric vector of y coordinates in an unknown spatial reference system}
+#'         \item{M77_85: a numeric vector of counts of infant (under 5 years of age) deaths in Auckland, 1977-1985}
+#'         \item{Und5_81: a numeric vector of population under 5 years of age at the 1981 Census}
 #' }
 #' 
 #' @details The contiguous neighbours object does not completely replicate results in the sources, and was reconstructed from \code{auckpolys}; examination of figures in the sources suggests that there are differences in detail, although probably not in substance.
@@ -19,13 +19,10 @@
 #' 
 #' @examples 
 #' if (requireNamespace("sf", quietly = TRUE)) {
-#'   library(sp)
-#'   auckland <- sf::st_read(system.file("shapes/auckland.shp", package="spData")[1])
-#'   auckland <- as(auckland, "Spatial")
-#'   plot(auckland)
+#'   auckland <- sf::st_read(system.file("shapes/auckland.gpkg", package="spData")[1])
+#'   plot(sf::st_geometry(auckland))
 #'   if (requireNamespace("spdep", quietly = TRUE)) {
-#'     library(spdep)
-#'     auckland.nb <- poly2nb(auckland)
+#'     auckland.nb <- spdep::poly2nb(auckland)
 #'   }
 #' }
 #' 

diff --git a/R/baltimore.R b/R/baltimore.R
@@ -6,23 +6,23 @@
 #' 
 #' @format A data frame with 211 observations on the following 17 variables.
 #' \itemize{
-#'         \item{STATION} {a numeric vector}
-#'         \item{PRICE} {a numeric vector}
-#'         \item{NROOM} {a numeric vector}
-#'         \item{DWELL} {a numeric vector}
-#'         \item{NBATH} {a numeric vector}
-#'         \item{PATIO} {a numeric vector}
-#'         \item{FIREPL} {a numeric vector}
-#'         \item{AC} {a numeric vector}
-#'         \item{BMENT} {a numeric vector}
-#'         \item{NSTOR} {a numeric vector}
-#'         \item{GAR} {a numeric vector}
-#'         \item{AGE} {a numeric vector}
-#'         \item{CITCOU} {a numeric vector}
-#'         \item{LOTSZ} {a numeric vector}
-#'         \item{SQFT} {a numeric vector}
-#'         \item{X} {a numeric vector}
-#'         \item{Y} {a numeric vector}
+#'         \item{STATION: a numeric vector}
+#'         \item{PRICE: a numeric vector}
+#'         \item{NROOM: a numeric vector}
+#'         \item{DWELL: a numeric vector}
+#'         \item{NBATH: a numeric vector}
+#'         \item{PATIO: a numeric vector}
+#'         \item{FIREPL: a numeric vector}
+#'         \item{AC: a numeric vector}
+#'         \item{BMENT: a numeric vector}
+#'         \item{NSTOR: a numeric vector}
+#'         \item{GAR: a numeric vector}
+#'         \item{AGE: a numeric vector}
+#'         \item{CITCOU: a numeric vector}
+#'         \item{LOTSZ: a numeric vector}
+#'         \item{SQFT: a numeric vector}
+#'         \item{X: a numeric vector}
+#'         \item{Y: a numeric vector}
 #' }
 #' 
 #' @source Prepared by Luc Anselin. Original data made available by Robin Dubin, Weatherhead School of Management, Case Western Research University, Cleveland, OH. http://sal.agecon.uiuc.edu/datasets/baltimore.zip

diff --git a/R/boston.R b/R/boston.R
@@ -6,55 +6,55 @@
 #' 
 #' @format   This data frame contains the following columns:
 #' \itemize{
-#'         \item{TOWN} {a factor with levels given by town names}
-#'         \item{TOWNNO} {a numeric vector corresponding to TOWN}
-#'         \item{TRACT} {a numeric vector of tract ID numbers}
-#'         \item{LON} {a numeric vector of tract point longitudes in decimal degrees}
-#'         \item{LAT} {a numeric vector of tract point latitudes in decimal degrees}
-#'         \item{MEDV} {a numeric vector of median values of owner-occupied housing
+#'         \item{TOWN: a factor with levels given by town names}
+#'         \item{TOWNNO: a numeric vector corresponding to TOWN}
+#'         \item{TRACT: a numeric vector of tract ID numbers}
+#'         \item{LON: a numeric vector of tract point longitudes in decimal degrees}
+#'         \item{LAT: a numeric vector of tract point latitudes in decimal degrees}
+#'         \item{MEDV: a numeric vector of median values of owner-occupied housing
 #'                 in USD 1000}
-#'         \item{CMEDV} {a numeric vector of corrected median values of
+#'         \item{CMEDV: a numeric vector of corrected median values of
 #'                 owner-occupied housing in USD 1000}
-#'         \item{CRIM} {a numeric vector of per capita crime}
-#'         \item{ZN} {a numeric vector of proportions of residential land zoned
+#'         \item{CRIM: a numeric vector of per capita crime}
+#'         \item{ZN: a numeric vector of proportions of residential land zoned
 #'                 for lots over 25000 sq. ft per town (constant for all Boston tracts)}
-#'         \item{INDUS} {a numeric vector of proportions of non-retail business
+#'         \item{INDUS: a numeric vector of proportions of non-retail business
 #'                 acres per town (constant for all Boston tracts)}
-#'         \item{CHAS} {a factor with levels 1 if tract borders Charles River;
+#'         \item{CHAS: a factor with levels 1 if tract borders Charles River;
 #'                 0 otherwise}
-#'         \item{NOX} {a numeric vector of nitric oxides concentration (parts per
+#'         \item{NOX: a numeric vector of nitric oxides concentration (parts per
 #'                                                                     10 million) per town}
-#'         \item{RM} {a numeric vector of average numbers of rooms per dwelling}
-#'         \item{AGE} {a numeric vector of proportions of owner-occupied units
+#'         \item{RM: a numeric vector of average numbers of rooms per dwelling}
+#'         \item{AGE: a numeric vector of proportions of owner-occupied units
 #'                 built prior to 1940}
-#'         \item{DIS} {a numeric vector of weighted distances to five Boston
+#'         \item{DIS: a numeric vector of weighted distances to five Boston
 #'                 employment centres}
-#'         \item{RAD} {a numeric vector of an index of accessibility to radial
+#'         \item{RAD: a numeric vector of an index of accessibility to radial
 #'                 highways per town (constant for all Boston tracts)}
-#'         \item{TAX} {a numeric vector full-value property-tax rate per USD
+#'         \item{TAX: a numeric vector of full-value property-tax rates per USD
 #'                 10,000 per town (constant for all Boston tracts)}
-#'         \item{PTRATIO} {a numeric vector of pupil-teacher ratios per town
+#'         \item{PTRATIO: a numeric vector of pupil-teacher ratios per town
 #'                 (constant for all Boston tracts)}
-#'         \item{B} {a numeric vector of \code{1000*(Bk - 0.63)^2} where Bk is the
+#'         \item{B: a numeric vector of \code{1000*(Bk - 0.63)^2} where Bk is the
 #'                 proportion of blacks}
-#'         \item{LSTAT} {a numeric vector of percentage values of lower status
+#'         \item{LSTAT: a numeric vector of percentage values of lower status
 #'                 population}
 #' }
-#' @note Details of the creation of the tract shapefile given in final don't run block; tract boundaries for 1990 (formerly at: http://www.census.gov/geo/cob/bdy/tr/tr90shp/tr25_d90_shp.zip, counties in the BOSTON SMSA http://www.census.gov/population/metro/files/lists/historical/63mfips.txt); tract conversion table 1980/1970 (formerly at : https://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/7913?q=07913&permit[0]=AVAILABLE, http://www.icpsr.umich.edu/cgi-bin/bob/zipcart2?path=ICPSR&study=7913&bundle=all&ds=1&dups=yes). The shapefile contains corrections and extra variables (tract 3592 is corrected to 3593; the extra columns are:
+#' @note Details of the creation of the tract GPKG file: tract boundaries for 1990 (formerly at: http://www.census.gov/geo/cob/bdy/tr/tr90shp/tr25_d90_shp.zip, counties in the BOSTON SMSA http://www.census.gov/population/metro/files/lists/historical/63mfips.txt); tract conversion table 1980/1970 (formerly at: https://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/7913?q=07913&permit[0]=AVAILABLE, http://www.icpsr.umich.edu/cgi-bin/bob/zipcart2?path=ICPSR&study=7913&bundle=all&ds=1&dups=yes). The GPKG file contains corrections and extra variables (tract 3592 is corrected to 3593); the extra columns are:
 #'  \itemize{
-#'    \item{units}{number of single family houses}
-#'    \item{cu5k}{count of units under USD 5,000}
-#'    \item{c5_7_5}{counts USD 5,000 to 7,500}
-#'    \item{C*_*}{interval counts}
-#'    \item{co50k}{count of units over USD 50,000}
-#'    \item{median}{recomputed median values}
-#'    \item{BB}{recomputed black population proportion}
-#'    \item{censored}{whether censored or not}
-#'    \item{NOXID}{NOX model zone ID}
-#'    \item{POP}{tract population}
+#'    \item{units: number of single family houses}
+#'    \item{cu5k: count of units under USD 5,000}
+#'    \item{c5_7_5: counts USD 5,000 to 7,500}
+#'    \item{C*_*: interval counts}
+#'    \item{co50k: count of units over USD 50,000}
+#'    \item{median: recomputed median values}
+#'    \item{BB: recomputed black population proportion}
+#'    \item{censored: whether censored or not}
+#'    \item{NOXID: NOX model zone ID}
+#'    \item{POP: tract population}
 #'  }
 #' 
-#' @source \url{http://lib.stat.cmu.edu/datasets/boston_corrected.txt}
+#' @source Previously available from http://lib.stat.cmu.edu/datasets/boston_corrected.txt
 #' @references 
 #' Harrison, David, and Daniel L. Rubinfeld, Hedonic Housing Prices and the Demand for Clean Air, \emph{Journal of Environmental Economics and Management}, Volume 5, (1978), 81-102. Original data.
 #' 
@@ -69,7 +69,6 @@
 #' 
 #' @examples 
 #' if (requireNamespace("spdep", quietly = TRUE)) {
-#'   library(spdep)
 #'   data(boston)
 #'   hr0 <- lm(log(MEDV) ~ CRIM + ZN + INDUS + CHAS + I(NOX^2) + I(RM^2) +
 #'                     AGE + log(DIS) + log(RAD) + TAX + PTRATIO + B + log(LSTAT), data = boston.c)
@@ -79,28 +78,14 @@
 #'                     AGE + log(DIS) + log(RAD) + TAX + PTRATIO + B + log(LSTAT), data = boston.c)
 #'   summary(gp0)
 #'   logLik(gp0)
-#'   lm.morantest(hr0, nb2listw(boston.soi))
+#'   spdep::lm.morantest(hr0, spdep::nb2listw(boston.soi))
 #' }
-#' \dontrun{
-#' boston.tr <- sf::st_read(system.file("shapes/boston_tracts.shp",
+#' if (requireNamespace("sf", quietly = TRUE)) {
+#' boston.tr <- sf::st_read(system.file("shapes/boston_tracts.gpkg",
 #'                            package="spData")[1])
-#' boston.tr <- as(boston.tr, "Spatial")                           
-#' boston_nb <- poly2nb(boston.tr)
-#' }
-#' \dontrun{
-#' if (requireNamespace("spatialreg", quietly = TRUE)) {
-#'   library(spatialreg)
-#'   gp1 <- errorsarlm(log(CMEDV) ~ CRIM + ZN + INDUS + CHAS + I(NOX^2)
-#'                              + I(RM^2) +  AGE + log(DIS) + log(RAD) +
-#'                               TAX + PTRATIO + B + log(LSTAT),
-#'                              data=boston.c, nb2listw(boston.soi), method="Matrix", 
-#'                              control=list(tol.opt = .Machine$double.eps^(1/4)))
-#'   summary(gp1)
-#'   gp2 <- lagsarlm(log(CMEDV) ~ CRIM + ZN + INDUS + CHAS + I(NOX^2) + I(RM^2)
-#'                   +  AGE + log(DIS) + log(RAD) + TAX + PTRATIO + B + log(LSTAT),
-#'                   data=boston.c, nb2listw(boston.soi), method="Matrix")
-#'   summary(gp2)
-#' }
+#'   if (requireNamespace("spdep", quietly = TRUE)) {
+#'     boston_nb <- spdep::poly2nb(boston.tr)
+#'   }
 #' }
 #' 
 NULL
diff --git a/R/coffee_data.R b/R/coffee_data.R
@@ -6,9 +6,9 @@
 #' 
 #' @format A data frame (tibble) with 58 for the following 12 variables:
 #' \itemize{
-#' \item{name_long} {name of country or coffee variety}
-#' \item{coffee_production_2016} {production in 2016}
-#' \item{coffee_production_2017} {production in 2017}
+#' \item{name_long: name of country or coffee variety}
+#' \item{coffee_production_2016: production in 2016}
+#' \item{coffee_production_2017: production in 2017}
 #' }
 #' 
 #' @details The examples section shows how this can be joined with spatial data to create a simple map.
@@ -19,12 +19,13 @@
 #' @examples 
 #' head(coffee_data)
 #' \dontrun{
+#' if (requireNamespace("dplyr")) {
 #' library(dplyr)
 #' library(sf)
 #' # found by searching for "global coffee data"
 #' u = "http://www.ico.org/prices/m1-exports.pdf"
 #' download.file(u, "data.pdf", mode = "wb")
-#' install.packages("pdftables") # requires api key
+#' if (requireNamespace("pdftables")) { # requires api key
 #' pdftables::convert_pdf(input_file = "data.pdf", output_file = "coffee-data-messy.csv")
 #' d = read_csv("coffee-data-messy.csv")
 #' file.remove("coffee-data-messy.csv")
@@ -42,5 +43,6 @@
 #'   tm_fill("coffee_production_2017", title = "Thousand 60kg bags", breaks = b,
 #'           textNA = "No data", colorNA = NULL)
 #' tmap_mode("view") # for an interactive version
+#' }}
 #' }
 "coffee_data"
diff --git a/R/columbus.R b/R/columbus.R
@@ -6,28 +6,28 @@
 #' 
 #' @format This data frame contains the following columns:
 #'        \itemize{
-#'                \item{AREA} {computed by ArcView}
-#'                \item{PERIMETER} {computed by ArcView}
-#'                \item{COLUMBUS_} {internal polygon ID (ignore)}
-#'                \item{COLUMBUS_I} {another internal polygon ID (ignore)}
-#'                \item{POLYID} {yet another polygon ID}
-#'                \item{NEIG} {neighborhood id value (1-49);
+#'                \item{AREA: computed by ArcView}
+#'                \item{PERIMETER: computed by ArcView}
+#'                \item{COLUMBUS_: internal polygon ID (ignore)}
+#'                \item{COLUMBUS_I: another internal polygon ID (ignore)}
+#'                \item{POLYID: yet another polygon ID}
+#'                \item{NEIG: neighborhood id value (1-49);
 #'                        conforms to id value used in Spatial Econometrics book.}
-#'                \item{HOVAL} {housing value (in 1,000 USD)}
-#'                \item{INC} {household income (in 1,000 USD)}
-#'                \item{CRIME} {residential burglaries and vehicle thefts per thousand
+#'                \item{HOVAL: housing value (in 1,000 USD)}
+#'                \item{INC: household income (in 1,000 USD)}
+#'                \item{CRIME: residential burglaries and vehicle thefts per thousand
 #'                        households in the neighborhood}
-#'                \item{OPEN} {open space in neighborhood}
-#'                \item{PLUMB} {percentage housing units without plumbing}
-#'                \item{DISCBD} {distance to CBD}
-#'                \item{X} {x coordinate (in arbitrary digitizing units, not polygon coordinates)}
-#'                \item{Y} {y coordinate (in arbitrary digitizing units, not polygon coordinates)}
-#'                \item{NSA} {north-south dummy (North=1)}
-#'                \item{NSB} {north-south dummy (North=1)}
-#'                \item{EW} {east-west dummy (East=1)}
-#'                \item{CP} {core-periphery dummy (Core=1)}
-#'                \item{THOUS} {constant=1,000}
-#'                \item{NEIGNO} {NEIG+1,000, alternative neighborhood id value}
+#'                \item{OPEN: open space in neighborhood}
+#'                \item{PLUMB: percentage housing units without plumbing}
+#'                \item{DISCBD: distance to CBD}
+#'                \item{X: x coordinate (in arbitrary digitizing units, not polygon coordinates)}
+#'                \item{Y: y coordinate (in arbitrary digitizing units, not polygon coordinates)}
+#'                \item{NSA: north-south dummy (North=1)}
+#'                \item{NSB: north-south dummy (North=1)}
+#'                \item{EW: east-west dummy (East=1)}
+#'                \item{CP: core-periphery dummy (Core=1)}
+#'                \item{THOUS: constant=1,000}
+#'                \item{NEIGNO: NEIG+1,000, alternative neighborhood id value}
 #'        }
 #' @details The row names of \code{columbus} and the \code{region.id} attribute of \code{polys} are set to \code{columbus$NEIGNO}.
 #' @source Anselin, Luc.  1988.  Spatial econometrics: methods and models.  Dordrecht: Kluwer Academic, Table 12.1 p. 189.
@@ -37,10 +37,8 @@
 #' 
 #' @examples 
 #' if (requireNamespace("sf", quietly = TRUE)) {
-#'   library(sp)
-#'   columbus <- sf::st_read(system.file("shapes/columbus.shp", package="spData")[1])
-#'   columbus <- as(columbus, "Spatial")
-#'   plot(columbus)
+#'   columbus <- sf::st_read(system.file("shapes/columbus.gpkg", package="spData")[1])
+#'   plot(sf::st_geometry(columbus))
 #' }
 #' 
 #' if (requireNamespace("spdep", quietly = TRUE)) {

diff --git a/R/congruent.R b/R/congruent.R
@@ -23,11 +23,11 @@
 #' }
 #' # Code used to download the data:
 #' \dontrun{
-#' devtools::install_github("robinlovelace/ukboundaries")
+#' #devtools::install_github("robinlovelace/ukboundaries")
 #' library(sf)
 #' library(tmap)
 #' library(dplyr)
-#' library(ukboundaries)
+#' #library(ukboundaries)
 #' sel = grepl("003|004", msoa2011_lds$geo_label)
 #' aggregating_zones = st_transform(msoa2011_lds[sel, ], 27700)
 #' # find lsoas in the aggregating_zones