diff --git a/Cargo.lock b/Cargo.lock index 8d1ec9271..3a1e923ec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5497,6 +5497,7 @@ dependencies = [ "sedona-geoparquet", "sedona-proj", "sedona-schema", + "serde_json", "thiserror 2.0.17", "tokio", ] diff --git a/r/sedonadb/R/000-wrappers.R b/r/sedonadb/R/000-wrappers.R index 6cd4654f2..10b904b8c 100644 --- a/r/sedonadb/R/000-wrappers.R +++ b/r/sedonadb/R/000-wrappers.R @@ -74,6 +74,11 @@ NULL } +`parse_crs_metadata` <- function(`crs_json`) { + .Call(savvy_parse_crs_metadata__impl, `crs_json`) +} + + `sedonadb_adbc_init_func` <- function() { .Call(savvy_sedonadb_adbc_init_func__impl) } @@ -351,6 +356,66 @@ class(`InternalDataFrame`) <- c( cat('sedonadb::InternalDataFrame\n') } +### wrapper functions for SedonaCrsR + +`SedonaCrsR_authority_code` <- function(self) { + function() { + .Call(savvy_SedonaCrsR_authority_code__impl, `self`) + } +} + +`SedonaCrsR_display` <- function(self) { + function() { + .Call(savvy_SedonaCrsR_display__impl, `self`) + } +} + +`SedonaCrsR_srid` <- function(self) { + function() { + .Call(savvy_SedonaCrsR_srid__impl, `self`) + } +} + +`SedonaCrsR_to_crs_string` <- function(self) { + function() { + .Call(savvy_SedonaCrsR_to_crs_string__impl, `self`) + } +} + +`SedonaCrsR_to_json` <- function(self) { + function() { + .Call(savvy_SedonaCrsR_to_json__impl, `self`) + } +} + +`.savvy_wrap_SedonaCrsR` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`authority_code` <- `SedonaCrsR_authority_code`(ptr) + e$`display` <- `SedonaCrsR_display`(ptr) + e$`srid` <- `SedonaCrsR_srid`(ptr) + e$`to_crs_string` <- `SedonaCrsR_to_crs_string`(ptr) + e$`to_json` <- `SedonaCrsR_to_json`(ptr) + + class(e) <- c("sedonadb::SedonaCrsR", "SedonaCrsR", "savvy_sedonadb__sealed") + e +} + + +#' R-exposed wrapper for CRS (Coordinate Reference System) introspection +#' +#' This wraps an Arc and exposes its methods to R. 
+`SedonaCrsR` <- new.env(parent = emptyenv()) + +### associated functions for SedonaCrsR + +class(`SedonaCrsR`) <- c("sedonadb::SedonaCrsR__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaCrsR__bundle` <- function(x, ...) { + cat('sedonadb::SedonaCrsR\n') +} + ### wrapper functions for SedonaDBExpr `SedonaDBExpr_alias` <- function(self) { @@ -503,3 +568,62 @@ class(`SedonaDBExprFactory`) <- c( `print.sedonadb::SedonaDBExprFactory__bundle` <- function(x, ...) { cat('sedonadb::SedonaDBExprFactory\n') } + +### wrapper functions for SedonaTypeR + +`SedonaTypeR_crs` <- function(self) { + function() { + .savvy_wrap_SedonaCrsR(.Call(savvy_SedonaTypeR_crs__impl, `self`)) + } +} + +`SedonaTypeR_crs_display` <- function(self) { + function() { + .Call(savvy_SedonaTypeR_crs_display__impl, `self`) + } +} + +`SedonaTypeR_logical_type_name` <- function(self) { + function() { + .Call(savvy_SedonaTypeR_logical_type_name__impl, `self`) + } +} + +`SedonaTypeR_name` <- function(self) { + function() { + .Call(savvy_SedonaTypeR_name__impl, `self`) + } +} + +`.savvy_wrap_SedonaTypeR` <- function(ptr) { + e <- new.env(parent = emptyenv()) + e$.ptr <- ptr + e$`crs` <- `SedonaTypeR_crs`(ptr) + e$`crs_display` <- `SedonaTypeR_crs_display`(ptr) + e$`logical_type_name` <- `SedonaTypeR_logical_type_name`(ptr) + e$`name` <- `SedonaTypeR_name`(ptr) + + class(e) <- c("sedonadb::SedonaTypeR", "SedonaTypeR", "savvy_sedonadb__sealed") + e +} + + +#' R-exposed wrapper for SedonaType introspection +#' +#' This allows R code to inspect Arrow schema fields and determine +#' if they are geometry types with CRS information. 
+`SedonaTypeR` <- new.env(parent = emptyenv()) + +### associated functions for SedonaTypeR + +`SedonaTypeR`$`new` <- function(`schema_xptr`) { + .savvy_wrap_SedonaTypeR(.Call(savvy_SedonaTypeR_new__impl, `schema_xptr`)) +} + + +class(`SedonaTypeR`) <- c("sedonadb::SedonaTypeR__bundle", "savvy_sedonadb__sealed") + +#' @export +`print.sedonadb::SedonaTypeR__bundle` <- function(x, ...) { + cat('sedonadb::SedonaTypeR\n') +} diff --git a/r/sedonadb/R/crs.R b/r/sedonadb/R/crs.R new file mode 100644 index 000000000..f06ec9892 --- /dev/null +++ b/r/sedonadb/R/crs.R @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#' Parse CRS from GeoArrow metadata +#' +#' @param crs_json A JSON string of GeoArrow extension metadata: an object whose \code{"crs"} field holds the CRS (PROJJSON or authority code) +#' @returns A list with components: authority_code (e.g., "EPSG:5070"), srid (integer), +#' name (the \code{"name"} field of a PROJJSON CRS), and proj_string (the CRS as a +#' string: an authority code or PROJJSON text); unavailable components are \code{NULL}. +#' Returns \code{NULL} when no CRS is present, e.g. the \code{"crs"} field is missing or +#' null. Raises an error if \code{crs_json} is not valid JSON or the CRS cannot be parsed. 
+#' @keywords internal +sd_parse_crs <- function(crs_json) { + parse_crs_metadata(crs_json) +} diff --git a/r/sedonadb/R/dataframe.R b/r/sedonadb/R/dataframe.R index fefc3a3d2..a7abffa81 100644 --- a/r/sedonadb/R/dataframe.R +++ b/r/sedonadb/R/dataframe.R @@ -321,6 +321,41 @@ as_nanoarrow_array_stream.sedonadb_dataframe <- function(x, ..., schema = NULL) #' @export print.sedonadb_dataframe <- function(x, ..., width = NULL, n = NULL) { + # Print class header + schema <- nanoarrow::infer_nanoarrow_schema(x) + ncols <- length(schema$children) + + cat(sprintf("# A sedonadb_dataframe: ? x %d\n", ncols)) + + # Print geometry column info using SedonaTypeR wrapper + geo_col_info <- character() + for (col_name in names(schema$children)) { + child <- schema$children[[col_name]] + sd_type <- tryCatch( + SedonaTypeR$new(child), + error = function(e) NULL + ) + if (!is.null(sd_type)) { + logical_type <- sd_type$logical_type_name() + if (logical_type == "geometry" || logical_type == "geography") { + crs_display <- sd_type$crs_display() + geo_col_info <- c(geo_col_info, sprintf("%s%s", col_name, crs_display)) + } + } + } + + if (length(geo_col_info) > 0) { + if (is.null(width)) { + width <- getOption("width") + } + + geo_line <- sprintf("# Geometry: %s", paste(geo_col_info, collapse = ", ")) + if (nchar(geo_line) > width) { + geo_line <- paste0(substr(geo_line, 1, width - 3), "...") + } + cat(paste0(geo_line, "\n")) + } + if (isTRUE(getOption("sedonadb.interactive", TRUE))) { sd_preview(x, n = n, width = width) } else { diff --git a/r/sedonadb/R/pkg-sf.R b/r/sedonadb/R/pkg-sf.R index 7140f6a03..b1b6a8968 100644 --- a/r/sedonadb/R/pkg-sf.R +++ b/r/sedonadb/R/pkg-sf.R @@ -29,7 +29,7 @@ as_sedonadb_dataframe.sf <- function(x, ..., schema = NULL) { as_sedonadb_dataframe(new_sedonadb_dataframe(ctx, df), schema = schema) } -# dynamically registered in zzz.R +#' @exportS3Method sf::st_as_sf # nolint start: object_name_linter st_as_sf.sedonadb_dataframe <- function(x, ...) 
{ stream <- nanoarrow::nanoarrow_allocate_array_stream() diff --git a/r/sedonadb/man/sd_parse_crs.Rd b/r/sedonadb/man/sd_parse_crs.Rd new file mode 100644 index 000000000..e0419289b --- /dev/null +++ b/r/sedonadb/man/sd_parse_crs.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/crs.R +\name{sd_parse_crs} +\alias{sd_parse_crs} +\title{Parse CRS from GeoArrow metadata} +\usage{ +sd_parse_crs(crs_json) +} +\arguments{ +\item{crs_json}{A JSON string representing the CRS (PROJJSON or authority code)} +} +\value{ +A list with components: authority_code (e.g., "EPSG:5070"), srid (integer), +name (character string with a human-readable CRS name), and proj_string (character +string with the PROJ representation of the CRS), or \code{NULL} when no CRS +information is available, when the \code{"crs"} field is not present in the +metadata, or when parsing the CRS information fails. +} +\description{ +Parse CRS from GeoArrow metadata +} +\keyword{internal} diff --git a/r/sedonadb/src/init.c b/r/sedonadb/src/init.c index 0e9efae4b..40a49d4c8 100644 --- a/r/sedonadb/src/init.c +++ b/r/sedonadb/src/init.c @@ -75,6 +75,11 @@ SEXP savvy_init_r_runtime_interrupts__impl(SEXP c_arg__interrupts_call, return handle_result(res); } +SEXP savvy_parse_crs_metadata__impl(SEXP c_arg__crs_json) { + SEXP res = savvy_parse_crs_metadata__ffi(c_arg__crs_json); + return handle_result(res); +} + SEXP savvy_sedonadb_adbc_init_func__impl(void) { SEXP res = savvy_sedonadb_adbc_init_func__ffi(); return handle_result(res); @@ -212,6 +217,31 @@ SEXP savvy_InternalDataFrame_to_view__impl(SEXP self__, SEXP c_arg__ctx, return handle_result(res); } +SEXP savvy_SedonaCrsR_authority_code__impl(SEXP self__) { + SEXP res = savvy_SedonaCrsR_authority_code__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaCrsR_display__impl(SEXP self__) { + SEXP res = savvy_SedonaCrsR_display__ffi(self__); + return handle_result(res); +} + +SEXP 
savvy_SedonaCrsR_srid__impl(SEXP self__) { + SEXP res = savvy_SedonaCrsR_srid__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaCrsR_to_crs_string__impl(SEXP self__) { + SEXP res = savvy_SedonaCrsR_to_crs_string__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaCrsR_to_json__impl(SEXP self__) { + SEXP res = savvy_SedonaCrsR_to_json__ffi(self__); + return handle_result(res); +} + SEXP savvy_SedonaDBExpr_alias__impl(SEXP self__, SEXP c_arg__name) { SEXP res = savvy_SedonaDBExpr_alias__ffi(self__, c_arg__name); return handle_result(res); @@ -281,11 +311,38 @@ SEXP savvy_SedonaDBExprFactory_scalar_function__impl(SEXP self__, return handle_result(res); } +SEXP savvy_SedonaTypeR_crs__impl(SEXP self__) { + SEXP res = savvy_SedonaTypeR_crs__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaTypeR_crs_display__impl(SEXP self__) { + SEXP res = savvy_SedonaTypeR_crs_display__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaTypeR_logical_type_name__impl(SEXP self__) { + SEXP res = savvy_SedonaTypeR_logical_type_name__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaTypeR_name__impl(SEXP self__) { + SEXP res = savvy_SedonaTypeR_name__ffi(self__); + return handle_result(res); +} + +SEXP savvy_SedonaTypeR_new__impl(SEXP c_arg__schema_xptr) { + SEXP res = savvy_SedonaTypeR_new__ffi(c_arg__schema_xptr); + return handle_result(res); +} + static const R_CallMethodDef CallEntries[] = { {"savvy_configure_proj_shared__impl", (DL_FUNC)&savvy_configure_proj_shared__impl, 3}, {"savvy_init_r_runtime_interrupts__impl", (DL_FUNC)&savvy_init_r_runtime_interrupts__impl, 2}, + {"savvy_parse_crs_metadata__impl", (DL_FUNC)&savvy_parse_crs_metadata__impl, + 1}, {"savvy_sedonadb_adbc_init_func__impl", (DL_FUNC)&savvy_sedonadb_adbc_init_func__impl, 0}, {"savvy_InternalContext_data_frame_from_array_stream__impl", @@ -330,6 +387,15 @@ static const R_CallMethodDef CallEntries[] = { 
(DL_FUNC)&savvy_InternalDataFrame_to_provider__impl, 1}, {"savvy_InternalDataFrame_to_view__impl", (DL_FUNC)&savvy_InternalDataFrame_to_view__impl, 4}, + {"savvy_SedonaCrsR_authority_code__impl", + (DL_FUNC)&savvy_SedonaCrsR_authority_code__impl, 1}, + {"savvy_SedonaCrsR_display__impl", (DL_FUNC)&savvy_SedonaCrsR_display__impl, + 1}, + {"savvy_SedonaCrsR_srid__impl", (DL_FUNC)&savvy_SedonaCrsR_srid__impl, 1}, + {"savvy_SedonaCrsR_to_crs_string__impl", + (DL_FUNC)&savvy_SedonaCrsR_to_crs_string__impl, 1}, + {"savvy_SedonaCrsR_to_json__impl", (DL_FUNC)&savvy_SedonaCrsR_to_json__impl, + 1}, {"savvy_SedonaDBExpr_alias__impl", (DL_FUNC)&savvy_SedonaDBExpr_alias__impl, 2}, {"savvy_SedonaDBExpr_cast__impl", (DL_FUNC)&savvy_SedonaDBExpr_cast__impl, @@ -352,6 +418,13 @@ static const R_CallMethodDef CallEntries[] = { (DL_FUNC)&savvy_SedonaDBExprFactory_new__impl, 1}, {"savvy_SedonaDBExprFactory_scalar_function__impl", (DL_FUNC)&savvy_SedonaDBExprFactory_scalar_function__impl, 3}, + {"savvy_SedonaTypeR_crs__impl", (DL_FUNC)&savvy_SedonaTypeR_crs__impl, 1}, + {"savvy_SedonaTypeR_crs_display__impl", + (DL_FUNC)&savvy_SedonaTypeR_crs_display__impl, 1}, + {"savvy_SedonaTypeR_logical_type_name__impl", + (DL_FUNC)&savvy_SedonaTypeR_logical_type_name__impl, 1}, + {"savvy_SedonaTypeR_name__impl", (DL_FUNC)&savvy_SedonaTypeR_name__impl, 1}, + {"savvy_SedonaTypeR_new__impl", (DL_FUNC)&savvy_SedonaTypeR_new__impl, 1}, {NULL, NULL, 0}}; void R_init_sedonadb(DllInfo *dll) { diff --git a/r/sedonadb/src/rust/Cargo.toml b/r/sedonadb/src/rust/Cargo.toml index 2ce10cfdf..5c6f6aa26 100644 --- a/r/sedonadb/src/rust/Cargo.toml +++ b/r/sedonadb/src/rust/Cargo.toml @@ -39,5 +39,6 @@ sedona-expr = { workspace = true } sedona-geoparquet = { workspace = true } sedona-proj = { workspace = true } sedona-schema = { workspace = true } +serde_json = { workspace = true } thiserror = { workspace = true } tokio = { workspace = true } diff --git a/r/sedonadb/src/rust/api.h b/r/sedonadb/src/rust/api.h index 
fac6258bd..f4e648c38 100644 --- a/r/sedonadb/src/rust/api.h +++ b/r/sedonadb/src/rust/api.h @@ -21,6 +21,7 @@ SEXP savvy_configure_proj_shared__ffi(SEXP c_arg__shared_library_path, SEXP savvy_init_r_runtime__ffi(DllInfo *c_arg___dll_info); SEXP savvy_init_r_runtime_interrupts__ffi(SEXP c_arg__interrupts_call, SEXP c_arg__pkg_env); +SEXP savvy_parse_crs_metadata__ffi(SEXP c_arg__crs_json); SEXP savvy_sedonadb_adbc_init_func__ffi(void); // methods and associated functions for InternalContext @@ -61,6 +62,13 @@ SEXP savvy_InternalDataFrame_to_view__ffi(SEXP self__, SEXP c_arg__ctx, SEXP c_arg__table_ref, SEXP c_arg__overwrite); +// methods and associated functions for SedonaCrsR +SEXP savvy_SedonaCrsR_authority_code__ffi(SEXP self__); +SEXP savvy_SedonaCrsR_display__ffi(SEXP self__); +SEXP savvy_SedonaCrsR_srid__ffi(SEXP self__); +SEXP savvy_SedonaCrsR_to_crs_string__ffi(SEXP self__); +SEXP savvy_SedonaCrsR_to_json__ffi(SEXP self__); + // methods and associated functions for SedonaDBExpr SEXP savvy_SedonaDBExpr_alias__ffi(SEXP self__, SEXP c_arg__name); SEXP savvy_SedonaDBExpr_cast__ffi(SEXP self__, SEXP c_arg__schema_xptr); @@ -84,3 +92,10 @@ SEXP savvy_SedonaDBExprFactory_new__ffi(SEXP c_arg__ctx); SEXP savvy_SedonaDBExprFactory_scalar_function__ffi(SEXP self__, SEXP c_arg__name, SEXP c_arg__args); + +// methods and associated functions for SedonaTypeR +SEXP savvy_SedonaTypeR_crs__ffi(SEXP self__); +SEXP savvy_SedonaTypeR_crs_display__ffi(SEXP self__); +SEXP savvy_SedonaTypeR_logical_type_name__ffi(SEXP self__); +SEXP savvy_SedonaTypeR_name__ffi(SEXP self__); +SEXP savvy_SedonaTypeR_new__ffi(SEXP c_arg__schema_xptr); diff --git a/r/sedonadb/src/rust/src/lib.rs b/r/sedonadb/src/rust/src/lib.rs index 842519087..44747539e 100644 --- a/r/sedonadb/src/rust/src/lib.rs +++ b/r/sedonadb/src/rust/src/lib.rs @@ -17,12 +17,14 @@ // Example functions use std::ffi::c_void; +use std::sync::Arc; use savvy::savvy; use savvy_ffi::R_NilValue; use sedona_adbc::AdbcSedonadbDriverInit; 
use sedona_proj::register::{configure_global_proj_engine, ProjCrsEngineBuilder}; +use sedona_schema::crs::CoordinateReferenceSystem; mod context; mod dataframe; @@ -67,3 +69,194 @@ fn configure_proj_shared( configure_global_proj_engine(builder)?; Ok(()) } + +#[savvy] +fn parse_crs_metadata(crs_json: &str) -> savvy::Result { + use sedona_schema::crs::deserialize_crs_from_obj; + + // The input is GeoArrow extension metadata, which is a JSON object like: + // {"crs": } + // We need to extract the "crs" field first. + let metadata: serde_json::Value = serde_json::from_str(crs_json) + .map_err(|e| savvy::Error::new(format!("Failed to parse metadata JSON: {e}")))?; + + if let Some(crs_val) = metadata.get("crs") { + if crs_val.is_null() { + return Ok(savvy::NullSexp.into()); + } + + let crs = deserialize_crs_from_obj(crs_val)?; + match crs { + Some(crs_obj) => { + let auth_code = crs_obj.to_authority_code().ok().flatten(); + let srid = crs_obj.srid().ok().flatten(); + let name = crs_val.get("name").and_then(|v| v.as_str()); + let proj_string = crs_obj.to_crs_string(); + + let mut out = savvy::OwnedListSexp::new(4, true)?; + out.set_name(0, "authority_code")?; + out.set_name(1, "srid")?; + out.set_name(2, "name")?; + out.set_name(3, "proj_string")?; + + if let Some(auth_code) = auth_code { + out.set_value(0, savvy::Sexp::try_from(auth_code.as_str())?)?; + } else { + out.set_value(0, savvy::NullSexp)?; + } + + if let Some(srid) = srid { + out.set_value(1, savvy::Sexp::try_from(srid as i32)?)?; + } else { + out.set_value(1, savvy::NullSexp)?; + } + + if let Some(name) = name { + out.set_value(2, savvy::Sexp::try_from(name)?)?; + } else { + out.set_value(2, savvy::NullSexp)?; + } + out.set_value(3, savvy::Sexp::try_from(proj_string.as_str())?)?; + + Ok(out.into()) + } + None => Ok(savvy::NullSexp.into()), + } + } else { + Ok(savvy::NullSexp.into()) + } +} + +/// R-exposed wrapper for CRS (Coordinate Reference System) introspection +/// +/// This wraps an Arc and exposes its 
methods to R. +#[savvy] +pub struct SedonaCrsR { + inner: Arc, +} + +#[savvy] +impl SedonaCrsR { + /// Get the SRID (e.g., 4326 for WGS84) or NULL if not an EPSG code + fn srid(&self) -> savvy::Result { + match self.inner.srid() { + Ok(Some(srid)) => savvy::Sexp::try_from(srid as i32), + Ok(None) => Ok(savvy::NullSexp.into()), + Err(e) => Err(savvy::Error::new(format!("Failed to get SRID: {e}"))), + } + } + + /// Get the authority code (e.g., "EPSG:4326") or NULL if not available + fn authority_code(&self) -> savvy::Result { + match self.inner.to_authority_code() { + Ok(Some(code)) => savvy::Sexp::try_from(code.as_str()), + Ok(None) => Ok(savvy::NullSexp.into()), + Err(e) => Err(savvy::Error::new(format!( + "Failed to get authority code: {e}" + ))), + } + } + + /// Get the JSON representation of the CRS + fn to_json(&self) -> savvy::Result { + savvy::Sexp::try_from(self.inner.to_json().as_str()) + } + + /// Get the PROJ-compatible CRS string representation + fn to_crs_string(&self) -> savvy::Result { + savvy::Sexp::try_from(self.inner.to_crs_string().as_str()) + } + + /// Get a formatted display string (e.g., "EPSG:4326" or "{...}") + fn display(&self) -> savvy::Result { + let display = if let Ok(Some(auth)) = self.inner.to_authority_code() { + auth + } else { + format!("{}", self.inner.as_ref()) + }; + savvy::Sexp::try_from(display.as_str()) + } +} + +/// R-exposed wrapper for SedonaType introspection +/// +/// This allows R code to inspect Arrow schema fields and determine +/// if they are geometry types with CRS information. +#[savvy] +pub struct SedonaTypeR { + inner: sedona_schema::datatypes::SedonaType, + name: String, +} + +#[savvy] +impl SedonaTypeR { + /// Create a SedonaTypeR from a nanoarrow schema (external pointer) + /// + /// The schema should be a single field (column) schema, not a struct schema. 
+ fn new(schema_xptr: savvy::Sexp) -> savvy::Result { + use sedona_schema::datatypes::SedonaType; + + let field = crate::ffi::import_field(schema_xptr)?; + let name = field.name().clone(); + + // Use existing SedonaType infrastructure to parse the field + let inner = SedonaType::from_storage_field(&field) + .map_err(|e| savvy::Error::new(format!("Failed to create SedonaType: {e}")))?; + + Ok(SedonaTypeR { inner, name }) + } + + /// Get the logical type name ("geometry", "geography", "utf8", etc.) + fn logical_type_name(&self) -> savvy::Result { + savvy::Sexp::try_from(self.inner.logical_type_name().as_str()) + } + + /// Get the column name + fn name(&self) -> savvy::Result { + savvy::Sexp::try_from(self.name.as_str()) + } + + /// Get the CRS wrapper object; returns an error (not NULL) if no CRS is present + /// + /// This returns a SedonaCrsR object that can be used to inspect the CRS. + fn crs(&self) -> savvy::Result { + use sedona_schema::datatypes::SedonaType; + + match &self.inner { + SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => { + if let Some(crs_arc) = crs { + Ok(SedonaCrsR { + inner: crs_arc.clone(), + }) + } else { + Err(savvy::Error::new("No CRS available for this geometry type")) + } + } + _ => Err(savvy::Error::new("No CRS available for non-geometry types")), + } + } + + /// Get a formatted CRS display string like " (CRS: EPSG:4326)" or empty string + fn crs_display(&self) -> savvy::Result { + use sedona_schema::datatypes::SedonaType; + + match &self.inner { + SedonaType::Wkb(_, crs) | SedonaType::WkbView(_, crs) => { + if let Some(crs_ref) = crs { + // Try to get authority code first (usually EPSG:XXXX) + let auth = crs_ref.to_authority_code().ok().flatten(); + let display = if let Some(auth) = auth { + format!(" (CRS: {})", auth) + } else { + // Fallback to the Display impl which might be lowercase or PROJJSON + format!(" (CRS: {})", crs_ref) + }; + savvy::Sexp::try_from(display.as_str()) + } else { + savvy::Sexp::try_from("") + } + } + _ => 
savvy::Sexp::try_from(""), + } + } +} diff --git a/r/sedonadb/tests/testthat/_snaps/crs.md b/r/sedonadb/tests/testthat/_snaps/crs.md new file mode 100644 index 000000000..e576d60ba --- /dev/null +++ b/r/sedonadb/tests/testthat/_snaps/crs.md @@ -0,0 +1,211 @@ +# sd_parse_crs works for GeoArrow metadata with EPSG + + Code + sd_parse_crs(meta) + Output + $authority_code + [1] "EPSG:5070" + + $srid + [1] 5070 + + $name + [1] "NAD83 / Conus Albers" + + $proj_string + [1] "{\"id\":{\"authority\":\"EPSG\",\"code\":5070},\"name\":\"NAD83 / Conus Albers\"}" + + +# sd_parse_crs works for Engineering CRS (no EPSG ID) + + Code + sd_parse_crs(meta) + Output + $authority_code + NULL + + $srid + NULL + + $name + [1] "Construction Site Local Grid" + + $proj_string + [1] "{\"coordinate_system\":{\"axis\":[{\"abbreviation\":\"N\",\"direction\":\"north\",\"name\":\"Northing\",\"unit\":\"metre\"},{\"abbreviation\":\"E\",\"direction\":\"east\",\"name\":\"Easting\",\"unit\":\"metre\"}],\"subtype\":\"Cartesian\"},\"datum\":{\"name\":\"Local Datum\",\"type\":\"EngineeringDatum\"},\"name\":\"Construction Site Local Grid\",\"type\":\"EngineeringCRS\"}" + + +# sd_parse_crs returns NULL if crs field is missing + + Code + sd_parse_crs("{\"something_else\": 123}") + Output + NULL + +--- + + Code + sd_parse_crs("{}") + Output + NULL + +# sd_parse_crs handles invalid JSON gracefully + + Code + sd_parse_crs("invalid json") + Condition + Error: + ! Failed to parse metadata JSON: expected value at line 1 column 1 + +# sd_parse_crs works with plain strings if that's what's in 'crs' + + Code + sd_parse_crs(meta) + Output + $authority_code + [1] "OGC:CRS84" + + $srid + [1] 4326 + + $name + NULL + + $proj_string + [1] "OGC:CRS84" + + +# print.sedonadb_dataframe shows CRS info for geometry column with EPSG + + Code + print(df, n = 0) + Output + # A sedonadb_dataframe: ? 
x 1 + # Geometry: geom (CRS: OGC:CRS84) + +----------+ + | geom | + | geometry | + +----------+ + +----------+ + Preview of up to 0 row(s) + +# print.sedonadb_dataframe shows CRS info with different SRID + + Code + print(df, n = 0) + Output + # A sedonadb_dataframe: ? x 1 + # Geometry: geom (CRS: EPSG:5070) + +----------+ + | geom | + | geometry | + +----------+ + +----------+ + Preview of up to 0 row(s) + +# print.sedonadb_dataframe shows multiple geometry columns with CRS + + Code + print(df, n = 0) + Output + # A sedonadb_dataframe: ? x 2 + # Geometry: geom1 (CRS: OGC:CRS84), geom2 (CRS: EPSG:5070) + +----------+----------+ + | geom1 | geom2 | + | geometry | geometry | + +----------+----------+ + +----------+----------+ + Preview of up to 0 row(s) + +# print.sedonadb_dataframe handles geometry without explicit CRS + + Code + print(df, n = 0) + Output + # A sedonadb_dataframe: ? x 1 + # Geometry: geom + +----------+ + | geom | + | geometry | + +----------+ + +----------+ + Preview of up to 0 row(s) + +# print.sedonadb_dataframe respects width parameter for geometry line + + Code + print(df, n = 0, width = 60) + Output + # A sedonadb_dataframe: ? x 2 + # Geometry: very_long_geometry_column_name_1 (CRS: OGC:CR... + +-----------------------------+----------------------------+ + | very_long_geometry_column_n | very_long_geometry_column_ | + | ame_1... | name_2... | + +-----------------------------+----------------------------+ + +-----------------------------+----------------------------+ + Preview of up to 0 row(s) + +# sd_parse_crs handles empty string + + Code + sd_parse_crs("") + Condition + Error: + ! 
Failed to parse metadata JSON: EOF while parsing a value at line 1 column 0 + +# sd_parse_crs handles CRS with only name, no ID + + Code + sd_parse_crs(meta) + Output + $authority_code + NULL + + $srid + NULL + + $name + [1] "Custom Geographic CRS" + + $proj_string + [1] "{\"name\":\"Custom Geographic CRS\",\"type\":\"GeographicCRS\"}" + + +# sd_parse_crs handles OGC:CRS84 + + Code + sd_parse_crs(meta) + Output + $authority_code + [1] "OGC:CRS84" + + $srid + [1] 4326 + + $name + NULL + + $proj_string + [1] "OGC:CRS84" + + +# SedonaTypeR$crs_display() uses uppercase authority codes + + Code + sd_type$crs_display() + Output + [1] " (CRS: OGC:CRS84)" + +--- + + Code + sd_type5070$crs_display() + Output + [1] " (CRS: EPSG:5070)" + +# SedonaCrsR$display() uses uppercase authority codes + + Code + crs$display() + Output + [1] "OGC:CRS84" + diff --git a/r/sedonadb/tests/testthat/test-crs.R b/r/sedonadb/tests/testthat/test-crs.R new file mode 100644 index 000000000..8fc88a4ae --- /dev/null +++ b/r/sedonadb/tests/testthat/test-crs.R @@ -0,0 +1,270 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +test_that("sd_parse_crs works for GeoArrow metadata with EPSG", { + meta <- '{"crs": {"id": {"authority": "EPSG", "code": 5070}, "name": "NAD83 / Conus Albers"}}' + expect_snapshot(sd_parse_crs(meta)) +}) + +test_that("sd_parse_crs works for Engineering CRS (no EPSG ID)", { + # A realistic example of a local engineering CRS that wouldn't have an EPSG code + meta <- '{ + "crs": { + "type": "EngineeringCRS", + "name": "Construction Site Local Grid", + "datum": { + "type": "EngineeringDatum", + "name": "Local Datum" + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + {"name": "Northing", "abbreviation": "N", "direction": "north", "unit": "metre"}, + {"name": "Easting", "abbreviation": "E", "direction": "east", "unit": "metre"} + ] + } + } + }' + expect_snapshot(sd_parse_crs(meta)) +}) + +test_that("sd_parse_crs returns NULL if crs field is missing", { + expect_snapshot(sd_parse_crs('{"something_else": 123}')) + expect_snapshot(sd_parse_crs('{}')) +}) + +test_that("sd_parse_crs handles invalid JSON gracefully", { + expect_snapshot( + sd_parse_crs('invalid json'), + error = TRUE + ) +}) + +test_that("sd_parse_crs works with plain strings if that's what's in 'crs'", { + meta <- '{"crs": "EPSG:4326"}' + expect_snapshot(sd_parse_crs(meta)) +}) + +# Tests for CRS display in print.sedonadb_dataframe + +test_that("print.sedonadb_dataframe shows CRS info for geometry column with EPSG", { + df <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 4326) as geom") + expect_snapshot(print(df, n = 0)) +}) + +test_that("print.sedonadb_dataframe shows CRS info with different SRID", { + df <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 5070) as geom") + expect_snapshot(print(df, n = 0)) +}) + +test_that("print.sedonadb_dataframe shows multiple geometry columns with CRS", { + df <- sd_sql( + " + SELECT + ST_SetSRID(ST_Point(1, 2), 4326) as geom1, + ST_SetSRID(ST_Point(3, 4), 5070) as geom2 + " + ) + expect_snapshot(print(df, n = 0)) +}) + +test_that("print.sedonadb_dataframe 
handles geometry without explicit CRS", { + # ST_Point without ST_SetSRID may not have CRS metadata + df <- sd_sql("SELECT ST_Point(1, 2) as geom") + expect_snapshot(print(df, n = 0)) +}) + +test_that("print.sedonadb_dataframe respects width parameter for geometry line", { + df <- sd_sql( + " + SELECT + ST_SetSRID(ST_Point(1, 2), 4326) as very_long_geometry_column_name_1, + ST_SetSRID(ST_Point(3, 4), 4326) as very_long_geometry_column_name_2 + " + ) + # Use a narrow width to trigger truncation + expect_snapshot(print(df, n = 0, width = 60)) +}) + +# Additional edge cases for sd_parse_crs + +test_that("sd_parse_crs handles NULL input", { + expect_error( + sd_parse_crs(NULL), + "must be character" + ) +}) + +test_that("sd_parse_crs handles empty string", { + expect_snapshot( + sd_parse_crs(""), + error = TRUE + ) +}) + +test_that("sd_parse_crs handles CRS with only name, no ID", { + meta <- '{ + "crs": { + "type": "GeographicCRS", + "name": "Custom Geographic CRS" + } + }' + expect_snapshot(sd_parse_crs(meta)) +}) + +test_that("sd_parse_crs handles OGC:CRS84", { + # Common case in GeoParquet/GeoArrow + + meta <- '{"crs": "OGC:CRS84"}' + expect_snapshot(sd_parse_crs(meta)) +}) + +# Explicit tests for Rust wrappers to ensure uppercase casing + +test_that("SedonaTypeR$crs_display() uses uppercase authority codes", { + df <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 4326) as geom") + schema <- nanoarrow::infer_nanoarrow_schema(df) + sd_type <- SedonaTypeR$new(schema$children$geom) + expect_snapshot(sd_type$crs_display()) + + df5070 <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 5070) as geom") + sd_type5070 <- SedonaTypeR$new( + nanoarrow::infer_nanoarrow_schema(df5070)$children$geom + ) + expect_snapshot(sd_type5070$crs_display()) +}) + +test_that("SedonaCrsR$display() uses uppercase authority codes", { + df <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 4326) as geom") + sd_type <- SedonaTypeR$new( + nanoarrow::infer_nanoarrow_schema(df)$children$geom + ) + crs <- 
sd_type$crs() + expect_snapshot(crs$display()) +}) + +# CRS preservation through data creation paths + +test_that("CRS is preserved when creating from data.frame with geometry", { + df <- as_sedonadb_dataframe( + data.frame( + geom = wk::as_wkb(wk::wkt("POINT (0 1)", crs = "EPSG:32620")) + ) + ) + + re_df <- sd_collect(df) + crs <- wk::wk_crs(re_df$geom) + expect_false(is.null(crs)) + # Check that the CRS contains EPSG:32620 info + expect_true( + grepl("32620", as.character(crs)) || + grepl("32620", jsonlite::toJSON(crs, auto_unbox = TRUE)) + ) +}) + +test_that("CRS is preserved through nanoarrow stream roundtrip", { + r_df <- data.frame( + geom = wk::as_wkb(wk::wkt("POINT (0 1)", crs = "EPSG:4326")) + ) + + stream <- nanoarrow::as_nanoarrow_array_stream(r_df) + df <- as_sedonadb_dataframe(stream, lazy = FALSE) + re_df <- sd_collect(df) + + crs <- wk::wk_crs(re_df$geom) + expect_false(is.null(crs)) +}) + +test_that("Different CRS values are preserved independently", { + # Create geometry with non-default CRS + df <- sd_sql( + " + SELECT + ST_SetSRID(ST_Point(1, 2), 4326) as geom_wgs84, + ST_SetSRID(ST_Point(3, 4), 32632) as geom_utm + " + ) + + re_df <- sd_collect(df) + + # Both geometries should have CRS metadata + crs1 <- wk::wk_crs(re_df$geom_wgs84) + crs2 <- wk::wk_crs(re_df$geom_utm) + + expect_false(is.null(crs1)) + expect_false(is.null(crs2)) +}) + +# Parquet roundtrip with CRS + +test_that("CRS is preserved through parquet write/read", { + df <- sd_sql("SELECT ST_SetSRID(ST_Point(1, 2), 4326) as geom") + + tmp_parquet_file <- tempfile(fileext = ".parquet") + on.exit(unlink(tmp_parquet_file)) + + sd_write_parquet(df, tmp_parquet_file) + df_roundtrip <- sd_read_parquet(tmp_parquet_file) + re_df <- sd_collect(df_roundtrip) + + # Verify geometry has CRS + crs <- wk::wk_crs(re_df$geom) + expect_false(is.null(crs)) +}) + +test_that("Non-standard CRS is preserved through parquet roundtrip", { + # Use a less common SRID (NAD83 / Conus Albers) + df <- sd_sql("SELECT 
ST_SetSRID(ST_Point(1, 2), 5070) as geom") + + tmp_parquet_file <- tempfile(fileext = ".parquet") + on.exit(unlink(tmp_parquet_file)) + + sd_write_parquet(df, tmp_parquet_file) + df_roundtrip <- sd_read_parquet(tmp_parquet_file) + re_df <- sd_collect(df_roundtrip) + + crs <- wk::wk_crs(re_df$geom) + expect_false(is.null(crs)) + # Check CRS info contains 5070 + crs_str <- jsonlite::toJSON(crs, auto_unbox = TRUE) + expect_true(grepl("5070", crs_str) || grepl("Albers", crs_str)) +}) + +# Multiple geometry columns with different CRS through operations + +test_that("Multiple geometry columns preserve their CRS after operations", { + df <- sd_sql( + " + SELECT + ST_SetSRID(ST_Point(1, 2), 4326) as point_a, + ST_SetSRID(ST_Point(3, 4), 5070) as point_b, + 'test' as name + " + ) + + # Collect and check both CRS are preserved + + re_df <- sd_collect(df) + + crs_a <- wk::wk_crs(re_df$point_a) + crs_b <- wk::wk_crs(re_df$point_b) + + expect_false(is.null(crs_a)) + expect_false(is.null(crs_b)) + # They should be different + expect_false(identical(crs_a, crs_b)) +})