-
Notifications
You must be signed in to change notification settings - Fork 42
Open
Milestone
Description
In Python we can read GDAL/OGR sources with read_pyogrio():
sedona-db/python/sedonadb/python/sedonadb/datasource.py
Lines 112 to 172 in e80fb37
class PyogrioFormatSpec(ExternalFormatSpec):
    """An `ExternalFormatSpec` implementation wrapping GDAL/OGR via pyogrio"""

    def __init__(self, extension=""):
        self._extension = extension
        self._options = {}

    def with_options(self, options):
        """Return a clone of this spec with ``options`` applied.

        Options already set on this instance are carried over to the clone,
        and keys in ``options`` take precedence. This instance is not
        modified.
        """
        cloned = type(self)(self.extension)
        # Start from the current options so chained with_options() calls
        # accumulate instead of silently discarding earlier settings.
        cloned._options.update(self._options)
        cloned._options.update(options)
        return cloned

    @property
    def extension(self) -> str:
        """The file extension this spec was constructed with."""
        return self._extension

    def open_reader(self, args):
        """Open the source described by ``args`` with pyogrio.

        The source URL is translated to an OGR-openable path, the projection
        (if any) is translated to a list of column names, and a bounding box
        is derived from the filter when the file schema has exactly one
        geometry column.

        Returns a ``PyogrioReaderShelter`` wrapping the result of
        ``pyogrio.raw.ogr_open_arrow()`` together with the selected columns.

        Raises ``ValueError`` if the source cannot be converted to a URL or
        the URL scheme is not one of http://, https://, or file://.
        """
        # Imported lazily so pyogrio is only required when a reader is opened.
        import pyogrio.raw

        url = args.src.to_url()
        if url is None:
            raise ValueError(f"Can't convert {args.src} to OGR-openable object")

        if url.startswith(("http://", "https://")):
            ogr_src = f"/vsicurl/{url}"
        elif url.startswith("file://") and sys.platform != "win32":
            # Keeps the leading slash of the path: file:///a/b -> /a/b
            ogr_src = url.removeprefix("file://")
        elif url.startswith("file:///"):
            # Only reached on Windows; drops all three slashes:
            # file:///C:/data -> C:/data
            ogr_src = url.removeprefix("file:///")
        else:
            raise ValueError(f"Can't open {url} with OGR")

        if ogr_src.endswith(".zip"):
            ogr_src = f"/vsizip/{ogr_src}"

        if args.is_projected():
            file_columns = args.file_schema.names
            columns = [file_columns[i] for i in args.file_projection]
        else:
            columns = None

        # NOTE(review): 0 presumably lets pyogrio/GDAL choose the batch size
        # -- TODO confirm.
        batch_size = args.batch_size if args.batch_size is not None else 0

        # A bounding box is only computed when there is exactly one geometry
        # column; otherwise no bbox is passed.
        bbox = None
        if args.filter and args.file_schema is not None:
            geometry_column_indices = args.file_schema.geometry_column_indices
            file_columns = args.file_schema.names
            if len(geometry_column_indices) == 1:
                bbox = args.filter.bounding_box(
                    file_columns[geometry_column_indices[0]]
                )

        # NOTE(review): self._options is not forwarded here; an empty dict is
        # passed as the second argument. Confirm whether that is intended.
        return PyogrioReaderShelter(
            pyogrio.raw.ogr_open_arrow(
                ogr_src, {}, columns=columns, batch_size=batch_size, bbox=bbox
            ),
            columns,
        )
This works by leveraging the ArrowArrayStream API as exposed by pyogrio so we don't need to ship a separate GDAL build.
In R, it's a significant performance hit to have this read materialize to an sf object first (apache/sedona#2576). We may need to add something to the sf package to allow the read to skip materialization; however, the sf package does implement the array stream interface at least partially: https://github.com/r-spatial/sf/blob/25700c2cb48191598bfc7495baafe4b6808398c6/R/read.R#L221-L265, which is possibly enough to prototype.
Metadata
Metadata
Assignees
Labels
No labels