Skip to content

Commit b06cdc6

Browse files
authored
Ensure special keys are not in content when loaded (#112)
* Ensure special keys are not in content when loaded

  The loader popped keys such as `collection` off the item and then dehydrated the item to be used as the 'content'. Because `collection` had been removed prior to dehydration, it was flagged with the "do-not-merge" marker, since the key is present on the base_item. Instead, ensure that `id`, `collection`, and `geometry` are not in content, as they are stored on the table row and shouldn't participate in hydration.

  Additionally, `bbox` was previously a derived value at search runtime but was recently changed to a returned value if it existed on the item. However, during loading, the `bbox` was dropped, so it would never exist on the persisted item.

* Simplify geometry check
1 parent 3c61e03 commit b06cdc6

File tree

2 files changed

+39
-6
lines changed

2 files changed

+39
-6
lines changed

pypgstac/pypgstac/load.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -511,8 +511,8 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict:
511511

512512
base_item, key, partition_trunc = self.collection_json(item["collection"])
513513

514-
out["id"] = item.pop("id")
515-
out["collection"] = item.pop("collection")
514+
out["id"] = item.get("id")
515+
out["collection"] = item.get("collection")
516516
properties: dict = item.get("properties", {})
517517

518518
dt = properties.get("datetime")
@@ -544,16 +544,21 @@ def format_item(self, _item: Union[Path, str, dict]) -> dict:
544544

545545
out["partition"] = partition
546546

547-
bbox = item.pop("bbox")
548-
geojson = item.pop("geometry")
549-
if geojson is None and bbox is not None:
547+
geojson = item.get("geometry")
548+
if geojson is None:
550549
geometry = None
551550
else:
552551
geometry = str(Geometry.from_geojson(geojson).wkb)
553552
out["geometry"] = geometry
554553

555554
content = dehydrate(base_item, item)
556555

556+
# Remove keys from the dehydrated item content which are stored directly
557+
# on the table row.
558+
content.pop("id", None)
559+
content.pop("collection", None)
560+
content.pop("geometry", None)
561+
557562
out["content"] = orjson.dumps(content).decode()
558563

559564
return out

pypgstac/tests/test_load.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Tests for pypgstac."""
2+
import json
23
from pathlib import Path
3-
from pypgstac.load import Methods, Loader
4+
from pypgstac.load import Methods, Loader, read_json
45
from psycopg.errors import UniqueViolation
56
import pytest
67

@@ -239,3 +240,30 @@ def test_load_items_dehydrated_ignore_succeeds(loader: Loader) -> None:
239240
loader.load_items(
240241
str(TEST_DEHYDRATED_ITEMS), insert_mode=Methods.ignore, dehydrated=True
241242
)
243+
244+
245+
def test_format_items_keys(loader: Loader) -> None:
246+
"""Test pypgstac items ignore loader."""
247+
loader.load_collections(
248+
str(TEST_COLLECTIONS_JSON),
249+
insert_mode=Methods.ignore,
250+
)
251+
252+
items_iter = read_json(str(TEST_ITEMS))
253+
item_json = next(iter(items_iter))
254+
out = loader.format_item(item_json)
255+
256+
# Top level keys expected after format
257+
assert "id" in out
258+
assert "collection" in out
259+
assert "geometry" in out
260+
assert "content" in out
261+
262+
# Special keys expected not to be in the item content
263+
content_json = json.loads(out["content"])
264+
assert "id" not in content_json
265+
assert "collection" not in content_json
266+
assert "geometry" not in content_json
267+
268+
# Ensure bbox is included in content
269+
assert "bbox" in content_json

0 commit comments

Comments
 (0)