Skip to content

Commit 0abec5b

Browse files
authored
Merge pull request #293 from 3DBAG/279-αην5-new-checksums
3dbag-pipeline-279 AHN5 new checksums
2 parents 9dea86e + f4c46c8 commit 0abec5b

File tree

2 files changed

+36
-16
lines changed

2 files changed

+36
-16
lines changed

packages/core/src/bag3d/core/assets/ahn/download.py

Lines changed: 29 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import json
12
from pathlib import Path
23
from typing import Mapping, Union
34
from hashlib import new as hash_new, algorithms_available
@@ -17,9 +18,9 @@
1718

1819
# AHN LAZ file with checksums.
1920
URL_LAZ_SHA = {
20-
"ahn5": "https://gist.githubusercontent.com/GinaStavropoulou/4f6b70bd6d356c3a06434916bfa627e0/raw/a87213a9643446b485d5a1b8f5c7416bad1a05f5/01_LAZ.SHA256",
21-
"ahn4": "https://gist.githubusercontent.com/fwrite/6bb4ad23335c861f9f3162484e57a112/raw/ee5274c7c6cf42144d569e303cf93bcede3e2da1/AHN4.md5",
22-
"ahn3": "https://gist.githubusercontent.com/arbakker/dcca00384cddbdf10c0421ed26d8911c/raw/f43465d287a654254e21851cce38324eba75d03c/checksum_laz.md5",
21+
5: "https://fsn1.your-objectstorage.com/hwh-portal/20230609_tmp/links/nationaal/Nederland/AHN5_PC.json",
22+
4: "https://gist.githubusercontent.com/fwrite/6bb4ad23335c861f9f3162484e57a112/raw/ee5274c7c6cf42144d569e303cf93bcede3e2da1/AHN4.md5",
23+
3: "https://gist.githubusercontent.com/arbakker/dcca00384cddbdf10c0421ed26d8911c/raw/f43465d287a654254e21851cce38324eba75d03c/checksum_laz.md5",
2324
}
2425

2526

@@ -127,19 +128,19 @@ def validate(
127128
@asset
128129
def md5_ahn3(context):
129130
"""Download the MD5 sums that are calculated by PDOK for the AHN3 LAZ files."""
130-
return get_checksums(URL_LAZ_SHA["ahn3"])
131+
return get_checksums(URL_LAZ_SHA, ahn_version=3)
131132

132133

133134
@asset
134135
def md5_ahn4(context):
135136
"""Download the MD5 sums that are calculated by PDOK for the AHN4 LAZ files."""
136-
return get_checksums(URL_LAZ_SHA["ahn4"])
137+
return get_checksums(URL_LAZ_SHA, ahn_version=4)
137138

138139

139140
@asset
140141
def sha256_ahn5(context):
141142
"""Download the SHA256 sums for the AHN5 LAZ files, provided by AHN."""
142-
return get_checksums(URL_LAZ_SHA["ahn5"])
143+
return get_checksums(URL_LAZ_SHA, ahn_version=5)
143144

144145

145146
@asset
@@ -330,17 +331,33 @@ def laz_files_ahn5(context, config: LazFilesConfig, sha256_ahn5, tile_index_ahn)
330331
return Output(lazdownload, metadata=lazdownload.asdict())
331332

332333

333-
def get_checksums(url: str) -> Mapping[str, str]:
334-
"""Download the checksums of AHN3/4/5 LAZ files.
334+
def get_checksums(url_map: Mapping[int, str], ahn_version: int) -> Mapping[str, str]:
335+
"""
336+
Get the AHN LAZ file checksums for the given AHN version.
337+
338+
Args:
339+
url_map (Mapping[int, str]): A mapping between AHN versions as keys and
340+
their corresponding checksum file URL as values.
341+
ahn_version (int): The version of AHN.
335342
336343
Returns:
337-
{ filename: checksum }
344+
Mapping[str, str]: A dictionary where the keys are filenames and the values
345+
are their corresponding SHA-256 or MD5 checksums.
338346
"""
347+
url = url_map[ahn_version]
339348
_hashes = download_as_str(url)
340349
checksums = {}
341-
for tile in _hashes.strip().split("\n"):
342-
sha, file = tile.split()
343-
checksums[file] = sha
350+
if ahn_version == 5:
351+
# We have a GeoJSON FeatureCollection
352+
for feature in json.loads(_hashes)["features"]:
353+
if properties := feature.get("properties"):
354+
if file_url := properties.get("file"):
355+
filename = file_url.split("/")[-1]
356+
checksums[filename] = properties.get("sha256")
357+
else:
358+
for tile in _hashes.strip().split("\n"):
359+
sha, file = tile.split()
360+
checksums[file] = sha
344361
return checksums
345362

346363

packages/core/tests/test_assets_ahn.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,15 @@ def test_download_ahn_index_geometry():
3737

3838

3939
@pytest.mark.parametrize(
40-
"url",
41-
(URL_LAZ_SHA["ahn3"], URL_LAZ_SHA["ahn4"], URL_LAZ_SHA["ahn5"]),
40+
"ahn_version",
41+
(3, 4, 5),
4242
ids=("ahn3", "ahn4", "ahn5"),
4343
)
44-
def test_get_checksums(url):
45-
checksums = get_checksums(url)
44+
def test_get_checksums(ahn_version):
45+
checksums = get_checksums(URL_LAZ_SHA, ahn_version=ahn_version)
46+
print(
47+
f"Found {len(checksums)} checksums for AHN{ahn_version} LAZ files. First five:"
48+
)
4649
assert len(checksums) > 0
4750
for k, sha in list(checksums.items())[:5]:
4851
assert sha is not None

0 commit comments

Comments
 (0)