|
| 1 | +import json |
1 | 2 | from pathlib import Path
|
2 | 3 | from typing import Mapping, Union
|
3 | 4 | from hashlib import new as hash_new, algorithms_available
|
|
17 | 18 |
|
# AHN LAZ file with checksums.
# Maps an AHN version number to the URL of its published checksum listing.
# NOTE(review): formats differ per version — the AHN5 entry points at a JSON
# file (a GeoJSON FeatureCollection with per-tile SHA-256 sums), while the
# AHN4/AHN3 entries are plain-text MD5 listings; consumers must branch on
# version (see `get_checksums`).
URL_LAZ_SHA = {
    5: "https://fsn1.your-objectstorage.com/hwh-portal/20230609_tmp/links/nationaal/Nederland/AHN5_PC.json",
    4: "https://gist.githubusercontent.com/fwrite/6bb4ad23335c861f9f3162484e57a112/raw/ee5274c7c6cf42144d569e303cf93bcede3e2da1/AHN4.md5",
    3: "https://gist.githubusercontent.com/arbakker/dcca00384cddbdf10c0421ed26d8911c/raw/f43465d287a654254e21851cce38324eba75d03c/checksum_laz.md5",
}
|
24 | 25 |
|
25 | 26 |
|
@@ -127,19 +128,19 @@ def validate(
|
@asset
def md5_ahn3(context):
    """Download the MD5 sums that are calculated by PDOK for the AHN3 LAZ files."""
    md5_sums = get_checksums(URL_LAZ_SHA, ahn_version=3)
    return md5_sums
131 | 132 |
|
132 | 133 |
|
@asset
def md5_ahn4(context):
    """Download the MD5 sums that are calculated by PDOK for the AHN4 LAZ files."""
    md5_sums = get_checksums(URL_LAZ_SHA, ahn_version=4)
    return md5_sums
137 | 138 |
|
138 | 139 |
|
@asset
def sha256_ahn5(context):
    """Download the SHA256 sums for the AHN5 LAZ files, provided by AHN."""
    sha256_sums = get_checksums(URL_LAZ_SHA, ahn_version=5)
    return sha256_sums
143 | 144 |
|
144 | 145 |
|
145 | 146 | @asset
|
@@ -330,17 +331,33 @@ def laz_files_ahn5(context, config: LazFilesConfig, sha256_ahn5, tile_index_ahn)
|
330 | 331 | return Output(lazdownload, metadata=lazdownload.asdict())
|
331 | 332 |
|
332 | 333 |
|
333 |
def get_checksums(url_map: Mapping[int, str], ahn_version: int) -> Mapping[str, str]:
    """
    Get the AHN LAZ file checksums for the given AHN version.

    Args:
        url_map (Mapping[int, str]): A mapping between AHN versions as keys and
            their corresponding checksum file URL as values.
        ahn_version (int): The version of AHN (3, 4 or 5).

    Returns:
        Mapping[str, str]: A dictionary where the keys are filenames and the values
        are their corresponding SHA-256 or MD5 checksums.

    Raises:
        KeyError: If ``ahn_version`` has no entry in ``url_map``.
    """
    url = url_map[ahn_version]
    _hashes = download_as_str(url)
    checksums = {}
    if ahn_version == 5:
        # AHN5 publishes a GeoJSON FeatureCollection; each feature's
        # "properties" carry the tile's download URL ("file") and its
        # SHA-256 sum ("sha256").
        for feature in json.loads(_hashes)["features"]:
            properties = feature.get("properties")
            if not properties:
                continue
            file_url = properties.get("file")
            sha256 = properties.get("sha256")
            # Require BOTH values: previously a feature with a "file" but no
            # "sha256" stored a None checksum, which poisons the map and makes
            # every later comparison against that tile fail silently.
            if file_url and sha256:
                filename = file_url.split("/")[-1]
                checksums[filename] = sha256
    else:
        # AHN3/4 checksum files are plain text, one "<checksum> <filename>"
        # pair per line.
        for tile in _hashes.strip().split("\n"):
            sha, file = tile.split()
            checksums[file] = sha
    return checksums
|
345 | 362 |
|
346 | 363 |
|
|
0 commit comments