Skip to content

Commit 4bbcdd8

Browse files
authored
Merge pull request #151 from 3DBAG/fix/ahn-adjustments
Adjustments to the AHN assets
2 parents f43dac4 + 0388e96 commit 4bbcdd8

File tree

6 files changed

+163
-141
lines changed

6 files changed

+163
-141
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,15 @@ You can read about the details on how can you deploy it in the [deployment secti
4646
## Integration as a library
4747

4848
The 3dbag-pipeline can be used as a library in other projects.
49-
The packages can be installed directly from GitHub using specific release versions:
49+
The packages can be installed directly from GitHub using specific release versions.
50+
Note that you must use the [uv](https://docs.astral.sh/uv/) package manager to install the packages, because *pip* cannot resolve the relative package paths within this repository.
5051

5152
```bash
5253
# Install specific release version of the common package
53-
pip install "bag3d-common @ git+https://github.com/3DBAG/3dbag-pipeline.git@<release-tag>#egg=bag3d-common&subdirectory=packages/common"
54+
uv pip install "bag3d-common @ git+https://github.com/3DBAG/3dbag-pipeline.git@<release-tag>#egg=bag3d-common&subdirectory=packages/common"
5455

5556
# Install specific commit of the common package
56-
pip install "bag3d-common @ git+https://github.com/3DBAG/3dbag-pipeline.git@<commit-hash>#egg=bag3d-common&subdirectory=packages/common"
57+
uv pip install "bag3d-common @ git+https://github.com/3DBAG/3dbag-pipeline.git@<commit-hash>#egg=bag3d-common&subdirectory=packages/common"
5758
```
5859

5960
## License

packages/core/src/bag3d/core/assets/ahn/core.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@
88

99
logger = get_dagster_logger("ahn")
1010

11-
12-
class PartitionDefinitionAHN(StaticPartitionsDefinition):
13-
def __init__(self):
14-
super().__init__(partition_keys=sorted(list(AHN_TILE_IDS)))
11+
partition_definition_ahn = StaticPartitionsDefinition(sorted(list(AHN_TILE_IDS)))
1512

1613

1714
def format_laz_log(fpath: Path, msg: str) -> str:

packages/core/src/bag3d/core/assets/ahn/download.py

Lines changed: 43 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@
77

88
from bag3d.common.utils.requests import download_file, download_as_str
99
from bag3d.core.assets.ahn.core import (
10-
PartitionDefinitionAHN,
1110
format_laz_log,
1211
download_ahn_index,
1312
ahn_laz_dir,
13+
partition_definition_ahn,
1414
)
1515

1616
logger = get_dagster_logger("ahn.download")
@@ -141,7 +141,7 @@ def tile_index_ahn(context):
141141

142142
@asset(
143143
required_resource_keys={"file_store"},
144-
partitions_def=PartitionDefinitionAHN(),
144+
partitions_def=partition_definition_ahn,
145145
)
146146
def laz_files_ahn3(context, md5_ahn3, tile_index_ahn):
147147
"""AHN3 LAZ files as they are downloaded from PDOK.
@@ -196,7 +196,7 @@ def laz_files_ahn3(context, md5_ahn3, tile_index_ahn):
196196

197197
@asset(
198198
required_resource_keys={"file_store"},
199-
partitions_def=PartitionDefinitionAHN(),
199+
partitions_def=partition_definition_ahn,
200200
)
201201
def laz_files_ahn4(context, md5_ahn4, tile_index_ahn):
202202
"""AHN4 LAZ files as they are downloaded from PDOK.
@@ -254,7 +254,7 @@ def laz_files_ahn4(context, md5_ahn4, tile_index_ahn):
254254

255255
@asset(
256256
required_resource_keys={"file_store"},
257-
partitions_def=PartitionDefinitionAHN(),
257+
partitions_def=partition_definition_ahn,
258258
)
259259
def laz_files_ahn5(context, sha256_ahn5, tile_index_ahn):
260260
"""AHN5 LAZ files as they are downloaded from PDOK.
@@ -322,12 +322,17 @@ def get_checksums(url: str) -> Mapping[str, str]:
322322

323323

324324
def download_ahn_laz(
325-
fpath: Path, url_laz: str = None, url_base: str = None, verify_ssl: bool = False
325+
fpath: Path,
326+
url_laz: str = None,
327+
url_base: str = None,
328+
verify_ssl: bool = False,
329+
nr_retries: int = 5,
326330
) -> LAZDownload:
327331
"""Download an AHN LAZ file from the input url to the given path,
328332
if the file does not exists.
329333
330334
Args:
335+
nr_retries: The number of retries to download the file.
331336
fpath: Path to the LAZ file that may exist locally. If not it will be downloaded.
332337
url_laz: Complete URL of the file to download. If provided, 'url_base' is
333338
ignored.
@@ -340,36 +345,50 @@ def download_ahn_laz(
340345

341346
url = url_laz if url_laz is not None else "/".join([url_base, fpath.name])
342347

348+
success = False
349+
file_size = 0.0
350+
is_new = False
343351
if not fpath.is_file():
344352
logger.info(format_laz_log(fpath, "Not found. Downloading..."))
345-
fpath = download_file(
346-
url=url, target_path=fpath, chunk_size=1024 * 1024, verify=verify_ssl
347-
)
348-
if fpath is None:
349-
# Download failed
350-
logger.warning(format_laz_log(fpath, "Downloading failed!"))
351-
return LAZDownload(
352-
url=None,
353-
path=Path(),
354-
success=False,
355-
hash_name=None,
356-
hash_hexdigest=None,
357-
new=False,
358-
size=0.0,
359-
)
360-
else:
361-
is_new = True
353+
for i in range(nr_retries):
354+
try:
355+
fpath = download_file(
356+
url=url,
357+
target_path=fpath,
358+
chunk_size=1024 * 1024,
359+
verify=verify_ssl,
360+
)
361+
if fpath is None:
362+
# Download failed
363+
logger.warning(format_laz_log(fpath, "Downloading failed!"))
364+
url_laz = None
365+
fpath = Path()
366+
success = False
367+
is_new = False
368+
file_size = 0.0
369+
else:
370+
success = True
371+
is_new = True
372+
file_size = round(fpath.stat().st_size / 1e6, 2)
373+
break
374+
except ConnectionError as e:
375+
if i == 4:
376+
raise e
377+
else:
378+
logger.warning(f"Retrying ({i + 1}/5) due to {e}")
362379
else: # pragma: no cover
363380
logger.info(format_laz_log(fpath, "File already downloaded"))
381+
success = True
382+
file_size = round(fpath.stat().st_size / 1e6, 2)
364383
is_new = False
365384
return LAZDownload(
366385
url=url_laz,
367386
path=fpath,
368-
success=True,
387+
success=success,
369388
hash_name=None,
370389
hash_hexdigest=None,
371390
new=is_new,
372-
size=round(fpath.stat().st_size / 1e6, 2),
391+
size=file_size,
373392
)
374393

375394

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
from dagster import asset, Field, get_dagster_logger
2+
3+
from bag3d.core.assets.ahn.core import partition_definition_ahn
4+
5+
6+
logger = get_dagster_logger("ahn.index")
7+
8+
9+
@asset(
10+
config_schema={
11+
"tile_size": Field(
12+
int,
13+
default_value=250,
14+
description="Set smallest spatial area indexed to tile_size by tile_size units.",
15+
),
16+
"force": Field(
17+
bool,
18+
default_value=False,
19+
description="Force the re-index the file, even if it is already indexed.",
20+
),
21+
},
22+
required_resource_keys={"lastools"},
23+
partitions_def=partition_definition_ahn,
24+
)
25+
def lasindex_ahn3(context, laz_files_ahn3):
26+
"""Append a spatial index to the AHN3 LAZ file, using LASTools's `lasindex`.
27+
28+
See https://lastools.osgeo.org/download/lasindex_README.txt.
29+
"""
30+
cmd_list = [
31+
"{exe}",
32+
"-i {local_path}",
33+
"-append",
34+
"-tile_size",
35+
str(context.op_execution_context.op_config["tile_size"]),
36+
]
37+
if context.op_execution_context.op_config["force"] is False:
38+
cmd_list.append("-dont_reindex")
39+
context.resources.lastools.app.execute(
40+
"lasindex", " ".join(cmd_list), local_path=laz_files_ahn3.path
41+
)
42+
43+
44+
@asset(
45+
config_schema={
46+
"tile_size": Field(
47+
int,
48+
default_value=250,
49+
description="Set smallest spatial area indexed to tile_size by tile_size units.",
50+
),
51+
"force": Field(
52+
bool,
53+
default_value=False,
54+
description="Force the re-index the file, even if it is already indexed.",
55+
),
56+
},
57+
required_resource_keys={"lastools"},
58+
partitions_def=partition_definition_ahn,
59+
)
60+
def lasindex_ahn4(context, laz_files_ahn4):
61+
"""Append a spatial index to the AHN4 LAZ file, using LASTools's `lasindex`.
62+
63+
See https://lastools.osgeo.org/download/lasindex_README.txt.
64+
"""
65+
cmd_list = [
66+
"{exe}",
67+
"-i {local_path}",
68+
"-append",
69+
"-tile_size",
70+
str(context.op_execution_context.op_config["tile_size"]),
71+
]
72+
if context.op_execution_context.op_config["force"] is False:
73+
cmd_list.append("-dont_reindex")
74+
context.resources.lastools.app.execute(
75+
"lasindex", " ".join(cmd_list), local_path=laz_files_ahn4.path
76+
)
77+
78+
79+
@asset(
80+
config_schema={
81+
"tile_size": Field(
82+
int,
83+
default_value=250,
84+
description="Set smallest spatial area indexed to tile_size by tile_size units.",
85+
),
86+
"force": Field(
87+
bool,
88+
default_value=False,
89+
description="Force the re-index the file, even if it is already indexed.",
90+
),
91+
},
92+
required_resource_keys={"lastools"},
93+
partitions_def=partition_definition_ahn,
94+
)
95+
def lasindex_ahn5(context, laz_files_ahn5):
96+
"""Append a spatial index to the AHN5 LAZ file, using LASTools's `lasindex`.
97+
98+
See https://lastools.osgeo.org/download/lasindex_README.txt.
99+
"""
100+
cmd_list = [
101+
"{exe}",
102+
"-i {local_path}",
103+
"-append",
104+
"-tile_size",
105+
str(context.op_execution_context.op_config["tile_size"]),
106+
]
107+
if context.op_execution_context.op_config["force"] is False:
108+
cmd_list.append("-dont_reindex")
109+
context.resources.lastools.app.execute(
110+
"lasindex", " ".join(cmd_list), local_path=laz_files_ahn5.path
111+
)

packages/core/src/bag3d/core/assets/ahn/metadata.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from bag3d.common.utils.geodata import pdal_info
1010
from bag3d.common.utils.database import create_schema, load_sql
11-
from bag3d.core.assets.ahn.core import PartitionDefinitionAHN
11+
from bag3d.core.assets.ahn.core import partition_definition_ahn
1212

1313

1414
@asset(required_resource_keys={"db_connection"})
@@ -41,7 +41,7 @@ def metadata_table_ahn5(context):
4141
),
4242
},
4343
required_resource_keys={"pdal", "db_connection"},
44-
partitions_def=PartitionDefinitionAHN(),
44+
partitions_def=partition_definition_ahn,
4545
)
4646
def metadata_ahn3(context, laz_files_ahn3, metadata_table_ahn3, tile_index_ahn):
4747
"""Metadata of the AHN3 LAZ file, retrieved from the PDOK tile index and
@@ -64,7 +64,7 @@ def metadata_ahn3(context, laz_files_ahn3, metadata_table_ahn3, tile_index_ahn):
6464
),
6565
},
6666
required_resource_keys={"pdal", "db_connection"},
67-
partitions_def=PartitionDefinitionAHN(),
67+
partitions_def=partition_definition_ahn,
6868
)
6969
def metadata_ahn4(context, laz_files_ahn4, metadata_table_ahn4, tile_index_ahn):
7070
"""Metadata of the AHN4 LAZ file, retrieved from the PDOK tile index and
@@ -87,7 +87,7 @@ def metadata_ahn4(context, laz_files_ahn4, metadata_table_ahn4, tile_index_ahn):
8787
),
8888
},
8989
required_resource_keys={"pdal", "db_connection"},
90-
partitions_def=PartitionDefinitionAHN(),
90+
partitions_def=partition_definition_ahn,
9191
)
9292
def metadata_ahn5(context, laz_files_ahn5, metadata_table_ahn5, tile_index_ahn):
9393
"""Metadata of the AHN5 LAZ file, retrieved from the PDOK tile index and

0 commit comments

Comments
 (0)