From 9a7166c49579f49d4cb737ef55083a0a8ce26813 Mon Sep 17 00:00:00 2001 From: GinaStavropoulou Date: Mon, 18 Aug 2025 08:37:29 +0200 Subject: [PATCH 1/5] not storing metadata for tiles without hash matched --- .../src/bag3d/core/assets/ahn/download.py | 37 +++++++++++++------ .../src/bag3d/core/assets/ahn/metadata.py | 7 ++++ 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/packages/core/src/bag3d/core/assets/ahn/download.py b/packages/core/src/bag3d/core/assets/ahn/download.py index 89aae5c1..ffd4b282 100644 --- a/packages/core/src/bag3d/core/assets/ahn/download.py +++ b/packages/core/src/bag3d/core/assets/ahn/download.py @@ -186,7 +186,11 @@ def laz_files_ahn3(context, config: LazFilesConfig, md5_ahn3, tile_index_ahn): # Let's try to re-download the file once if not first_validation: - logger.info(format_laz_log(fpath, "Removing")) + logger.info( + format_laz_log( + fpath, "First validation failed. Removing and retrying..." + ) + ) fpath.unlink() lazdownload = download_ahn_laz( fpath=fpath, url_laz=url_laz, verify_ssl=verify_ssl @@ -195,7 +199,7 @@ def laz_files_ahn3(context, config: LazFilesConfig, md5_ahn3, tile_index_ahn): sha_reference=md5_ahn3, sha_func=HashChunkwise("md5") ) if not second_validation: - logger.error(format_laz_log(fpath, "ERROR")) + logger.error(format_laz_log(fpath, "ERROR: second validation failed!")) lazdownload = LAZDownload( url=None, path=Path(), @@ -206,7 +210,7 @@ def laz_files_ahn3(context, config: LazFilesConfig, md5_ahn3, tile_index_ahn): size=0.0, ) else: - logger.debug(format_laz_log(fpath, "OK")) + logger.debug(format_laz_log(fpath, "Validation OK")) return Output(lazdownload, metadata=lazdownload.asdict()) @@ -245,7 +249,11 @@ def laz_files_ahn4(context, config: LazFilesConfig, md5_ahn4, tile_index_ahn): # Let's try to re-download the file once if not first_validation: - logger.info(format_laz_log(fpath, "Removing")) + logger.info( + format_laz_log( + fpath, "First validation failed. Removing and retrying..." 
+ ) + ) fpath.unlink() lazdownload = download_ahn_laz( fpath=fpath, @@ -256,7 +264,8 @@ def laz_files_ahn4(context, config: LazFilesConfig, md5_ahn4, tile_index_ahn): sha_reference=md5_ahn4, sha_func=HashChunkwise("md5") ) if not second_validation: - logger.error(format_laz_log(fpath, "ERROR")) + logger.error(format_laz_log(fpath, "ERROR: second validation failed!")) + fpath.unlink() lazdownload = LAZDownload( url=None, path=Path(), @@ -267,7 +276,7 @@ def laz_files_ahn4(context, config: LazFilesConfig, md5_ahn4, tile_index_ahn): size=0.0, ) else: - logger.debug(format_laz_log(fpath, "OK")) + logger.debug(format_laz_log(fpath, "Validation OK")) return Output(lazdownload, metadata=lazdownload.asdict()) @@ -303,7 +312,11 @@ def laz_files_ahn5(context, config: LazFilesConfig, sha256_ahn5, tile_index_ahn) ) # Let's try to re-download the file once if not first_validation: - logger.info(format_laz_log(fpath, "Removing")) + logger.info( + format_laz_log( + fpath, "First validation failed. Removing and retrying..." 
+ ) + ) fpath.unlink() lazdownload = download_ahn_laz( fpath=fpath, @@ -314,7 +327,7 @@ def laz_files_ahn5(context, config: LazFilesConfig, sha256_ahn5, tile_index_ahn) sha_reference=sha256_ahn5, sha_func=HashChunkwise("sha256") ) if not second_validation: - logger.error(format_laz_log(fpath, "ERROR")) + logger.error(format_laz_log(fpath, "ERROR: second validation failed!")) lazdownload = LAZDownload( url=None, path=Path(), @@ -325,7 +338,7 @@ def laz_files_ahn5(context, config: LazFilesConfig, sha256_ahn5, tile_index_ahn) size=0.0, ) else: - logger.debug(format_laz_log(fpath, "OK")) + logger.debug(format_laz_log(fpath, "Validation OK")) return Output(lazdownload, metadata=lazdownload.asdict()) @@ -380,14 +393,14 @@ def download_ahn_laz( ) else: # pragma: no cover logger.info(format_laz_log(fpath, "File already downloaded")) + success = True + file_size = round(fpath.stat().st_size / 1e6, 2) + is_new = False if force_download: logger.info(format_laz_log(fpath, "Forcing re-download")) file_size, fpath, is_new, success, url_laz = download_laz( file_size, fpath, is_new, nr_retries, success, url, url_laz, verify_ssl ) - success = True - file_size = round(fpath.stat().st_size / 1e6, 2) - is_new = False return LAZDownload( url=url_laz, path=fpath, diff --git a/packages/core/src/bag3d/core/assets/ahn/metadata.py b/packages/core/src/bag3d/core/assets/ahn/metadata.py index 787e8f8c..069db658 100644 --- a/packages/core/src/bag3d/core/assets/ahn/metadata.py +++ b/packages/core/src/bag3d/core/assets/ahn/metadata.py @@ -200,6 +200,13 @@ def compute_load_metadata( """ tile_id = context.partition_key conn = context.resources.db_connection.connect + if not laz_files_ahn.success or laz_files_ahn.hash_name is None: + context.log.info( + f"LAZ tile {tile_id} has not been successfully downloaded." + f"Skipping metadata computation..." 
+ ) + return Output(None) + if not laz_files_ahn.new: if not context.op_execution_context.op_execution_context.op_config["force"]: context.log.info( From 840aca6c898d9b40f804d4a830876b0839b677ba Mon Sep 17 00:00:00 2001 From: GinaStavropoulou Date: Tue, 19 Aug 2025 19:54:10 +0200 Subject: [PATCH 2/5] --readers.las.nosrs true --- .../common/src/bag3d/common/utils/geodata.py | 26 ++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/packages/common/src/bag3d/common/utils/geodata.py b/packages/common/src/bag3d/common/utils/geodata.py index 7c8d58d2..d0341412 100644 --- a/packages/common/src/bag3d/common/utils/geodata.py +++ b/packages/common/src/bag3d/common/utils/geodata.py @@ -9,12 +9,15 @@ TableSchema, TableColumnConstraints, TableColumn, + get_dagster_logger, ) from pgutils import PostgresTableIdentifier from bag3d.common.utils.database import postgrestable_metadata from bag3d.common.resources.executables import AppImage +logger = get_dagster_logger() + def wkt_from_bbox(bbox): minx, miny, maxx, maxy = bbox @@ -256,9 +259,26 @@ def pdal_info( ] cmd_list.append("--all") if with_all else cmd_list.append("--metadata") cmd_list.append("{local_path}") - return_code, output = pdal.execute( - "pdal", command=" ".join(cmd_list), local_path=file_path, silent=(not verbose) - ) + try: + return_code, output = pdal.execute( + "pdal", + command=" ".join(cmd_list), + local_path=file_path, + silent=(not verbose), + ) + except Exception as e: + if "Global encoding WKT flag" in str(e): + logger.warning(f"Pdal failed for tile {file_path} with error {e}.") + logger.warning("Setting --readers.las.nosrs true") + cmd_list.append("--readers.las.nosrs true") + return_code, output = pdal.execute( + "pdal", + command=" ".join(cmd_list), + local_path=file_path, + silent=(not verbose), + ) + else: + raise output_processed = output.replace("\\u0000", "") return return_code, json.loads(output_processed) From b788bf1679f7092c2ee04991a6925b2858425396 Mon Sep 17 
00:00:00 2001 From: GinaStavropoulou Date: Tue, 19 Aug 2025 19:56:43 +0200 Subject: [PATCH 3/5] removing previous work --- packages/core/src/bag3d/core/assets/ahn/metadata.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/core/src/bag3d/core/assets/ahn/metadata.py b/packages/core/src/bag3d/core/assets/ahn/metadata.py index c28ea97b..9c078fac 100644 --- a/packages/core/src/bag3d/core/assets/ahn/metadata.py +++ b/packages/core/src/bag3d/core/assets/ahn/metadata.py @@ -200,12 +200,6 @@ def compute_load_metadata( """ tile_id = context.partition_key conn = context.resources.db_connection.connect - if not laz_files_ahn.success or laz_files_ahn.hash_name is None: - context.log.info( - f"LAZ tile {tile_id} has not been successfully downloaded." - f"Skipping metadata computation..." - ) - return Output(None) if not laz_files_ahn.new: if not context.op_execution_context.op_execution_context.op_config["force"]: From 907ae73b64580a941276facf127a9eb9724e4582 Mon Sep 17 00:00:00 2001 From: GinaStavropoulou Date: Tue, 19 Aug 2025 19:57:48 +0200 Subject: [PATCH 4/5] setting up success for force_download --- packages/core/src/bag3d/core/assets/ahn/download.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/core/src/bag3d/core/assets/ahn/download.py b/packages/core/src/bag3d/core/assets/ahn/download.py index 5cad21c1..06390035 100644 --- a/packages/core/src/bag3d/core/assets/ahn/download.py +++ b/packages/core/src/bag3d/core/assets/ahn/download.py @@ -390,9 +390,6 @@ def download_ahn_laz( file_size, fpath, is_new, success, url_laz = download_laz( file_size, fpath, is_new, nr_retries, success, url, url_laz, verify_ssl ) - success = True - file_size = round(fpath.stat().st_size / 1e6, 2) - is_new = False if not success: raise Failure(format_laz_log(fpath, "Downloading failed!")) From 624fdd0beffde3a70d860e96dae3925a92c09004 Mon Sep 17 00:00:00 2001 From: GinaStavropoulou Date: Tue, 19 Aug 2025 19:58:41 +0200 Subject: [PATCH 5/5] removed unnecessary space 
--- packages/core/src/bag3d/core/assets/ahn/metadata.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/core/src/bag3d/core/assets/ahn/metadata.py b/packages/core/src/bag3d/core/assets/ahn/metadata.py index 9c078fac..9e7767bc 100644 --- a/packages/core/src/bag3d/core/assets/ahn/metadata.py +++ b/packages/core/src/bag3d/core/assets/ahn/metadata.py @@ -200,7 +200,6 @@ def compute_load_metadata( """ tile_id = context.partition_key conn = context.resources.db_connection.connect - if not laz_files_ahn.new: if not context.op_execution_context.op_execution_context.op_config["force"]: context.log.info(