diff --git a/README.md b/README.md index 299e49c9..9ad8b077 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ You will find the complete documentation at [https://innovation.3dbag.nl/3dbag-p ## Project layout -The 3dbag-pipeline are organized into several packages. +The 3dbag-pipeline is organized into several packages. The packages are organized into a `common` package and a number of workflow packages. The `common` package contains the resources, functions and type definitions that are used by the 3DBAG packages that define the data processing workflows. The workflow packages contain the assets, jobs, sensors etc. that define a data processing workflow for a part of the complete 3DBAG. diff --git a/packages/core/src/bag3d/core/__init__.py b/packages/core/src/bag3d/core/__init__.py index bb45d91c..d9c7dc2a 100644 --- a/packages/core/src/bag3d/core/__init__.py +++ b/packages/core/src/bag3d/core/__init__.py @@ -1405,4 +1405,5 @@ "04fn2", "40cz2", "14bz1", + "18hz2", } diff --git a/packages/core/src/bag3d/core/assets/ahn/core.py b/packages/core/src/bag3d/core/assets/ahn/core.py index 6ec9c9fd..4197dfe4 100644 --- a/packages/core/src/bag3d/core/assets/ahn/core.py +++ b/packages/core/src/bag3d/core/assets/ahn/core.py @@ -19,11 +19,6 @@ def format_laz_log(fpath: Path, msg: str) -> str: return f"{fpath.stem}{'.' * 5}{msg}" -def ahn_filename(tile_id: str) -> str: - """Creates an AHN LAZ file name from an AHN tile ID.""" - return f"C_{tile_id.upper()}.LAZ" - - def ahn_dir(root_dir: Path, ahn_version: int) -> Path: """Return a directory path where to store the AHN LAZ files for the given AHN version.""" diff --git a/packages/core/src/bag3d/core/assets/ahn/download.py b/packages/core/src/bag3d/core/assets/ahn/download.py index ba05ebe8..b4ecde3b 100644 --- a/packages/core/src/bag3d/core/assets/ahn/download.py +++ b/packages/core/src/bag3d/core/assets/ahn/download.py @@ -307,42 +307,6 @@ def laz_files_ahn5(context, sha256_ahn5, tile_index_ahn): return Output(lazdownload, metadata=lazdownload.asdict()) -# @multi_asset( -# required_resource_keys={"file_store", "pdal"}, -# partitions_def=PartitionDefinitionAHN(), -# ins={ -# "md5_ahn3": AssetIn(["ahn", "md5_ahn3"]) -# }, -# outs={ -# "laz_files_ahn3": Out(is_required=False), -# "pdal_info_ahn3": Out(is_required=False) -# }, -# can_subset=True -# ) -# def multi_laz_files_ahn3(context, md5_ahn3): -# """AHN3 LAZ files as they are downloaded from PDOK. -# -# Only downlaod a file if it does not exist locally, or the SHA of the file does not -# match the reference. -# """ -# tile_id = context.partition_key -# -# fpath = context.resources.file_store.file_store.data_dir / ahn_filename(tile_id) -# fpath = Path(f"/data/AHN3/tiles_200m/t_{tile_id}.laz") -# if "laz_files" in context.selected_output_names: -# fpath = context.resources.file_store.file_store.data_dir / ahn_filename(tile_id) -# lazdownload = download_ahn_laz(fpath=fpath, url_base=URL_LAZ["ahn3"], -# sha_reference=md5_ahn3, -# sha_func=HashChunkwise("md5")) -# fpath = lazdownload.path -# yield Output(lazdownload.path, output_name="laz_files_ahn3", -# metadata=lazdownload.asdict()) -# if "pdal_info" in context.selected_output_names: -# ret_code, out_info = pdal_info(context.resources.pdal, file_path=fpath, -# with_all=False) -# yield Output(fpath, metadata={**out_info}, output_name="pdal_info_ahn3") - - def get_checksums(url: str) -> Mapping[str, str]: """Download the checksums of AHN3/4/5 LAZ files. diff --git a/packages/core/src/bag3d/core/assets/ahn/tile.py b/packages/core/src/bag3d/core/assets/ahn/tile.py index af803db0..bf66759e 100644 --- a/packages/core/src/bag3d/core/assets/ahn/tile.py +++ b/packages/core/src/bag3d/core/assets/ahn/tile.py @@ -13,9 +13,10 @@ PartitionDefinitionAHN, ahn_dir, ahn_laz_dir, - ahn_filename, ) +import os + # The tile index bbox was computed from download_ahn_index(3, True) PDOK_TILE_INDEX_BBOX = (13000, 306250, 279000, 616250) @@ -271,24 +272,26 @@ def partition_laz_with_grid( out_dir = ahn_dir( context.resources.file_store.file_store.data_dir, ahn_version=ahn_version ).joinpath(f"tiles_{cellsize}m") - out_dir.mkdir(exist_ok=True) + out_dir.mkdir(exist_ok=True, parents=True) future_to_tile = {} failed = [] + ahn_path = ahn_laz_dir( + context.resources.file_store.file_store.data_dir, + ahn_version=ahn_version, + ) + # Dictionary of all files in the AHN path + files_in_ahn_path = { + f[-9:-4].lower(): os.path.join(ahn_path, f) + for f in os.listdir(ahn_path) + if os.path.isfile(os.path.join(ahn_path, f)) + } + with ThreadPoolExecutor(max_workers=max_workers) as executor: for tile, xmin, ymin, pdok_match in tile_ids: tile_size = cellsize out_file = out_dir / f"t_{tile}.laz" cmd = ["{exe}", "-v", "-i"] - cmd.extend( - str( - ahn_laz_dir( - context.resources.file_store.file_store.data_dir, - ahn_version=ahn_version, - ) - / ahn_filename(t) - ) - for t in pdok_match - ) + cmd.extend(str(files_in_ahn_path[t.lower()]) for t in pdok_match) cmd += [ "-inside_tile", str(xmin), diff --git a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py index 645e0974..92037993 100644 --- a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py +++ b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py @@ -191,6 +191,12 @@ def create_roofer_config( laz_files_ahn4 = [ str(out_dir_ahn4 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res ] + out_dir_ahn5 = ahn_dir( + context.resources.file_store.file_store.data_dir, ahn_version=5 + ).joinpath("tiles_200m") + laz_files_ahn5 = [ + str(out_dir_ahn5 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res + ] # Would be neater if we could use -sql in the OGR connection to do this query, # instead of creating a view. tile_view = PostgresTableIdentifier(tiles.schema, f"t_{tile_id}") @@ -218,6 +224,7 @@ def create_roofer_config( footprint_file=f"PG:{context.resources.db_connection.connect.dsn} tables={tile_view}", ahn3_files=laz_files_ahn3, ahn4_files=laz_files_ahn4, + ahn5_files=laz_files_ahn5, output_path=output_dir, ) path_toml = output_dir / "roofer.toml"