diff --git a/docker/.env b/docker/.env
index 470016ae..e2b17a67 100644
--- a/docker/.env
+++ b/docker/.env
@@ -6,6 +6,8 @@ DAGSTER_POSTGRES_DB="postgres_db"
 TOOLS_DIR=/opt/3dbag-pipeline/tools
 BAG3D_VENVS=/3dbag-pipeline/venvs
+BAG3D_FILESTORE=/data/volume
+BAG3D_RELEASE_VERSION=develop
 BAG3D_TEST_DATA=/data/volume
 BAG3D_FLOORS_ESTIMATION_MODEL=/data/volume/model/pipeline_model1_gbr_untuned.joblib
 BAG3D_RELEASE_VERSION=test_version
@@ -27,6 +29,7 @@ TYLER_METADATA_JSON=/opt/3dbag-pipeline/tools/share/tyler/resources/geof/metadat
 EXE_PATH_TYLER=/opt/3dbag-pipeline/tools/bin/tyler
 EXE_PATH_TYLER_DB=/opt/3dbag-pipeline/tools/bin/tyler-db
 EXE_PATH_ROOFER_CROP=/opt/3dbag-pipeline/tools/bin/crop
+EXE_PATH_ROOFER_ROOFER=/opt/3dbag-pipeline/tools/bin/roofer
 EXE_PATH_ROOFER_RECONSTRUCT=/opt/3dbag-pipeline/tools/bin/geof
 FLOWCHART_PATH_RECONSTRUCT=/opt/3dbag-pipeline/tools/share/geoflow-bundle/flowcharts/reconstruct_bag.json
 EXE_PATH_OGR2OGR=/opt/3dbag-pipeline/tools/bin/ogr2ogr
diff --git a/docker/compose.yaml b/docker/compose.yaml
index d4e4c587..0fa918a0 100644
--- a/docker/compose.yaml
+++ b/docker/compose.yaml
@@ -11,6 +11,8 @@ services:
       POSTGRES_DB: $DAGSTER_POSTGRES_DB
     networks:
       - bag3d-dev-network
+    volumes:
+      - bag3d-dev-dagster-postgresql:/var/lib/postgresql/data

   # This service stores the asset data that is generated by the pipeline
   data-postgresql:
@@ -76,15 +78,21 @@ services:
       data-postgresql:
         condition: service_healthy
         restart: true
+    # Normally watch:action:rebuild wouldn't be needed, but we use dagster's
+    # DockerRunLauncher, which always starts a new container from the image for
+    # each run. watch:action:sync (or sync+restart) only copies the changed local
+    # code into the 3dbag-pipeline-core-develop container (not the image). Since
+    # DockerRunLauncher doesn't actually use this container, but always starts a
+    # new one, we have to build a new image on each change.
     develop:
       watch:
-        - action: sync
+        - action: rebuild
          path: ../packages/common
          target: /opt/3dbag-pipeline/packages/common
        - action: rebuild
          path: ../packages/common/pyproject.toml
          target: /opt/3dbag-pipeline/packages/common/pyproject.toml
-        - action: sync
+        - action: rebuild
          path: ../packages/core
          target: /opt/3dbag-pipeline/packages/core
        - action: rebuild
@@ -115,13 +123,13 @@
       restart: true
     develop:
       watch:
-        - action: sync
+        - action: rebuild
          path: ../packages/common
          target: /opt/3dbag-pipeline/packages/common
        - action: rebuild
          path: ../packages/common/pyproject.toml
          target: /opt/3dbag-pipeline/packages/common/pyproject.toml
-        - action: sync
+        - action: rebuild
          path: ../packages/floors_estimation
          target: /opt/3dbag-pipeline/packages/floors_estimation
        - action: rebuild
@@ -151,13 +159,13 @@
       restart: true
     develop:
       watch:
-        - action: sync
+        - action: rebuild
          path: ../packages/common
          target: /opt/3dbag-pipeline/packages/common
        - action: rebuild
          path: ../packages/common/pyproject.toml
          target: /opt/3dbag-pipeline/packages/common/pyproject.toml
-        - action: sync
+        - action: rebuild
          path: ../packages/party_walls
          target: /opt/3dbag-pipeline/packages/party_walls
        - action: rebuild
@@ -228,6 +236,8 @@ volumes:
     external: true
   bag3d-dev-data-pipeline:
     external: true
+  bag3d-dev-dagster-postgresql:
+    external: true
   bag3d-dev-dagster-home:
     external: true
diff --git a/docker/pipeline/bag3d-core.dockerfile b/docker/pipeline/bag3d-core.dockerfile
index 1d163351..2b7e8f4d 100644
--- a/docker/pipeline/bag3d-core.dockerfile
+++ b/docker/pipeline/bag3d-core.dockerfile
@@ -1,4 +1,4 @@
-FROM 3dgi/3dbag-pipeline-tools:2024.10.31 AS develop
+FROM 3dgi/3dbag-pipeline-tools:2024.11.09 AS develop
 ARG BAG3D_PIPELINE_LOCATION=/opt/3dbag-pipeline
 LABEL org.opencontainers.image.authors="Balázs Dukai "
diff --git a/docker/pipeline/bag3d-floors-estimation.dockerfile b/docker/pipeline/bag3d-floors-estimation.dockerfile
index 351310a8..0df7fb11 100644
--- a/docker/pipeline/bag3d-floors-estimation.dockerfile
+++ b/docker/pipeline/bag3d-floors-estimation.dockerfile
@@ -1,4 +1,4 @@
-FROM 3dgi/3dbag-pipeline-tools:2024.10.31 AS develop
+FROM 3dgi/3dbag-pipeline-tools:2024.11.09 AS develop
 ARG BAG3D_PIPELINE_LOCATION=/opt/3dbag-pipeline
 LABEL org.opencontainers.image.authors="Balázs Dukai "
diff --git a/docker/pipeline/bag3d-party-walls.dockerfile b/docker/pipeline/bag3d-party-walls.dockerfile
index 1296fad3..6cee1b42 100644
--- a/docker/pipeline/bag3d-party-walls.dockerfile
+++ b/docker/pipeline/bag3d-party-walls.dockerfile
@@ -1,4 +1,4 @@
-FROM 3dgi/3dbag-pipeline-tools:2024.10.31 AS develop
+FROM 3dgi/3dbag-pipeline-tools:2024.11.09 AS develop
 ARG BAG3D_PIPELINE_LOCATION=/opt/3dbag-pipeline
 LABEL org.opencontainers.image.authors="Balázs Dukai "
diff --git a/makefile b/makefile
index 9f484324..950abbf9 100644
--- a/makefile
+++ b/makefile
@@ -25,11 +25,13 @@ docker_volume_create:
 	docker cp docker/dagster/dagster.yaml bag3d-dev-temp-container:/opt/dagster/dagster_home/
 	docker cp docker/dagster/workspace.yaml bag3d-dev-temp-container:/opt/dagster/dagster_home/
 	docker rm -f bag3d-dev-temp-container
+	docker volume create bag3d-dev-dagster-postgresql

 docker_volume_rm:
 	docker volume rm -f bag3d-dev-data-pipeline
 	docker volume rm -f bag3d-dev-data-postgresql
 	docker volume rm -f bag3d-dev-dagster-home
+	docker volume rm -f bag3d-dev-dagster-postgresql

 docker_volume_recreate: docker_volume_rm docker_volume_create
diff --git a/packages/common/src/bag3d/common/resources/__init__.py b/packages/common/src/bag3d/common/resources/__init__.py
index 85d20c5a..8cc79841 100644
--- a/packages/common/src/bag3d/common/resources/__init__.py
+++ b/packages/common/src/bag3d/common/resources/__init__.py
@@ -40,10 +40,10 @@
 file_store = FileStoreResource(
-    data_dir="/tmp", dir_id=os.getenv("BAG3D_RELEASE_VERSION")
+    data_dir=os.getenv("BAG3D_FILESTORE"), dir_id=os.getenv("BAG3D_RELEASE_VERSION")
 )

 file_store_fastssd = FileStoreResource(
-    data_dir="/tmp",
+    data_dir=os.getenv("BAG3D_FILESTORE"),
     dir_id=os.getenv("BAG3D_RELEASE_VERSION"),
 )
@@ -62,7 +62,10 @@
     exe_tyler=os.getenv("EXE_PATH_TYLER"), exe_tyler_db=os.getenv("EXE_PATH_TYLER_DB")
 )

-roofer = RooferResource(exe_roofer_crop=os.getenv("EXE_PATH_ROOFER_CROP"))
+roofer = RooferResource(
+    exe_crop=os.getenv("EXE_PATH_ROOFER_CROP"),
+    exe_roofer=os.getenv("EXE_PATH_ROOFER_ROOFER"),
+)

 geoflow = GeoflowResource(
     exe_geoflow=os.getenv("EXE_PATH_ROOFER_RECONSTRUCT"),
diff --git a/packages/common/src/bag3d/common/resources/executables.py b/packages/common/src/bag3d/common/resources/executables.py
index 344bb1fd..58faacd4 100644
--- a/packages/common/src/bag3d/common/resources/executables.py
+++ b/packages/common/src/bag3d/common/resources/executables.py
@@ -435,11 +435,12 @@ def app(self) -> AppImage:
 class RooferResource(ConfigurableResource):
     """
     A RooferResource can be configured by providing the paths to
-    Roofer `crop` executable on the local system.
+    Roofer `crop` and `roofer` executables on the local system.

     Example:

-        roofer_resource = RooferResource(exe_roofer_crop=os.getenv("EXE_PATH_ROOFER_CROP"))
+        roofer_resource = RooferResource(exe_crop=os.getenv("EXE_PATH_ROOFER_CROP"),
+                                         exe_roofer=os.getenv("EXE_PATH_ROOFER_ROOFER"))

     After the resource has been instantiated, roofer (AppImage) can be acquired with
     the `app` property:
@@ -447,11 +448,15 @@ class RooferResource(ConfigurableResource):
         roofer = roofer_resource.app
     """

-    exe_roofer_crop: str
+    exe_crop: str
+    exe_roofer: str

     @property
     def exes(self) -> Dict[str, str]:
-        return {"crop": self.exe_roofer_crop}
+        return {
+            "crop": self.exe_crop,
+            "roofer": self.exe_roofer,
+        }

     @property
     def with_docker(self) -> bool:
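The renamed fields change how the executables are looked up at run time. A minimal sketch (not part of the patch; it assumes the env vars from docker/.env are set and the `bag3d` packages are importable) of how the `exes` mapping relates to the name that is later passed to `app.execute(...)`:

```python
import os

from bag3d.common.resources.executables import RooferResource

# Configure the resource from the same env vars that docker/.env defines.
roofer_resource = RooferResource(
    exe_crop=os.getenv("EXE_PATH_ROOFER_CROP"),
    exe_roofer=os.getenv("EXE_PATH_ROOFER_ROOFER"),
)

# `exes` exposes both executables, keyed by the name that assets pass as
# `exe_name` when calling `roofer_resource.app.execute(...)`.
assert roofer_resource.exes == {
    "crop": roofer_resource.exe_crop,
    "roofer": roofer_resource.exe_roofer,
}
```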
"as_downloaded" / "LAZ" @@ -69,7 +69,7 @@ def download_ahn_index( "requestedEpsg": "28992", "outputFormat": "application/json", "CountDefault": "2000", - "typeName": "layerId_14b12666-cfbb-4362-905a-8832afe5ffa8", + "typeName": "layerId_1e56b6d6-3802-4246-a7ed-8f49824b85db", } logger.info(f"Downloading the AHN tile boundaries from {service_url}") diff --git a/packages/core/src/bag3d/core/assets/ahn/download.py b/packages/core/src/bag3d/core/assets/ahn/download.py index 8d03c829..ba05ebe8 100644 --- a/packages/core/src/bag3d/core/assets/ahn/download.py +++ b/packages/core/src/bag3d/core/assets/ahn/download.py @@ -134,7 +134,7 @@ def sha256_ahn5(context): @asset -def tile_index_pdok(context): +def tile_index_ahn(context): """The AHN tile index, including the tile geometry and the file download links.""" return download_ahn_index(with_geom=True) @@ -143,7 +143,7 @@ def tile_index_pdok(context): required_resource_keys={"file_store"}, partitions_def=PartitionDefinitionAHN(), ) -def laz_files_ahn3(context, md5_ahn3, tile_index_pdok): +def laz_files_ahn3(context, md5_ahn3, tile_index_ahn): """AHN3 LAZ files as they are downloaded from PDOK. The download links are retrieved from the AHN tile index service (blaadindex). @@ -153,7 +153,7 @@ def laz_files_ahn3(context, md5_ahn3, tile_index_pdok): tile_id = context.partition_key laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 3) laz_dir.mkdir(exist_ok=True, parents=True) - url_laz = tile_index_pdok[tile_id]["AHN3_LAZ"] + url_laz = tile_index_ahn[tile_id]["AHN3_LAZ"] fpath = laz_dir / url_laz.split("/")[-1] # Because https://ns_hwh.fundaments.nl is not configured properly. # Check with https://www.digicert.com/help/ @@ -198,7 +198,7 @@ def laz_files_ahn3(context, md5_ahn3, tile_index_pdok): required_resource_keys={"file_store"}, partitions_def=PartitionDefinitionAHN(), ) -def laz_files_ahn4(context, md5_ahn4, tile_index_pdok): +def laz_files_ahn4(context, md5_ahn4, tile_index_ahn): """AHN4 LAZ files as they are downloaded from PDOK. The download links are retrieved from the AHN tile index service (blaadindex). @@ -209,7 +209,7 @@ def laz_files_ahn4(context, md5_ahn4, tile_index_pdok): laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 4) laz_dir.mkdir(exist_ok=True, parents=True) - url_laz = tile_index_pdok[tile_id]["AHN4_LAZ"] + url_laz = tile_index_ahn[tile_id]["AHN4_LAZ"] fpath = laz_dir / url_laz.split("/")[-1] # Because https://ns_hwh.fundaments.nl is not configured properly. # Check with https://www.digicert.com/help/ @@ -256,7 +256,7 @@ def laz_files_ahn4(context, md5_ahn4, tile_index_pdok): required_resource_keys={"file_store"}, partitions_def=PartitionDefinitionAHN(), ) -def laz_files_ahn5(context, sha256_ahn5, tile_index_pdok): +def laz_files_ahn5(context, sha256_ahn5, tile_index_ahn): """AHN5 LAZ files as they are downloaded from PDOK. The download links are retrieved from the AHN tile index service (blaadindex). @@ -265,7 +265,7 @@ def laz_files_ahn5(context, sha256_ahn5, tile_index_pdok): tile_id = context.partition_key laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 5) laz_dir.mkdir(exist_ok=True, parents=True) - url_laz = tile_index_pdok[tile_id]["AHN5_LAZ"] + url_laz = tile_index_ahn[tile_id]["AHN5_LAZ"] fpath = laz_dir / url_laz.split("/")[-1] # Because https://ns_hwh.fundaments.nl is not configured properly. 
diff --git a/packages/core/src/bag3d/core/assets/ahn/metadata.py b/packages/core/src/bag3d/core/assets/ahn/metadata.py
index c4c75a71..e3a9eca8 100644
--- a/packages/core/src/bag3d/core/assets/ahn/metadata.py
+++ b/packages/core/src/bag3d/core/assets/ahn/metadata.py
@@ -43,12 +43,12 @@ def metadata_table_ahn5(context):
     required_resource_keys={"pdal", "db_connection"},
     partitions_def=PartitionDefinitionAHN(),
 )
-def metadata_ahn3(context, laz_files_ahn3, metadata_table_ahn3, tile_index_pdok):
+def metadata_ahn3(context, laz_files_ahn3, metadata_table_ahn3, tile_index_ahn):
     """Metadata of the AHN3 LAZ file, retrieved from the PDOK tile index and computed
     with 'pdal info'.
     The metadata is loaded into the metadata database table."""
     return compute_load_metadata(
-        context, laz_files_ahn3, metadata_table_ahn3, tile_index_pdok
+        context, laz_files_ahn3, metadata_table_ahn3, tile_index_ahn
     )
@@ -66,12 +66,12 @@ def metadata_ahn3(context, laz_files_ahn3, metadata_table_ahn3, tile_index_pdok)
     required_resource_keys={"pdal", "db_connection"},
     partitions_def=PartitionDefinitionAHN(),
 )
-def metadata_ahn4(context, laz_files_ahn4, metadata_table_ahn4, tile_index_pdok):
+def metadata_ahn4(context, laz_files_ahn4, metadata_table_ahn4, tile_index_ahn):
     """Metadata of the AHN4 LAZ file, retrieved from the PDOK tile index and computed
     with 'pdal info'.
     The metadata is loaded into the metadata database table."""
     return compute_load_metadata(
-        context, laz_files_ahn4, metadata_table_ahn4, tile_index_pdok
+        context, laz_files_ahn4, metadata_table_ahn4, tile_index_ahn
     )
@@ -89,12 +89,12 @@ def metadata_ahn4(context, laz_files_ahn4, metadata_table_ahn4, tile_index_pdok)
     required_resource_keys={"pdal", "db_connection"},
     partitions_def=PartitionDefinitionAHN(),
 )
-def metadata_ahn5(context, laz_files_ahn5, metadata_table_ahn5, tile_index_pdok):
+def metadata_ahn5(context, laz_files_ahn5, metadata_table_ahn5, tile_index_ahn):
     """Metadata of the AHN5 LAZ file, retrieved from the PDOK tile index and computed
     with 'pdal info'.
     The metadata is loaded into the metadata database table."""
     return compute_load_metadata(
-        context, laz_files_ahn5, metadata_table_ahn5, tile_index_pdok
+        context, laz_files_ahn5, metadata_table_ahn5, tile_index_ahn
     )
diff --git a/packages/core/src/bag3d/core/assets/bag/download.py b/packages/core/src/bag3d/core/assets/bag/download.py
index 5449e80e..0621f9aa 100644
--- a/packages/core/src/bag3d/core/assets/bag/download.py
+++ b/packages/core/src/bag3d/core/assets/bag/download.py
@@ -12,7 +12,6 @@
 )
 from lxml import objectify

-from bag3d.common.utils.geodata import bbox_from_wkt
 from bag3d.common.utils.files import unzip
 from bag3d.common.utils.requests import download_file
 from bag3d.common.utils.database import (
@@ -298,19 +297,36 @@ def load_bag_layer(
     layer_zip = Path(f"{extract_dir}/9999{layer_id}{shortdate}.zip")
     layer_dir = Path(f"{extract_dir}/9999{layer_id}{shortdate}")
     unzip(layer_zip, layer_dir, remove=remove_zip)
+    # Create an empty layer for appending
+    cmd = [
+        "{exe}",
+        "-limit 0",
+        "-overwrite",
+        "-nln {new_table}",
+        "-lco UNLOGGED=ON",
+        "-lco SPATIAL_INDEX=NONE",
+    ]
+    cmd.append("-f PostgreSQL PG:'{dsn}'")
+    cmd.append(str(layer_dir))
+    cmd = " ".join(cmd)
+    return_code, output = context.resources.gdal.app.execute(
+        "ogr2ogr", cmd, kwargs=kwargs, local_path=extract_dir
+    )
+    if return_code != 0:
+        return False
+    # Parallel insert
     cmd = [
         'parallel "{exe}',
         "--config PG_USE_COPY=YES",
-        "-overwrite",
+        "-append",
         "-nln {new_table}",
         "-lco UNLOGGED=ON",
         "-lco SPATIAL_INDEX=NONE",
     ]
     geofilter = context.op_config.get("geofilter")
     if geofilter:
-        bbox = bbox_from_wkt(geofilter)
-        cmd.append("-spat {bbox}")
-        kwargs["bbox"] = " ".join(map(str, bbox))
+        cmd.append("-clipsrc {wkt}")
+        kwargs["wkt"] = geofilter
     cmd.append("-f PostgreSQL PG:'{dsn}'")
     cmd.append('{{}}"')
     cmd.append(f"::: {layer_dir}/*.xml")
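The load is now split into two steps: one `ogr2ogr` call that only creates the empty target table (`-limit 0 -overwrite`), followed by parallel `-append` runs that all write into the already-existing table. A rough sketch of the same pattern with plain `subprocess` calls, without the command templating (the DSN, directory, and table name are placeholders):

```python
import subprocess
from pathlib import Path

dsn = "PG:dbname=baseregisters user=postgres"  # placeholder DSN
layer_dir = Path("/tmp/9999LIG08102024")       # placeholder extract dir
new_table = "stage_bag.ligplaats"              # placeholder table name

# Step 1: create the empty target table. -limit 0 reads no features and
# -overwrite (re)creates the table, so the schema exists before appending.
subprocess.run(
    ["ogr2ogr", "-limit", "0", "-overwrite", "-nln", new_table,
     "-lco", "UNLOGGED=ON", "-lco", "SPATIAL_INDEX=NONE",
     "-f", "PostgreSQL", dsn, str(layer_dir)],
    check=True,
)

# Step 2: append every extracted XML file to the existing table. In the diff
# this loop runs under GNU parallel with --config PG_USE_COPY=YES for speed.
for xml_file in sorted(layer_dir.glob("*.xml")):
    subprocess.run(
        ["ogr2ogr", "-append", "-nln", new_table,
         "-f", "PostgreSQL", dsn, str(xml_file)],
        check=True,
    )
```

Appending instead of overwriting is what makes the parallel insert safe: with `-overwrite`, concurrent workers would race on dropping and recreating the table.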
diff --git a/packages/core/src/bag3d/core/assets/deploy/godzilla.py b/packages/core/src/bag3d/core/assets/deploy/godzilla.py
index c262cad9..1fe83e66 100644
--- a/packages/core/src/bag3d/core/assets/deploy/godzilla.py
+++ b/packages/core/src/bag3d/core/assets/deploy/godzilla.py
@@ -34,33 +34,6 @@ def compressed_export_nl(context, reconstruction_output_multitiles_nl):
     return Output(output_tarfile, metadata=metadata_output)


-@asset(
-    ins={
-        "reconstruction_output_multitiles_zuid_holland": AssetIn(key_prefix="export"),
-        "geopackage_nl": AssetIn(key_prefix="export"),
-        "export_index": AssetIn(key_prefix="export"),
-        "metadata": AssetIn(key_prefix="export"),
-    },
-)
-def compressed_export_zuid_holland(
-    context,
-    reconstruction_output_multitiles_zuid_holland,
-    geopackage_nl,
-    export_index,
-    metadata,
-):
-    """A .tar.gz compressed full directory tree of the exports"""
-    export_dir = reconstruction_output_multitiles_zuid_holland
-    output_tarfile = export_dir.parent / "export.tar.gz"
-    with tarfile.open(output_tarfile, "w:gz") as tar:
-        tar.add(export_dir, arcname="export")
-    metadata_output = {
-        "size [Gb]": output_tarfile.stat().st_size * 1e-9,
-        "path": str(output_tarfile),
-    }
-    return Output(output_tarfile, metadata=metadata_output)
-
-
 @asset(ins={"metadata": AssetIn(key_prefix="export")})
 def downloadable_godzilla(context, compressed_export_nl: Path, metadata: Path):
     """Downloadable files hosted on godzilla.
diff --git a/packages/core/src/bag3d/core/assets/input/input_for_reconstruction.py b/packages/core/src/bag3d/core/assets/input/input_for_reconstruction.py
index 5bf7fc8d..005595f3 100644
--- a/packages/core/src/bag3d/core/assets/input/input_for_reconstruction.py
+++ b/packages/core/src/bag3d/core/assets/input/input_for_reconstruction.py
@@ -21,7 +21,7 @@
 )
 def reconstruction_input(
     context, bag_pandactueelbestaand, bag_kas_warenhuis, bag_bag_overlap
-):
+) -> Output[PostgresTableIdentifier]:
     """The input for the building reconstruction, where:
     - duplicates are removed
     """
diff --git a/packages/core/src/bag3d/core/assets/input/tile.py b/packages/core/src/bag3d/core/assets/input/tile.py
index a6c8f4e9..12d28114 100644
--- a/packages/core/src/bag3d/core/assets/input/tile.py
+++ b/packages/core/src/bag3d/core/assets/input/tile.py
@@ -27,6 +27,9 @@ def reconstruction_input_tiles(context, reconstruction_input):
     conn = context.resources.db_connection.connect
     conn.send_query(f"CREATE SCHEMA IF NOT EXISTS {output_schema}")

+    # TODO: temporary hack to remove sslmode, because it is not implemented in tyler-db
+    uri = conn.dsn.replace("sslmode=allow", "").strip()
+
     # tiler-db creates two tables. output_schema.index and output_schema.tiles
     # The tiles table has 'tile_id' and 'boundary' columns.
     cmd = [
@@ -35,7 +38,7 @@
         "--drop-existing",
         f"--qtree-capacity {quadtree_capacity}",
         f"--grid-cellsize {grid_cellsize}",
-        f'--uri "{conn.dsn}"',
+        f'--uri "{uri}"',
         f"--table {reconstruction_input}",
         f"--geometry-column {geometry_column}",
         f"--primary-key {primary_key}",
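The `str.replace` above only strips the literal `sslmode=allow`. A slightly more general variant (a hypothetical sketch, not part of this change) would remove any `sslmode` value from a key/value DSN:

```python
import re

def strip_sslmode(dsn: str) -> str:
    """Remove an sslmode=<value> parameter from a libpq key/value DSN."""
    return re.sub(r"\s*sslmode=\S+", "", dsn).strip()

assert (
    strip_sslmode("host=db port=5432 sslmode=allow dbname=baseregisters")
    == "host=db port=5432 dbname=baseregisters"
)
```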
"9/476/604", - "9/388/656", - "9/388/660", - "9/392/656", - "9/400/684", - "9/408/684", - "9/508/680", - "9/400/780", - "9/412/868", - "9/468/968", - "9/656/52", - "9/512/372", - "9/600/376", - "9/600/380", - "9/520/464", - "9/520/468", - "9/524/464", - "9/600/384", - "9/600/388", - "9/660/284", - "9/664/280", - "9/744/316", - "9/804/500", - "9/532/632", - "9/728/708", - "9/716/812", - "9/724/812", - "9/892/520", - "9/892/552", - "9/956/688", - "9/960/656", - "9/928/848", - "9/872/1096", - "9/1000/1068", - "10/396/370", - "10/396/374", - "10/398/372", - "10/398/374", - "10/400/374", - "10/422/402", - "10/224/554", - "10/226/554", - "10/240/558", - "10/242/556", - "10/242/558", - "10/254/546", - "10/256/544", - "10/272/562", - "10/294/518", - "10/314/520", - "10/314/522", - "10/314/524", - "10/316/520", - "10/316/522", - "10/318/522", - "10/316/524", - "10/316/526", - "10/256/588", - "10/256/590", - "10/258/588", - "10/264/584", - "10/266/584", - "10/264/588", - "10/264/590", - "10/266/588", - "10/268/586", - "10/270/586", - "10/268/588", - "10/270/588", - "10/262/594", - "10/264/592", - "10/264/594", - "10/266/592", - "10/266/594", - "10/268/594", - "10/270/594", - "10/270/596", - "10/272/586", - "10/274/584", - "10/274/586", - "10/274/588", - "10/274/594", - "10/276/594", - "10/278/592", - "10/364/614", - "10/362/724", - "10/362/726", - "10/364/724", - "10/484/592", - "10/492/598", - "10/494/596", - "10/428/710", - "10/426/718", - "10/428/712", - "10/428/716", - "10/428/718", - "10/430/716", - "10/426/720", - "10/432/716", - "10/434/716", - "10/434/718", - "10/436/718", - "10/432/720", - "10/432/722", - "10/434/720", - "10/700/110", - "10/512/368", - "10/512/370", - "10/514/368", - "10/514/370", - "10/710/806", - "10/722/812", - "10/950/676", - "10/960/684", - "10/672/1088", - "10/672/1090", - "10/674/1090", - "11/225/553", - "11/263/592", - "11/263/593", - "11/361/725", - "11/492/595", - "11/493/595", - "11/492/597", - "11/493/597", - "11/428/721", - "11/430/723", - "11/431/723", -] diff --git a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py index 1f24bbc2..645e0974 100644 --- a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py +++ b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py @@ -1,30 +1,25 @@ -from hashlib import sha1 -from datetime import date import time +from datetime import date +from hashlib import sha1 +from pathlib import Path from dagster import ( asset, StaticPartitionsDefinition, AssetIn, - Output, Failure, get_dagster_logger, + Field, ) -from psycopg.sql import SQL from pgutils import PostgresTableIdentifier - -from bag3d.common.utils.files import geoflow_crop_dir -from bag3d.common.utils.dagster import format_date -from bag3d.common.resources.wkt import ZUID_HOLLAND +from psycopg.sql import SQL from bag3d.common.resources import resource_defs - +from bag3d.common.utils.dagster import format_date +from bag3d.common.utils.files import geoflow_crop_dir +from bag3d.core.assets.ahn.core import ahn_dir from bag3d.core.assets.input import RECONSTRUCTION_INPUT_SCHEMA from bag3d.core.assets.input.tile import get_tile_ids -from bag3d.core.assets.ahn.core import ahn_dir - -# debug -from bag3d.core.assets.reconstruction import RECONSTRUCT_RERUN_INPUT_PARTITIONS def generate_3dbag_version_date(context): @@ -71,205 +66,93 @@ def __init__(self, schema: str, table_tiles: str, wkt: str = None): "file_store", "file_store_fastssd", }, - 
code_version=resource_defs["roofer"].app.version("crop"), -) -def cropped_input_and_config_nl( - context, regular_grid_200m, tiles, index, reconstruction_input -): - """Runs roofer for cropping the input data per feature and selects the best point - cloud for the reconstruction per feature. - - 1. Crop the point clouds with the BAG footprints - 2. Select the best point cloud for the footprint and write the point cloud file - 3. Write the geoflow (.toml) reconstruction configuration file for the footprint - """ - - return cropped_input_and_config_func( - context, index, reconstruction_input, regular_grid_200m, tiles - ) - - -@asset( - required_resource_keys={"db_connection"}, - ins={ - "reconstruction_input": AssetIn(key_prefix="input"), - }, -) -def excluded_greenhouses(context, cropped_input_and_config_nl, reconstruction_input): - """Use the cropped input .las files to identify greenhouses.""" - objects_dir = cropped_input_and_config_nl.joinpath("objects") - if not objects_dir.exists(): - raise Failure(f"input features don't exists for {cropped_input_and_config_nl}") - - query = SQL("""SELECT identificatie from {reconstruction_input} - WHERE st_area(geometry) > 100""") - - res = context.resources.db_connection.connect.get_query( - query, query_params={"reconstruction_input": reconstruction_input} - ) - for feature in objects_dir.iterdir(): - if feature.is_dir(): - if feature.name in res: - context.log.info( - f"Feature {feature.name} has > 100m2 area and will be checked" - ) - break - - # new_table = PostgresTableIdentifier(RECONSTRUCTION_INPUT_SCHEMA, "excluded_greenhouses") - - # return Output(new_table, metadata=metadata) - - -@asset( - partitions_def=PartitionDefinition3DBagReconstruction( - schema=RECONSTRUCTION_INPUT_SCHEMA, table_tiles="tiles", wkt=ZUID_HOLLAND - ), - ins={ - "regular_grid_200m": AssetIn(key_prefix="ahn"), - "tiles": AssetIn(key_prefix="input"), - "index": AssetIn(key_prefix="input"), - "reconstruction_input": AssetIn(key_prefix="input"), - }, - required_resource_keys={ - "db_connection", - "roofer", - "file_store", - "file_store_fastssd", + code_version=resource_defs["roofer"].app.version("roofer"), + config_schema={ + "dir_tiles_200m_ahn3": Field( + str, + description="Directory of the 200m tiles of AHN3. Used if the tiles are stored in a non-standard location.", + is_required=False, + ), + "dir_tiles_200m_ahn4": Field( + str, + description="Directory of the 200m tiles of AHN4. Used if the tiles are stored in a non-standard location.", + is_required=False, + ), + "dir_tiles_200m_ahn5": Field( + str, + description="Directory of the 200m tiles of AHN5. Used if the tiles are stored in a non-standard location.", + is_required=False, + ), }, - code_version=resource_defs["roofer"].app.version("crop"), ) -def cropped_input_and_config_zuid_holland( +def reconstructed_building_models_nl( context, regular_grid_200m, tiles, index, reconstruction_input ): - """Runs roofer for cropping the input data per feature and selects the best point - cloud for the reconstruction per feature. - - 1. Crop the point clouds with the BAG footprints - 2. Select the best point cloud for the footprint and write the point cloud file - 3. 
Write the geoflow (.toml) reconstruction configuration file for the footprint - """ - - return cropped_input_and_config_func( - context, index, reconstruction_input, regular_grid_200m, tiles - ) - - -@asset( - partitions_def=PartitionDefinition3DBagReconstruction( - schema=RECONSTRUCTION_INPUT_SCHEMA, table_tiles="tiles" - ), - required_resource_keys={"geoflow", "file_store", "file_store_fastssd"}, - code_version=resource_defs["geoflow"].app.version("geof"), -) -def reconstructed_building_models_nl(context, cropped_input_and_config_nl): """Generate the 3D building models by running the reconstruction sequentially - within one partition. Runs geof.""" - return reconstruct_building_models_func(context, cropped_input_and_config_nl) - - -@asset( - partitions_def=StaticPartitionsDefinition( - partition_keys=RECONSTRUCT_RERUN_INPUT_PARTITIONS - ), - ins={ - "regular_grid_200m": AssetIn(key_prefix="ahn"), - "tiles": AssetIn(key_prefix="input"), - "index": AssetIn(key_prefix="input"), - "reconstruction_input": AssetIn(key_prefix="input"), - }, - required_resource_keys={ - "db_connection", - "roofer", - "file_store", - "file_store_fastssd", - }, - code_version=resource_defs["roofer"].app.version("crop"), -) -def cropped_input_and_config_nl_rerun( - context, regular_grid_200m, tiles, index, reconstruction_input -): - """Rerun the reconstruction with just a specific set of partitions.""" - return cropped_input_and_config_func( - context, index, reconstruction_input, regular_grid_200m, tiles + within one partition. + Runs roofer.""" + + roofer_toml, output_dir, tile_view = create_roofer_config( + context, + index, + reconstruction_input, + regular_grid_200m, + tiles, + dir_tiles_200m_ahn3=context.op_config.get("dir_tiles_200m_ahn3"), + dir_tiles_200m_ahn4=context.op_config.get("dir_tiles_200m_ahn4"), + dir_tiles_200m_ahn5=context.op_config.get("dir_tiles_200m_ahn5"), ) + context.log.info(f"{roofer_toml=}") + context.log.info(f"{tile_view=}") -@asset( - partitions_def=StaticPartitionsDefinition( - partition_keys=RECONSTRUCT_RERUN_INPUT_PARTITIONS - ), - required_resource_keys={"geoflow", "file_store", "file_store_fastssd"}, - code_version=resource_defs["geoflow"].app.version("geof"), -) -def reconstructed_building_models_nl_rerun(context, cropped_input_and_config_nl_rerun): - """Rerun the reconstruction with just a specific set of partitions.""" - return reconstruct_building_models_func(context, cropped_input_and_config_nl_rerun) - - -@asset( - partitions_def=PartitionDefinition3DBagReconstruction( - schema=RECONSTRUCTION_INPUT_SCHEMA, table_tiles="tiles", wkt=ZUID_HOLLAND - ), - required_resource_keys={"geoflow", "file_store", "file_store_fastssd"}, - code_version=resource_defs["geoflow"].app.version("geof"), -) -def reconstructed_building_models_zuid_holland( - context, cropped_input_and_config_zuid_holland -): - """Generate the 3D building models by running the reconstruction sequentially - within one partition. 
Runs geof.""" - return reconstruct_building_models_func( - context, cropped_input_and_config_zuid_holland - ) + try: + return_code, output = context.resources.roofer.app.execute( + exe_name="roofer", + command=f"{{exe}} --config {{local_path}} {output_dir}", + local_path=roofer_toml, + silent=False, + ) + context.log.debug(f"{return_code=} {output=}") + if return_code != 0 or "error" in output.lower(): + context.log.error(output) + raise Failure + finally: + context.resources.db_connection.connect.send_query( + SQL("DROP VIEW {tile_view}"), query_params={"tile_view": tile_view} + ) -def cropped_input_and_config_func( - context, index, reconstruction_input, regular_grid_200m, tiles +def create_roofer_config( + context, + index, + reconstruction_input, + regular_grid_200m, + tiles, + dir_tiles_200m_ahn3=None, + dir_tiles_200m_ahn4=None, + dir_tiles_200m_ahn5=None, ): toml_template = """ - [input.footprint] - path = '{footprint_file}' - id_attribute = "identificatie" - - [[input.pointclouds]] + polygon-source = "{footprint_file}" + id-attribute = "identificatie" + force-lod11-attribute = "kas_warenhuis" + + cellsize = 0.5 + lod = 22 + + split-cjseq = true + output-directory = "{output_path}" + + [[pointclouds]] name = "AHN3" quality = 1 - path = '{ahn3_files}' - - [[input.pointclouds]] + source = {ahn3_files} + + [[pointclouds]] name = "AHN4" quality = 0 - path = '{ahn4_files}' - - [parameters] - cellsize = 0.5 - - [output] - path = '{output_path}' - - # {{bid}} will be replaced by building identifier - # {{pc_name}} will be replaced by input pointcloud name - # {{path}} will be replaced by path - building_toml_file = '{{path}}/objects/{{bid}}/config_{{pc_name}}.toml' - building_las_file = '{{path}}/objects/{{bid}}/crop/{{bid}}_{{pc_name}}.las' - building_raster_file = '{{path}}/objects/{{bid}}/crop/{{bid}}_{{pc_name}}.tif' - building_gpkg_file = '{{path}}/objects/{{bid}}/crop/{{bid}}.gpkg' - building_jsonl_file = '{{path}}/objects/{{bid}}/reconstruct/{{bid}}.city.jsonl' - - metadata_json_file = '{{path}}/metadata.json' - jsonl_list_file = '{{path}}/features.txt' - index_file = '{{path}}/index.gpkg' - - # these get passed through to the geoflow config files that are generated for each building - [output.reconstruction_parameters] - GF_PROCESS_CRS="EPSG:7415" - OUTPUT_CRS="EPSG:7415" - CITYJSON_TRANSLATE_X=171800.0 - CITYJSON_TRANSLATE_Y=472700.0 - CITYJSON_TRANSLATE_Z=0.0 - CITYJSON_SCALE_X=0.001 - CITYJSON_SCALE_Y=0.001 - CITYJSON_SCALE_Z=0.001 + source = {ahn4_files} """ tile_id = context.partition_key query_laz_tiles = SQL(""" @@ -288,16 +171,22 @@ def cropped_input_and_config_func( "tile_id": tile_id, }, ) - out_dir_ahn3 = ahn_dir( - context.resources.file_store.file_store.data_dir, ahn_version=3 - ).joinpath("tiles_200m") + if dir_tiles_200m_ahn3 is not None: + out_dir_ahn3 = Path(dir_tiles_200m_ahn3) + else: + out_dir_ahn3 = ahn_dir( + context.resources.file_store.file_store.data_dir, ahn_version=3 + ).joinpath("tiles_200m") laz_files_ahn3 = [ str(out_dir_ahn3 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res ] # TODO: probably should take the tiles_200m directory from the asset output - out_dir_ahn4 = ahn_dir( - context.resources.file_store.file_store.data_dir, ahn_version=4 - ).joinpath("tiles_200m") + if dir_tiles_200m_ahn4 is not None: + out_dir_ahn4 = Path(dir_tiles_200m_ahn4) + else: + out_dir_ahn4 = ahn_dir( + context.resources.file_store.file_store.data_dir, ahn_version=4 + ).joinpath("tiles_200m") # TODO: same with the laz filename pattern laz_files_ahn4 = [ str(out_dir_ahn4 
/ f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res @@ -326,75 +215,13 @@ def cropped_input_and_config_func( ).joinpath(tile_id) output_dir.mkdir(exist_ok=True, parents=True) output_toml = toml_template.format( - tile_id=tile_id, footprint_file=f"PG:{context.resources.db_connection.connect.dsn} tables={tile_view}", - ahn3_files=" ".join(laz_files_ahn3), - ahn4_files=" ".join(laz_files_ahn4), + ahn3_files=laz_files_ahn3, + ahn4_files=laz_files_ahn4, output_path=output_dir, ) - path_toml = output_dir / "crop.toml" + path_toml = output_dir / "roofer.toml" with path_toml.open("w") as of: of.write(output_toml) - context.resources.roofer.app.execute( - "crop", "{exe} -c {local_path}", local_path=path_toml - ) - context.resources.db_connection.connect.send_query( - SQL("DROP VIEW {tile_view}"), query_params={"tile_view": tile_view} - ) - # TODO: what are the conditions for partition failure? - objects_dir = output_dir.joinpath("objects") - if objects_dir.exists(): - feature_count = sum(1 for f in objects_dir.iterdir() if f.is_dir()) - else: - feature_count = 0 - return Output( - output_dir, metadata={"feature_count": feature_count, "path": str(output_dir)} - ) - -def reconstruct_building_models_func(context, cropped_input_and_config): - context.log.info(f"geoflow.kwargs: {context.resources.geoflow.app.kwargs}") - flowchart = context.resources.geoflow.app.kwargs["flowcharts"]["reconstruct"] - cmd_template = "{{exe}} {{local_path}} --config {config_path}" - # TODO: what are the conditions for partition failure? - objects_dir = cropped_input_and_config.joinpath("objects") - if not objects_dir.exists(): - # In this case it would make more sense to raise Failure, but then a downstream, - # un-partitioned asset will never execute, because the downstream un-partitioned - # asset will expect that *all* upstream partitions succeeded. - # Maybe we could do something with a custom partition loader here... 
- context.log.error(f"input features don't exists for {cropped_input_and_config}") - return Output( - cropped_input_and_config, - metadata={"failed_nr": 0, "failed_id": [], "success_nr": 0}, - ) - failed = [] - cnt = 0 - for feature in objects_dir.iterdir(): - if feature.is_dir(): - config_path = feature.joinpath("config_.toml") - cmd = cmd_template.format(config_path=config_path) - try: - return_code, output = context.resources.geoflow.app.execute( - "geof", cmd, local_path=flowchart, silent=False - ) - if return_code != 0 or "error" in output.lower(): - context.log.error(output) - raise Failure - else: - cnt += 1 - except Failure: - failed.append(feature) - if cnt == 0: - raise Failure( - f"all features failed the reconstruction in {cropped_input_and_config}" - ) - else: - return Output( - cropped_input_and_config, - metadata={ - "failed_nr": len(failed), - "failed_id": [f.name for f in failed], - "success_nr": cnt, - }, - ) + return path_toml, output_dir, tile_view diff --git a/packages/core/src/bag3d/core/code_location.py b/packages/core/src/bag3d/core/code_location.py index b20900a2..522c4afa 100644 --- a/packages/core/src/bag3d/core/code_location.py +++ b/packages/core/src/bag3d/core/code_location.py @@ -15,10 +15,6 @@ job_ahn4, job_ahn5, job_ahn_tiles_200m, - job_zuid_holland_reconstruct, - job_zuid_holland_export, - job_zuid_holland_deploy, - job_zuid_holland_export_deploy, job_nl_reconstruct, job_nl_export, job_nl_deploy, @@ -39,10 +35,6 @@ job_ahn4, job_ahn5, job_ahn_tiles_200m, - job_zuid_holland_reconstruct, - job_zuid_holland_export, - job_zuid_holland_deploy, - job_zuid_holland_export_deploy, job_nl_reconstruct, job_nl_export, job_nl_deploy, diff --git a/packages/core/src/bag3d/core/jobs.py b/packages/core/src/bag3d/core/jobs.py index 81f1fe9d..8ccfd79b 100644 --- a/packages/core/src/bag3d/core/jobs.py +++ b/packages/core/src/bag3d/core/jobs.py @@ -6,7 +6,7 @@ description="Get the tile index (bladwijzer), md5 hashes of the LAZ files and " "create the tables for storing the metadata for AHN 3, 4 and 5, so that " "the AHN jobs can be run.", - selection=AssetSelection.assets(["ahn", "tile_index_pdok"]) + selection=AssetSelection.assets(["ahn", "tile_index_ahn"]) | AssetSelection.assets(["ahn", "md5_ahn3"]) | AssetSelection.assets(["ahn", "md5_ahn4"]) | AssetSelection.assets(["ahn", "sha256_ahn5"]) @@ -65,8 +65,9 @@ job_nl_reconstruct = define_asset_job( name="nl_reconstruct", description="Run the crop and reconstruct steps for the Netherlands.", - selection=AssetSelection.assets(["reconstruction", "cropped_input_and_config_nl"]) - | AssetSelection.assets(["reconstruction", "reconstructed_building_models_nl"]), + selection=AssetSelection.assets( + ["reconstruction", "reconstructed_building_models_nl"] + ), ) job_nl_export = define_asset_job( @@ -88,49 +89,3 @@ | AssetSelection.assets(["deploy", "downloadable_godzilla"]) | AssetSelection.assets(["deploy", "webservice_godzilla"]), ) - - -job_zuid_holland_reconstruct = define_asset_job( - name="zuid_holland_reconstruct", - description="Run the crop and reconstruct steps for the province of Zuid-Holland.", - selection=AssetSelection.assets( - ["reconstruction", "cropped_input_and_config_zuid_holland"] - ) - | AssetSelection.assets( - ["reconstruction", "reconstructed_building_models_zuid_holland"] - ), -) - -job_zuid_holland_export = define_asset_job( - name="zuid_holland_export", - description="Run the tyler export and 3D Tiles steps for the province of " - "Zuid-Holland.", - selection=AssetSelection.assets(["export", 
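Since `reconstructed_building_models_nl` now takes its 200m-tile directories from op config, a run can point it at non-standard locations. A hypothetical run configuration (the directory paths are placeholders; the layout follows Dagster's usual `ops` config structure):

```python
run_config = {
    "ops": {
        "reconstructed_building_models_nl": {
            "config": {
                "dir_tiles_200m_ahn3": "/fastssd/ahn3/tiles_200m",
                "dir_tiles_200m_ahn4": "/fastssd/ahn4/tiles_200m",
            }
        }
    }
}
```

Also worth noting why `source = {ahn3_files}` works in the new template: formatting a Python list of paths into the TOML string yields a single-quoted array, and single-quoted TOML strings are valid literal strings. A quick check (assumes Python 3.11+ for `tomllib`):

```python
import tomllib

template = "source = {laz_files}\n"
rendered = template.format(laz_files=["/data/t_1.laz", "/data/t_2.laz"])
# str(list) renders as ['/data/t_1.laz', '/data/t_2.laz'], which TOML
# parses as an array of literal strings.
assert tomllib.loads(rendered) == {"source": ["/data/t_1.laz", "/data/t_2.laz"]}
```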
"feature_evaluation"]) - | AssetSelection.assets(["export", "export_index"]) - | AssetSelection.assets(["export", "metadata"]) - | AssetSelection.assets(["export", "geopackage_nl"]) - | AssetSelection.assets(["export", "reconstruction_output_multitiles_zuid_holland"]) - | AssetSelection.assets(["export", "reconstruction_output_3dtiles_zuid_holland"]), -) - -job_zuid_holland_deploy = define_asset_job( - name="zuid_holland_deploy", - description="Deploy the Zuid-Holland data.", - selection=AssetSelection.assets(["deploy", "compressed_export_zuid_holland"]) - | AssetSelection.assets(["deploy", "downloadable_godzilla"]) - | AssetSelection.assets(["deploy", "webservice_godzilla"]), -) - -job_zuid_holland_export_deploy = define_asset_job( - name="zuid_holland_export_deploy", - description="Run the tyler export and 3D Tiles and deploy steps for the province of " - "Zuid-Holland.", - selection=AssetSelection.assets(["export", "feature_evaluation"]) - | AssetSelection.assets(["export", "export_index"]) - | AssetSelection.assets(["export", "metadata"]) - | AssetSelection.assets(["export", "geopackage_nl"]) - | AssetSelection.assets(["export", "reconstruction_output_multitiles_zuid_holland"]) - | AssetSelection.assets(["deploy", "compressed_export_zuid_holland"]) - | AssetSelection.assets(["deploy", "downloadable_godzilla"]) - | AssetSelection.assets(["deploy", "webservice_godzilla"]), -) diff --git a/packages/core/tests/conftest.py b/packages/core/tests/conftest.py index a9d22bc6..704edeb5 100644 --- a/packages/core/tests/conftest.py +++ b/packages/core/tests/conftest.py @@ -126,7 +126,7 @@ def sha256_ahn5_fix(): @pytest.fixture(scope="session") -def tile_index_pdok_fix(): +def tile_index_ahn_fix(): yield { "01cz1": { "AHN3_LAZ": "https://ns_hwh.fundaments.nl/hwh-ahn/AHN3/LAZ/C_01CZ1.LAZ", diff --git a/packages/core/tests/test_assets_ahn.py b/packages/core/tests/test_assets_ahn.py index 4886da78..886531f7 100644 --- a/packages/core/tests/test_assets_ahn.py +++ b/packages/core/tests/test_assets_ahn.py @@ -14,7 +14,7 @@ md5_ahn3, md5_ahn4, sha256_ahn5, - tile_index_pdok, + tile_index_ahn, ) from bag3d.core.assets.ahn.metadata import ( metadata_table_ahn3, @@ -35,13 +35,13 @@ def test_tile_index_origin(): def test_download_ahn_index(): tile_ids = download_ahn_index() - assert len(tile_ids) == 1406 + assert len(tile_ids) == 1407 assert tile_ids[list(tile_ids.keys())[0]] is None def test_download_ahn_index_geometry(): features = download_ahn_index(with_geom=True) - assert len(features) == 1406 + assert len(features) == 1407 assert features[list(features.keys())[0]] is not None @@ -78,36 +78,36 @@ def test_checksums_for_ahn(context): assert sha is not None -def test_tile_index_pdok(context): - res = tile_index_pdok(context) - assert len(res) == 1406 +def test_tile_index_ahn(context): + res = tile_index_ahn(context) + assert len(res) == 1407 assert res[list(res.keys())[0]] is not None @pytest.mark.slow -def test_laz_files_ahn3(context, md5_ahn3_fix, tile_index_pdok_fix): +def test_laz_files_ahn3(context, md5_ahn3_fix, tile_index_ahn_fix): laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 3) laz_dir.mkdir(exist_ok=True, parents=True) - res = laz_files_ahn3(context, md5_ahn3_fix, tile_index_pdok_fix) + res = laz_files_ahn3(context, md5_ahn3_fix, tile_index_ahn_fix) assert res.value.url is not None assert res is not None print(res.value) @pytest.mark.slow -def test_laz_files_ahn4(context, md5_ahn4_fix, tile_index_pdok_fix): +def test_laz_files_ahn4(context, md5_ahn4_fix, 
+def test_laz_files_ahn4(context, md5_ahn4_fix, tile_index_ahn_fix):
     laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 4)
     laz_dir.mkdir(exist_ok=True, parents=True)
-    res = laz_files_ahn4(context, md5_ahn4_fix, tile_index_pdok_fix)
+    res = laz_files_ahn4(context, md5_ahn4_fix, tile_index_ahn_fix)
     assert res.value.url is not None
     assert res is not None


 @pytest.mark.slow
-def test_laz_files_ahn5(context, sha256_ahn5_fix, tile_index_pdok_fix):
+def test_laz_files_ahn5(context, sha256_ahn5_fix, tile_index_ahn_fix):
     laz_dir = ahn_laz_dir(context.resources.file_store.file_store.data_dir, 5)
     laz_dir.mkdir(exist_ok=True, parents=True)
-    res = laz_files_ahn5(context, sha256_ahn5_fix, tile_index_pdok_fix)
+    res = laz_files_ahn5(context, sha256_ahn5_fix, tile_index_ahn_fix)
     assert res.value.url is not None
     assert res is not None
diff --git a/packages/core/tests/test_integration.py b/packages/core/tests/test_integration.py
index 6df4247d..7e809715 100644
--- a/packages/core/tests/test_integration.py
+++ b/packages/core/tests/test_integration.py
@@ -42,7 +42,10 @@ def test_integration_reconstruction_and_export(
             exe_geoflow=os.getenv("EXE_PATH_ROOFER_RECONSTRUCT"),
             flowchart=os.getenv("FLOWCHART_PATH_RECONSTRUCT"),
         ),
-        "roofer": RooferResource(exe_roofer_crop=os.getenv("EXE_PATH_ROOFER_CROP")),
+        "roofer": RooferResource(
+            exe_roofer=os.getenv("EXE_PATH_ROOFER_ROOFER"),
+            exe_crop=os.getenv("EXE_PATH_ROOFER_CROP"),
+        ),
         "gdal": GDALResource(
             exe_ogr2ogr=os.getenv("EXE_PATH_OGR2OGR"),
             exe_ogrinfo=os.getenv("EXE_PATH_OGRINFO"),
diff --git a/scripts/symlink_laz_per_province/requirements.txt b/scripts/symlink_laz_per_province/requirements.txt
index 4e0f616e..f5ae686e 100644
--- a/scripts/symlink_laz_per_province/requirements.txt
+++ b/scripts/symlink_laz_per_province/requirements.txt
@@ -1,2 +1,3 @@
 bag3d-common @ git+https://github.com/3DBAG/3dbag-pipeline.git@develop#egg=bag3d-common&subdirectory=packages/common
-shapely==2.0.6
\ No newline at end of file
+shapely==2.0.6
+psycopg==3.2.3
\ No newline at end of file
diff --git a/scripts/symlink_laz_per_province/symlink_laz_per_province.py b/scripts/symlink_laz_per_province/symlink_laz_per_province.py
index 2aad143a..9ddf3917 100644
--- a/scripts/symlink_laz_per_province/symlink_laz_per_province.py
+++ b/scripts/symlink_laz_per_province/symlink_laz_per_province.py
@@ -9,9 +9,13 @@
 from pathlib import Path
 from sys import stdout

-from bag3d.common.utils.requests import download_as_str
+import psycopg
 from shapely import STRtree
 from shapely.geometry import shape
+import dotenv
+from bag3d.common.utils.requests import download_as_str
+
+dotenv.load_dotenv()


 def ahn_filename(tile_name: str) -> str:
@@ -38,6 +42,9 @@ def configure_logging(verbosity):
     description="Create directories per Dutch province and creates symlinks to the LAZ files that are within the given province.",
 )
 parser.add_argument("--laz", help="Directory containing the LAZ files.")
+parser.add_argument("--t200", help="Directory containing the 200m tiles.")
+parser.add_argument("--dbname")
+parser.add_argument("--user")
 parser.add_argument(
     "--output",
     help="Directory to store the symlinks that point to the files in '--laz'.",
@@ -46,6 +53,7 @@
 if __name__ == "__main__":
     args = parser.parse_args()
     path_laz = Path(args.laz)
+    path_200m = Path(args.t200)
     if not path_laz.is_dir():
         raise NotADirectoryError(f"LAZ directory does not exist: {path_laz}")
     path_output = Path(args.output)
@@ -106,6 +114,11 @@ def configure_logging(verbosity):
     log.info(
         f"Creating symlinks for provinces in {args.output} to the LAZ files in {args.laz}"
     )
+
+    # AHN metadata database
+    schema = "ahn"
+    table = "regular_grid_200m"
+
     for province in dict_provinces["features"]:
         province_name = province["properties"]["naam"]
         province_geometry = shape(province["geometry"])
@@ -114,11 +127,11 @@
             dict_ahn_bladwijzer["features"][tile_i]["properties"]["AHN"]
             for tile_i in result
         ]
-        province_path = path_output / province_name.lower()
-        province_path.mkdir(parents=True, exist_ok=True)
+        province_path_laz = path_output / province_name.lower() / "LAZ"
+        province_path_laz.mkdir(parents=True, exist_ok=True)
         for ahn_tile_name in ahn_tile_names:
             filename = ahn_filename(ahn_tile_name)
-            path_link = province_path / filename
+            path_link = province_path_laz / filename
             path_lazfile = path_laz / filename
             if path_lazfile.is_file():
                 if path_link.exists():
@@ -127,4 +140,26 @@
             else:
                 log.error(f"File does not exist: {path_lazfile}")

+        # Link the 200m tiles
+        province_path_200m = path_output / province_name.lower() / "tiles_200m"
+        province_path_200m.mkdir(parents=True, exist_ok=True)
+        with psycopg.connect(
+            dbname=args.dbname, user=args.user, host="localhost", port=5432
+        ) as conn:
+            with conn.cursor() as cur:
+                wkt = f"SRID=28992;{province_geometry.wkt}"
+                cur.execute(
+                    f"select id from {schema}.{table} where st_intersects(geom, '{wkt}')"
+                )
+                for record in cur:
+                    filename = f"t_{record[0]}.laz"
+                    path_link = province_path_200m / filename
+                    path_200mfile = path_200m / filename
+                    if path_200mfile.is_file():
+                        if path_link.exists():
+                            path_link.unlink()
+                        path_link.symlink_to(path_200mfile)
+                    else:
+                        log.error(f"File does not exist: {path_200mfile}")
+
     log.info("Done")
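One possible hardening of the new 200m-tile query (a sketch, not part of the change): pass the province geometry as a bound parameter instead of interpolating it into the SQL string, which psycopg supports directly for the EWKT text:

```python
# Equivalent to the f-string query above, with the geometry bound as a
# parameter; the schema and table are fixed here ("ahn"."regular_grid_200m").
cur.execute(
    "SELECT id FROM ahn.regular_grid_200m WHERE st_intersects(geom, %s::geometry)",
    (f"SRID=28992;{province_geometry.wkt}",),
)
```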
diff --git a/tools-test.sh b/tools-test.sh
index 24977d55..80efd155 100644
--- a/tools-test.sh
+++ b/tools-test.sh
@@ -89,7 +89,7 @@ check_exe_help() {

 check_exe "tyler"
 check_exe "tyler-db"
-check_exe "crop"
+check_exe "roofer"
 check_exe "geof"
 check_exe "ogr2ogr"
 check_exe "ogrinfo"