From 85c7ff192bba312a7c3952392420c2c5a6a9ab78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 04:40:25 +0100 Subject: [PATCH 1/8] Scripts update --- scripts/roofer-logs-plot.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/roofer-logs-plot.py b/scripts/roofer-logs-plot.py index 0d7f4083..f5f4be16 100644 --- a/scripts/roofer-logs-plot.py +++ b/scripts/roofer-logs-plot.py @@ -1,7 +1,8 @@ import pandas as pd from matplotlib import pyplot as plt -reconstruction_times = pd.read_csv("scripts/roofer-logs.csv", index_col="building_id") +reconstruction_times = pd.read_csv("scripts/reconstruction-times.csv").drop_duplicates(subset=["building_id"]) +reconstruction_times.set_index("building_id", inplace=True) fig = plt.figure(figsize=(10,7)) fig.subplots_adjust(bottom=0.3) @@ -15,3 +16,7 @@ p = reconstruction_times.mean().plot(kind="bar") plt.title("mean") plt.show() + +reconstruction_times["total"] = reconstruction_times.sum(axis=1) +above_10min = reconstruction_times[reconstruction_times["total"] > 600000] +above_10min.to_csv("scripts/reconstruction-times-above_10min.csv") \ No newline at end of file From 7525437aaaccba94f053cca202a0ee8d5687716c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 04:40:41 +0100 Subject: [PATCH 2/8] Include AHN5 in reconstruction --- .../core/assets/reconstruction/reconstruction.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py index bbab862b..f03a3669 100644 --- a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py +++ b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py @@ -165,6 +165,11 @@ def create_roofer_config( name = "AHN4" quality = 0 source = {ahn4_files} + + [[pointclouds]] + name = "AHN5" + quality = 2 + source = {ahn5_files} [output-attributes] status = "b3_status" @@ -231,9 +236,12 @@ def create_roofer_config( laz_files_ahn4 = [ str(out_dir_ahn4 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res ] - out_dir_ahn5 = ahn_dir( - context.resources.file_store.file_store.data_dir, ahn_version=5 - ).joinpath("tiles_200m") + if dir_tiles_200m_ahn5 is not None: + out_dir_ahn5 = Path(dir_tiles_200m_ahn5) + else: + out_dir_ahn5 = ahn_dir( + context.resources.file_store.file_store.data_dir, ahn_version=5 + ).joinpath("tiles_200m") laz_files_ahn5 = [ str(out_dir_ahn5 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res ] From 54b57715122749bfbf6c9640c4642d6cf0ac24e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 04:41:26 +0100 Subject: [PATCH 3/8] Set lod11-fallback-area --- .../core/src/bag3d/core/assets/reconstruction/reconstruction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py index f03a3669..59f75f5d 100644 --- a/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py +++ b/packages/core/src/bag3d/core/assets/reconstruction/reconstruction.py @@ -150,6 +150,7 @@ def create_roofer_config( polygon-source = "{footprint_file}" id-attribute = "identificatie" force-lod11-attribute = "kas_warenhuis" + lod11-fallback-area = 20000 split-cjseq = true omit-metadata = true From 6e9d950915279fe74d8d6fe3f1ef6f3519f779c4 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 05:45:30 +0100 Subject: [PATCH 4/8] Update test data with AHN5 The AHN5 200m tiles are just a copy of the AHN4 files --- makefile | 2 +- packages/core/tests/test_integration.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/makefile b/makefile index 950abbf9..b2354bec 100644 --- a/makefile +++ b/makefile @@ -8,7 +8,7 @@ source: download: source rm -rf $(BAG3D_TEST_DATA) mkdir -p $(BAG3D_TEST_DATA) - cd $(BAG3D_TEST_DATA) ; curl -O https://data.3dbag.nl/testdata/pipeline/test_data_v4.zip ; unzip -q test_data_v4.zip ; rm test_data_v4.zip + cd $(BAG3D_TEST_DATA) ; curl -O https://data.3dbag.nl/testdata/pipeline/test_data_v4.zip ; unzip -q test_data_v5.zip ; rm test_data_v4.zip docker_volume_create: docker rm -f bag3d-dev-temp-container > /dev/null 2>&1 || true diff --git a/packages/core/tests/test_integration.py b/packages/core/tests/test_integration.py index 7e809715..32c4dd66 100644 --- a/packages/core/tests/test_integration.py +++ b/packages/core/tests/test_integration.py @@ -71,7 +71,6 @@ def test_integration_reconstruction_and_export( for asset in all_reconstruction_assets if asset.key in { - AssetKey(["reconstruction", "cropped_input_and_config_nl"]), AssetKey(["reconstruction", "reconstructed_building_models_nl"]), } ] From 76660771599a665e9a162aed5bd7d240ac98832b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 05:57:33 +0100 Subject: [PATCH 5/8] Fix integration test --- makefile | 2 +- packages/core/tests/test_integration.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/makefile b/makefile index b2354bec..dd2828a8 100644 --- a/makefile +++ b/makefile @@ -52,7 +52,7 @@ docker_watch: docker_build: docker compose -p bag3d-dev -f docker/compose.yaml build --no-cache -docker_restart: docker_down_rm docker_volume_rm docker_volume_create docker_up +docker_restart: docker_down docker_volume_recreate docker_up docker_down: docker compose -p bag3d-dev down --remove-orphans diff --git a/packages/core/tests/test_integration.py b/packages/core/tests/test_integration.py index 32c4dd66..3d64c37b 100644 --- a/packages/core/tests/test_integration.py +++ b/packages/core/tests/test_integration.py @@ -31,6 +31,7 @@ def test_integration_reconstruction_and_export( # update quadtree og_quadtree = test_data_dir / "quadtree.tsv" export_dir = test_data_dir / "reconstruction_input" / "3DBAG" / "export" + export_dir.mkdir(exist_ok=True) os.system(f"cp {og_quadtree} {export_dir}") resources = { From 1abae49cec369a6c38eb87d70aeeab57e1d892f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sat, 30 Nov 2024 06:15:03 +0100 Subject: [PATCH 6/8] Fix ruff check --- packages/core/src/bag3d/core/assets/export/tile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/core/src/bag3d/core/assets/export/tile.py b/packages/core/src/bag3d/core/assets/export/tile.py index 73bd235f..b061cbe4 100644 --- a/packages/core/src/bag3d/core/assets/export/tile.py +++ b/packages/core/src/bag3d/core/assets/export/tile.py @@ -4,7 +4,6 @@ from dagster import AssetKey, asset from bag3d.common.utils.files import geoflow_crop_dir, bag3d_dir, bag3d_export_dir -from bag3d.common.resources import resource_defs def create_sequence_header_file(template_file, output_file, version_3dbag): From 884176b923038f89ecff07e4b23a4634aa74e57a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sun, 1 Dec 2024 06:47:11 +0100 Subject: [PATCH 7/8] Update scripts --- 
 scripts/backfill-status.py         | 75 ++++++++++++++++++++++++++++++
 scripts/reload-code-location.py    | 39 +++++++++++-----
 scripts/roofer-logs-parse-debug.py | 52 +++++++++++++++++++++
 scripts/roofer-logs-parse.py       | 10 ++++
 scripts/roofer-logs-plot.py        | 10 ++--
 5 files changed, 171 insertions(+), 15 deletions(-)
 create mode 100644 scripts/backfill-status.py
 create mode 100644 scripts/roofer-logs-parse-debug.py

diff --git a/scripts/backfill-status.py b/scripts/backfill-status.py
new file mode 100644
index 00000000..ba56f94c
--- /dev/null
+++ b/scripts/backfill-status.py
@@ -0,0 +1,75 @@
+"""Query the status of a dagster backfill and of its partition runs through the
+dagster graphql API. A sample response is included at the bottom of this file."""
+
+import json
+
+import requests
+
+QUERY_BACKFILL_STATUS = """
+query BackfillPartitionStatus($backfillId: String!) {
+  partitionBackfillOrError(backfillId: $backfillId) {
+    __typename
+    ... on PartitionBackfill {
+      id
+      status
+      partitionStatuses {
+        results {
+          partitionName
+          runId
+          runStatus
+        }
+      }
+    }
+    ... on PythonError {
+      message
+      stack
+    }
+  }
+}
+"""
+
+dagit_host = "your_dagit_host_here"
+
+variables = {
+    "backfillId": "backfill id",
+}
+backfill_res = requests.post(
+    "http://{dagit_host}:3000/graphql?query={query_string}&variables={variables}".format(
+        dagit_host=dagit_host,
+        query_string=QUERY_BACKFILL_STATUS,
+        variables=json.dumps(variables),
+    )
+).json()
+# Print the raw response; its expected shape is shown in the sample below.
+print(json.dumps(backfill_res, indent=2))
+
+"""Sample response:
+{
+  "data": {
+    "partitionBackfillOrError": {
+      "__typename": "PartitionBackfill",
+      "id": "byauudaj",
+      "status": "COMPLETED",
+      "partitionStatuses": {
+        "results": [
+          {
+            "partitionName": "30dz2",
+            "runId": "bd06fd2d-c11f-4cac-95a5-7da5a2da1bab",
+            "runStatus": "STARTED"
+          },
+          {
+            "partitionName": "30gz1",
+            "runId": "d2e8d73a-9673-4101-a0e5-cdd356603a50",
+            "runStatus": "STARTED"
+          },
+          {
+            "partitionName": "37bn1",
+            "runId": "5b545bfe-6c1b-4028-b467-118ef505b530",
+            "runStatus": "SUCCESS"
+          },
+          {
+            "partitionName": "37bn2",
+            "runId": "b1c66cc6-c041-438c-815b-e051be4052a2",
+            "runStatus": "SUCCESS"
+          },
+          {
+            "partitionName": "37en1",
+            "runId": "1f4618a8-be67-444b-98fe-a22d693a1fb2",
+            "runStatus": "SUCCESS"
+          }
+        ]
+      }
+    }
+  }
+}
+"""
diff --git a/scripts/reload-code-location.py b/scripts/reload-code-location.py
index 30087e7f..c24783bf 100644
--- a/scripts/reload-code-location.py
+++ b/scripts/reload-code-location.py
@@ -1,3 +1,13 @@
+import argparse
+import json
+
+import requests
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--host", help="dagster host", default="localhost")
+parser.add_argument("--port", help="dagster port", type=int, default=3000)
+parser.add_argument("--code-location", help="dagster code location", default="core")
+
 RELOAD_REPOSITORY_LOCATION_MUTATION = """
 mutation ($repositoryLocationName: String!)
 { reloadRepositoryLocation(repositoryLocationName: $repositoryLocationName) {
@@ -19,17 +29,24 @@
 }
 """
 
-dagit_host = "your_dagit_host_here"
+if __name__ == "__main__":
+    args = parser.parse_args()
 
-variables = {
-    "repositoryLocationName": "your_location_name_here",
-}
-reload_res = requests.post(
-    "http://{dagit_host}:3000/graphql?query={query_string}&variables={variables}".format(
-        dagit_host=dagit_host,
-        query_string=RELOAD_REPOSITORY_LOCATION_MUTATION,
-        variables=json.dumps(variables),
+    variables = {
+        "repositoryLocationName": args.code_location,
+    }
+    reload_res = requests.post(
+        "http://{dagster_host}:{dagster_port}/graphql?query={query_string}&variables={variables}".format(
+            dagster_host=args.host,
+            dagster_port=args.port,
+            query_string=RELOAD_REPOSITORY_LOCATION_MUTATION,
+            variables=json.dumps(variables),
+        )
+    ).json()
+
+    did_succeed = (
+        reload_res["data"]["reloadRepositoryLocation"]["__typename"]
+        == "RepositoryLocation"
     )
-).json()
 
-did_succeed = reload_res["data"]["reloadRepositoryLocation"]["__typename"] == "RepositoryLocation"
\ No newline at end of file
+    exit(not did_succeed)
diff --git a/scripts/roofer-logs-parse-debug.py b/scripts/roofer-logs-parse-debug.py
new file mode 100644
index 00000000..c7ac50ca
--- /dev/null
+++ b/scripts/roofer-logs-parse-debug.py
@@ -0,0 +1,52 @@
+"""Parse all dagster debug files and find the building IDs that are unfinished in the
+reconstruction.
+
+The dagster debug files are downloaded from the UI. Go to the Runs overview to see the
+individual runs; the drop-down menu of the 'View' button of the run has a
+'Download debug file' option, click that.
+
+Download all .gz files into a directory. That dirpath is the --debug-dir input.
+
+The script extracts the '[reconstructor] start: ' and
+'[reconstructor] finish: ' records, matches the start-finish IDs and
+prints the IDs that don't have a 'finish' record.
+"""
+
+import argparse
+import gzip
+import json
+import re
+from pathlib import Path
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--debug-dir",
+    help="Directory with the dagster debug files that are gzipped.",
+    type=Path,
+)
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    # Building IDs extracted from the 'start' and 'finish' log records
+    buildings_started = []
+    buildings_finished = []
+    re_pat_start = re.compile(r"(?<=\[reconstructor\] start:) (/.*?\.\S*)")
+    re_pat_finish = re.compile(r"(?<=\[reconstructor\] finish:) (/.*?\.\S*)")
+    for p_gz in args.debug_dir.iterdir():
+        with gzip.open(p_gz, "rb") as f:
+            debug_messages = json.load(f)
+        for event in debug_messages["event_list"]:
+            if event["step_key"] == "reconstructed_building_models_nl":
+                record = event["user_message"]
+                if (res := re_pat_start.search(record)) is not None:
+                    building_id = Path(res.group(1)).stem.removesuffix(".city")
+                    buildings_started.append(building_id)
+                elif (res := re_pat_finish.search(record)) is not None:
+                    building_id = Path(res.group(1)).stem.removesuffix(".city")
+                    buildings_finished.append(building_id)
+                else:
+                    continue
+    buildings_unfinished = set(buildings_started) - set(buildings_finished)
+    print(buildings_unfinished)
diff --git a/scripts/roofer-logs-parse.py b/scripts/roofer-logs-parse.py
index 09622e4d..cd1620dd 100644
--- a/scripts/roofer-logs-parse.py
+++ b/scripts/roofer-logs-parse.py
@@ -1,3 +1,13 @@
+"""Extract the reconstruction step timings from the roofer logs that are recorded by
+dagster.
+
+Queries the successful reconstructions through the dagster graphql API.
+
+Get the logs from the dagster storage.
+ +Parse the `[reconstructor t]` log records of roofer and write the values to a CSV. +""" + import argparse import csv import re diff --git a/scripts/roofer-logs-plot.py b/scripts/roofer-logs-plot.py index f5f4be16..3b56785e 100644 --- a/scripts/roofer-logs-plot.py +++ b/scripts/roofer-logs-plot.py @@ -1,17 +1,19 @@ import pandas as pd from matplotlib import pyplot as plt -reconstruction_times = pd.read_csv("scripts/reconstruction-times.csv").drop_duplicates(subset=["building_id"]) +reconstruction_times = pd.read_csv("scripts/reconstruction-times.csv").drop_duplicates( + subset=["building_id"] +) reconstruction_times.set_index("building_id", inplace=True) -fig = plt.figure(figsize=(10,7)) +fig = plt.figure(figsize=(10, 7)) fig.subplots_adjust(bottom=0.3) p = reconstruction_times.median().plot(kind="bar") plt.title("median") plt.show() -fig = plt.figure(figsize=(10,7)) +fig = plt.figure(figsize=(10, 7)) fig.subplots_adjust(bottom=0.3) p = reconstruction_times.mean().plot(kind="bar") plt.title("mean") @@ -19,4 +21,4 @@ reconstruction_times["total"] = reconstruction_times.sum(axis=1) above_10min = reconstruction_times[reconstruction_times["total"] > 600000] -above_10min.to_csv("scripts/reconstruction-times-above_10min.csv") \ No newline at end of file +above_10min.to_csv("scripts/reconstruction-times-above_10min.csv") From 75ff08d1ac1dbf39fd5b45e5f42310e2f9bd2445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Dukai?= Date: Sun, 1 Dec 2024 07:34:35 +0100 Subject: [PATCH 8/8] Remove fysiekvoorkomen from top10nl Has been removed from source data --- packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql b/packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql index ed495322..69d06c57 100644 --- a/packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql +++ b/packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql @@ -10,7 +10,6 @@ SELECT ogc_fid AS fid , bronnauwkeurigheid::float4 , mutatietype , typegebouw::text[] - , fysiekvoorkomen::text , hoogteklasse::text , hoogteniveau::int2 , status::text
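
A note on consuming the backfill status query from scripts/backfill-status.py (PATCH 7/8): the response shape is documented only by the sample JSON embedded in that script. The sketch below condenses such a response into a one-line summary; it is an illustration, not part of the patches. The helper name and the idea of tallying run statuses are assumptions, only the response shape is taken from the sample.

from collections import Counter


def summarise_backfill(response: dict) -> str:
    """Condense a QUERY_BACKFILL_STATUS response into a one-line summary."""
    backfill = response["data"]["partitionBackfillOrError"]
    if backfill["__typename"] != "PartitionBackfill":
        # PythonError branch of the query: report the error message instead.
        return backfill["message"]
    counts = Counter(r["runStatus"] for r in backfill["partitionStatuses"]["results"])
    succeeded = counts.get("SUCCESS", 0)
    total = sum(counts.values())
    return f"{backfill['status']}: {succeeded}/{total} partition runs succeeded ({dict(counts)})"

Applied to the sample response in the script, this yields "COMPLETED: 3/5 partition runs succeeded" plus the per-status counts, with two runs still in STARTED.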
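
Similarly, the 600000 threshold in scripts/roofer-logs-plot.py reads as milliseconds (10 minutes), matching the "above_10min" name of the CSV it writes; the unit is an assumption, since the scripts never state it. A small sketch that ranks the slowest buildings from that CSV, assuming only that file and the millisecond unit:

import pandas as pd

# reconstruction-times-above_10min.csv is written by scripts/roofer-logs-plot.py;
# the timings are assumed to be milliseconds, so divide by 60000 to get minutes.
slow = pd.read_csv(
    "scripts/reconstruction-times-above_10min.csv", index_col="building_id"
)
slowest = (slow["total"] / 60000).sort_values(ascending=False).head(20)
print(slowest.round(1))  # total reconstruction time per building, in minutes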