4 changes: 2 additions & 2 deletions makefile
@@ -8,7 +8,7 @@ source:
download: source
	rm -rf $(BAG3D_TEST_DATA)
	mkdir -p $(BAG3D_TEST_DATA)
-	cd $(BAG3D_TEST_DATA) ; curl -O https://data.3dbag.nl/testdata/pipeline/test_data_v4.zip ; unzip -q test_data_v4.zip ; rm test_data_v4.zip
+	cd $(BAG3D_TEST_DATA) ; curl -O https://data.3dbag.nl/testdata/pipeline/test_data_v5.zip ; unzip -q test_data_v5.zip ; rm test_data_v5.zip

docker_volume_create:
	docker rm -f bag3d-dev-temp-container > /dev/null 2>&1 || true
@@ -52,7 +52,7 @@ docker_watch:
docker_build:
	docker compose -p bag3d-dev -f docker/compose.yaml build --no-cache

-docker_restart: docker_down_rm docker_volume_rm docker_volume_create docker_up
+docker_restart: docker_down docker_volume_recreate docker_up

docker_down:
	docker compose -p bag3d-dev down --remove-orphans
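For reference, a typical invocation of the touched targets stays the same; only the prerequisite chain of docker_restart changed:

make download         # fetch and unpack the v5 test data
make docker_restart   # docker_down, docker_volume_recreate, docker_up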
1 change: 0 additions & 1 deletion packages/core/src/bag3d/core/assets/export/tile.py
@@ -4,7 +4,6 @@
from dagster import AssetKey, asset

from bag3d.common.utils.files import geoflow_crop_dir, bag3d_dir, bag3d_export_dir
-from bag3d.common.resources import resource_defs


def create_sequence_header_file(template_file, output_file, version_3dbag):
@@ -150,6 +150,7 @@ def create_roofer_config(
polygon-source = "{footprint_file}"
id-attribute = "identificatie"
force-lod11-attribute = "kas_warenhuis"
+lod11-fallback-area = 20000

split-cjseq = true
omit-metadata = true
@@ -165,6 +166,11 @@
name = "AHN4"
quality = 0
source = {ahn4_files}

+[[pointclouds]]
+name = "AHN5"
+quality = 2
+source = {ahn5_files}

[output-attributes]
status = "b3_status"
@@ -231,9 +237,12 @@ def create_roofer_config(
    laz_files_ahn4 = [
        str(out_dir_ahn4 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res
    ]
-    out_dir_ahn5 = ahn_dir(
-        context.resources.file_store.file_store.data_dir, ahn_version=5
-    ).joinpath("tiles_200m")
+    if dir_tiles_200m_ahn5 is not None:
+        out_dir_ahn5 = Path(dir_tiles_200m_ahn5)
+    else:
+        out_dir_ahn5 = ahn_dir(
+            context.resources.file_store.file_store.data_dir, ahn_version=5
+        ).joinpath("tiles_200m")
    laz_files_ahn5 = [
        str(out_dir_ahn5 / f"t_{tile_id_ahn[0]}.laz") for tile_id_ahn in res
    ]
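For illustration, once the template placeholders above are filled in, the point-cloud part of the generated roofer config could look like this (the tile paths are hypothetical; the list formatting follows Python's repr of a list of strings, which TOML reads as literal strings):

[[pointclouds]]
name = "AHN4"
quality = 0
source = ['/data/ahn4/tiles_200m/t_37en1.laz']

[[pointclouds]]
name = "AHN5"
quality = 2
source = ['/data/ahn5/tiles_200m/t_37en1.laz']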
1 change: 0 additions & 1 deletion packages/core/src/bag3d/core/sqlfiles/top10nl_gebouw.sql
@@ -10,7 +10,6 @@ SELECT ogc_fid AS fid
, bronnauwkeurigheid::float4
, mutatietype
, typegebouw::text[]
-, fysiekvoorkomen::text
, hoogteklasse::text
, hoogteniveau::int2
, status::text
2 changes: 1 addition & 1 deletion packages/core/tests/test_integration.py
@@ -31,6 +31,7 @@ def test_integration_reconstruction_and_export(
    # update quadtree
    og_quadtree = test_data_dir / "quadtree.tsv"
    export_dir = test_data_dir / "reconstruction_input" / "3DBAG" / "export"
+    export_dir.mkdir(exist_ok=True)
    os.system(f"cp {og_quadtree} {export_dir}")

    resources = {
@@ -71,7 +72,6 @@
        for asset in all_reconstruction_assets
        if asset.key
        in {
-            AssetKey(["reconstruction", "cropped_input_and_config_nl"]),
            AssetKey(["reconstruction", "reconstructed_building_models_nl"]),
        }
    ]
75 changes: 75 additions & 0 deletions scripts/backfill-status.py
@@ -0,0 +1,75 @@
"""Query the status of a dagster backfill and of its partition runs through the
GraphQL API."""

import json

import requests

# Assumption: the host of the dagster webserver; adjust to your deployment.
dagit_host = "localhost"

QUERY_BACKFILL_STATUS = """
query BackfillPartitionStatus($backfillId: String!) {
  partitionBackfillOrError(backfillId: $backfillId) {
    __typename
    ... on PartitionBackfill {
      id
      status
      partitionStatuses {
        results {
          partitionName
          runId
          runStatus
        }
      }
    }
    ... on PythonError {
      message
      stack
    }
  }
}
"""

variables = {
    "backfillId": "backfill id",  # fill in the actual backfill ID
}
backfill_res = requests.post(
    "http://{dagit_host}:3000/graphql?query={query_string}&variables={variables}".format(
        dagit_host=dagit_host,
        query_string=QUERY_BACKFILL_STATUS,
        variables=json.dumps(variables),
    )
).json()

"""
{
"data": {
"partitionBackfillOrError": {
"__typename": "PartitionBackfill",
"id": "byauudaj",
"status": "COMPLETED",
"partitionStatuses": {
"results": [
{
"partitionName": "30dz2",
"runId": "bd06fd2d-c11f-4cac-95a5-7da5a2da1bab",
"runStatus": "STARTED"
},
{
"partitionName": "30gz1",
"runId": "d2e8d73a-9673-4101-a0e5-cdd356603a50",
"runStatus": "STARTED"
},
{
"partitionName": "37bn1",
"runId": "5b545bfe-6c1b-4028-b467-118ef505b530",
"runStatus": "SUCCESS"
},
{
"partitionName": "37bn2",
"runId": "b1c66cc6-c041-438c-815b-e051be4052a2",
"runStatus": "SUCCESS"
},
{
"partitionName": "37en1",
"runId": "1f4618a8-be67-444b-98fe-a22d693a1fb2",
"runStatus": "SUCCESS"
}
]
}
}
}
}
"""
39 changes: 28 additions & 11 deletions scripts/reload-code-location.py
@@ -1,3 +1,13 @@
import argparse
import json

import requests

parser = argparse.ArgumentParser()
parser.add_argument("--host", help="dagster host", default="localhost")
parser.add_argument("--port", help="dagster port", type=int, default=3000)
parser.add_argument("--code-location", help="dagster code location", default="core")

RELOAD_REPOSITORY_LOCATION_MUTATION = """
mutation ($repositoryLocationName: String!) {
reloadRepositoryLocation(repositoryLocationName: $repositoryLocationName) {
@@ -19,17 +29,24 @@
}
"""

-dagit_host = "your_dagit_host_here"
-
-variables = {
-    "repositoryLocationName": "your_location_name_here",
-}
-reload_res = requests.post(
-    "http://{dagit_host}:3000/graphql?query={query_string}&variables={variables}".format(
-        dagit_host=dagit_host,
-        query_string=RELOAD_REPOSITORY_LOCATION_MUTATION,
-        variables=json.dumps(variables),
-    )
-).json()
-
-did_succeed = reload_res["data"]["reloadRepositoryLocation"]["__typename"] == "RepositoryLocation"
+if __name__ == "__main__":
+    args = parser.parse_args()
+
+    variables = {
+        "repositoryLocationName": args.code_location,
+    }
+    reload_res = requests.post(
+        "http://{dagster_host}:{dagster_port}/graphql?query={query_string}&variables={variables}".format(
+            dagster_host=args.host,
+            dagster_port=args.port,
+            query_string=RELOAD_REPOSITORY_LOCATION_MUTATION,
+            variables=json.dumps(variables),
+        )
+    ).json()
+
+    did_succeed = (
+        reload_res["data"]["reloadRepositoryLocation"]["__typename"]
+        == "RepositoryLocation"
+    )
exit(not did_succeed)
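A usage sketch, with values mirroring the argparse defaults above:

python scripts/reload-code-location.py --host localhost --port 3000 --code-location core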
52 changes: 52 additions & 0 deletions scripts/roofer-logs-parse-debug.py
@@ -0,0 +1,52 @@
"""Parse all dagster debug files and find the building IDs that are unfinished in the
reconstruction.

The dagster debug files are downloaded from the UI. Go to the Runs overview to see the
individual runs, the drop-down menu of the 'View' button of the run has a
'Download debug file' option, click that.

Download all .gz files into a directory. That dirpath is the --debug-dir input.

The script extracts the '[reconsturctor] start: <building path>' and
'[reconstructor] finish: <building path>' records, matches the start-finish IDs and
prints the IDs that don't have a 'finish' record.
"""

import argparse
import gzip
import json
import re
from pathlib import Path


parser = argparse.ArgumentParser()
parser.add_argument(
    "--debug-dir",
    help="Directory with the dagster debug files that are gzipped.",
    type=Path,
)

if __name__ == "__main__":
    args = parser.parse_args()

    # building IDs that have a 'start' / 'finish' log record
    buildings_started = []
    buildings_finished = []
    re_pat_start = re.compile(r"(?<=\[reconstructor\] start:) (/.*?\.\S*)")
    re_pat_finish = re.compile(r"(?<=\[reconstructor\] finish:) (/.*?\.\S*)")
    for p_gz in args.debug_dir.iterdir():
        with gzip.open(p_gz, "rb") as f:
            debug_messages = json.load(f)
        for event in debug_messages["event_list"]:
            if event["step_key"] == "reconstructed_building_models_nl":
                record = event["user_message"]
                if (res := re_pat_start.search(record)) is not None:
                    # removesuffix, not rstrip: rstrip would strip any trailing
                    # characters from the set '.city', mangling some IDs
                    building_id = Path(res.group(1)).stem.removesuffix(".city")
                    buildings_started.append(building_id)
                elif (res := re_pat_finish.search(record)) is not None:
                    building_id = Path(res.group(1)).stem.removesuffix(".city")
                    buildings_finished.append(building_id)
                else:
                    continue
    buildings_unfinished = set(buildings_started) - set(buildings_finished)
    print(buildings_unfinished)
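A usage sketch, assuming the debug files were saved to a directory such as ~/dagster-debug (hypothetical path):

python scripts/roofer-logs-parse-debug.py --debug-dir ~/dagster-debug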
10 changes: 10 additions & 0 deletions scripts/roofer-logs-parse.py
@@ -1,3 +1,13 @@
"""Extract the reconstruction step timings from the roofer logs that are recorded by
dagster.

Queries the successful reconstructions through the dagster graphql API.

Get the logs from the dagster storage.

Parse the `[reconstructor t]` log records of roofer and write the values to a CSV.
"""

import argparse
import csv
import re
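The exact layout of roofer's `[reconstructor t]` records is not shown in this diff, so the following is a sketch of the parsing step only, for a hypothetical record layout like `[reconstructor t] crop: 12, reconstruct: 345`:

import re

# Hypothetical record layout: "[reconstructor t] crop: 12, reconstruct: 345"
RE_TIMINGS = re.compile(r"(?<=\[reconstructor t\]) (.*)")


def parse_timings(record: str) -> dict[str, int]:
    """Split one '[reconstructor t]' record into a {step: milliseconds} dict."""
    match = RE_TIMINGS.search(record)
    if match is None:
        return {}
    pairs = (pair.split(":") for pair in match.group(1).split(","))
    return {step.strip(): int(value) for step, value in pairs}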
13 changes: 10 additions & 3 deletions scripts/roofer-logs-plot.py
@@ -1,17 +1,24 @@
import pandas as pd
from matplotlib import pyplot as plt

reconstruction_times = pd.read_csv("scripts/roofer-logs.csv", index_col="building_id")
reconstruction_times = pd.read_csv("scripts/reconstruction-times.csv").drop_duplicates(
subset=["building_id"]
)
reconstruction_times.set_index("building_id", inplace=True)

-fig = plt.figure(figsize=(10,7))
+fig = plt.figure(figsize=(10, 7))
fig.subplots_adjust(bottom=0.3)
p = reconstruction_times.median().plot(kind="bar")
plt.title("median")
plt.show()


-fig = plt.figure(figsize=(10,7))
+fig = plt.figure(figsize=(10, 7))
fig.subplots_adjust(bottom=0.3)
p = reconstruction_times.mean().plot(kind="bar")
plt.title("mean")
plt.show()

reconstruction_times["total"] = reconstruction_times.sum(axis=1)
above_10min = reconstruction_times[reconstruction_times["total"] > 600000]
above_10min.to_csv("scripts/reconstruction-times-above_10min.csv")
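The plot script presumably runs after scripts/roofer-logs-parse.py has produced scripts/reconstruction-times.csv:

python scripts/roofer-logs-plot.py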