Skip to content

Commit a496e38

Browse files
committed
Parse roofer logs from dagster
1 parent a5629e6 commit a496e38

File tree

3 files changed

+159
-0
lines changed

3 files changed

+159
-0
lines changed

scripts/reload-code-location.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
"""Trigger a dagster code-location reload through the dagit GraphQL API."""
import json

import requests  # NOTE: was missing in the original -- the script raised NameError

# GraphQL mutation: ask dagit to reload one repository location and report
# either the reloaded location (with its repositories) or the load failure.
RELOAD_REPOSITORY_LOCATION_MUTATION = """
mutation ($repositoryLocationName: String!) {
  reloadRepositoryLocation(repositoryLocationName: $repositoryLocationName) {
    __typename
    ... on RepositoryLocation {
      name
      repositories {
        name
      }
      isReloadSupported
    }
    ... on RepositoryLocationLoadFailure {
      name
      error {
        message
      }
    }
  }
}
"""

dagit_host = "your_dagit_host_here"

variables = {
    "repositoryLocationName": "your_location_name_here",
}

# Let requests build and URL-encode the query string. The original interpolated
# the raw multi-line GraphQL document straight into the URL, which produces an
# invalid URL (unencoded newlines, spaces, braces).
reload_res = requests.post(
    f"http://{dagit_host}:3000/graphql",
    params={
        "query": RELOAD_REPOSITORY_LOCATION_MUTATION,
        "variables": json.dumps(variables),
    },
).json()

# True when dagit reports a successful reload (vs. RepositoryLocationLoadFailure).
did_succeed = reload_res["data"]["reloadRepositoryLocation"]["__typename"] == "RepositoryLocation"

scripts/roofer-logs-parse.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
"""Parse per-building reconstruction timings (roofer logs) from dagster
compute logs and write them to a CSV file.

Successful runs of the ``nl_reconstruct_debug`` job are discovered through the
dagster GraphQL API; each run's ``*.err`` compute logs under the dagster
storage directory are then scanned for ``[reconstructor t]`` timing records.
"""
import argparse
import csv
import re
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--host", help="dagster host", default="localhost")
parser.add_argument("--port", help="dagster port", type=int, default=3000)
parser.add_argument(
    "--storage",
    help="dagster storage location",
    type=Path,
    # NOTE(review): the original declared this argument required=True *and*
    # gave it a default; a default makes `required` pointless, so the flag is
    # now simply optional with the same default.
    default=Path("/opt/dagster/dagster_home/storage"),
)
parser.add_argument("-o", "--output", required=True, type=Path)

# GraphQL query: all successful runs of the nl_reconstruct_debug job.
ROOFER_SUCCESS_QUERY = """
query FilteredRunsQuery {
  runsOrError(
    filter: { statuses: [SUCCESS] pipelineName: "nl_reconstruct_debug" }
  ) {
    __typename
    ... on Runs {
      results {
        runId
        jobName
        status
        runConfigYaml
        startTime
        endTime
        pipelineName
      }
    }
  }
}
"""

# Column order of the output CSV: the building id plus one column per roofer
# reconstruction timer.
CSV_FIELDNAMES = [
    "building_id",
    "ArrangementOptimiser",
    "ArrangementBuilder",
    "SegmentRasteriser",
    "LineRegulariser",
    "PlaneIntersector",
    "extrude",
    "LineDetector",
    "AlphaShaper_ground",
    "PlaneDetector_ground",
    "AlphaShaper",
    "PlaneDetector",
]

# Matches a timing record such as:
#   ... [reconstructor t] /data/.../NL.IMBAG.Pand.X/reconstruct/NL.IMBAG.Pand.X.city.jsonl ((ArrangementOptimiser, 0),(extrude, 2),...,)
# group(1): the .city.jsonl path, group(2): the "(timer, value)" list.
RE_TIMING_RECORD = re.compile(r"(?<=\[reconstructor t\]) (/.*?\.\S*) (.*)")


def parse_log_record(record):
    """Parse one log line into ``{"building_id": str, <timer>: int, ...}``.

    Returns None when the line is not a ``[reconstructor t]`` timing record,
    or when its timing list is malformed.
    """
    res = RE_TIMING_RECORD.search(record)
    if res is None:
        return None
    # e.g. .../NL.IMBAG.Pand.1709100000243562.city.jsonl
    #   -> building_id "NL.IMBAG.Pand.1709100000243562"
    # removesuffix, NOT rstrip: rstrip(".city") strips any trailing run of the
    # characters {'.','c','i','t','y'}, which would corrupt ids ending in them.
    parsed = {"building_id": Path(res.group(1)).stem.removesuffix(".city")}
    try:
        # group(2) is e.g. "(ArrangementOptimiser, 0),(extrude, 2),...,)"
        for chunk in res.group(2).split("),("):
            name, value = chunk.strip(" ()").strip("(),").split(", ")
            parsed[name] = int(value)
    except ValueError:
        # Malformed timing entry (unexpected separator or non-integer value):
        # skip the whole record, as the original did.
        return None
    return parsed


def main():
    """Query dagster for successful runs and write all parsed timings to CSV."""
    import requests  # local import: only needed when running as a script

    args = parser.parse_args()

    # Let requests URL-encode the multi-line GraphQL document (the original
    # interpolated it into the URL unencoded), and fail loudly on HTTP errors.
    roofer_res = requests.post(
        f"http://{args.host}:{args.port}/graphql",
        params={"query": ROOFER_SUCCESS_QUERY},
    )
    roofer_res.raise_for_status()
    runs = roofer_res.json()["data"]["runsOrError"]["results"]

    # Every *.err compute log of every successful run, e.g.
    # <storage>/<runId>/compute_logs/ecsdmnmy.err
    logpaths_stderr = [
        p
        for run in runs
        for p in args.storage.joinpath(run["runId"], "compute_logs").glob("*.err")
        if p.is_file()
    ]

    with args.output.open("w") as csvfile:
        # csv.QUOTE_STRINGS requires Python >= 3.12.
        csvwriter = csv.DictWriter(
            csvfile, quoting=csv.QUOTE_STRINGS, fieldnames=CSV_FIELDNAMES
        )
        csvwriter.writeheader()
        for logpath in logpaths_stderr:
            # Named `logfile` so it does not shadow the open CSV handle.
            with logpath.open() as logfile:
                for record in logfile:
                    parsed = parse_log_record(record)
                    if parsed is not None:
                        csvwriter.writerow(parsed)


if __name__ == "__main__":
    main()

scripts/roofer-logs-plot.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
"""Plot the median and mean roofer reconstruction time per step.

Reads the CSV produced by scripts/roofer-logs-parse.py (one row per building,
one column per roofer reconstruction timer).
"""
import pandas as pd
from matplotlib import pyplot as plt

# Location of the CSV written by scripts/roofer-logs-parse.py.
LOGS_CSV = "scripts/roofer-logs.csv"

reconstruction_times = pd.read_csv(LOGS_CSV, index_col="building_id")


def _plot_summary(series, title):
    """Bar-plot one per-step aggregate of the reconstruction times."""
    fig = plt.figure(figsize=(10, 7))
    # Leave room at the bottom for the long, rotated step names on the x axis.
    fig.subplots_adjust(bottom=0.3)
    series.plot(kind="bar")
    plt.title(title)
    plt.show()


# The original duplicated the figure boilerplate for each aggregate;
# factored into one helper.
_plot_summary(reconstruction_times.median(), "median")
_plot_summary(reconstruction_times.mean(), "mean")

0 commit comments

Comments
 (0)