Skip to content

Commit 7f03178

Browse files
authored
Merge pull request #257 from ing-bank/develop
Release v1.4
2 parents 1c22b88 + a5b9a30 commit 7f03178

File tree

11 files changed

+295
-18
lines changed

11 files changed

+295
-18
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
repos:
22
- repo: https://github.com/psf/black
3-
rev: 22.8.0
3+
rev: 22.10.0
44
hooks:
55
- id: black
66
- repo: https://github.com/pycqa/isort
@@ -18,7 +18,7 @@ repos:
1818
- tryceratops
1919
args: [ "--ignore=E501,E203,W503,TC003,TC101,TC300"]
2020
- repo: https://github.com/asottile/pyupgrade
21-
rev: v2.37.3
21+
rev: v3.1.0
2222
hooks:
2323
- id: pyupgrade
2424
args: ['--py36-plus','--exit-zero-even-if-changed']
@@ -34,7 +34,7 @@ repos:
3434
language: system
3535
pass_filenames: false
3636
- repo: https://github.com/nbQA-dev/nbQA
37-
rev: 1.4.0
37+
rev: 1.5.2
3838
hooks:
3939
- id: nbqa-black
4040
- id: nbqa-pyupgrade

examples/synthetic_data_streams/hyperplane.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
dataset_name = "hyperplane"
1212
v = "1"
1313

14-
# Monitor the each feature w.r.t. the label
14+
# Monitor each feature w.r.t. the label
1515
features = [f"index:attr{i}:output" for i in range(10)]
1616

1717
# Also monitor predictions w.r.t. the label (see below)

popmon/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ class OverviewSection(SectionModel):
140140
name: str = "Overview"
141141
"""Name of the overview section in the report"""
142142

143-
description: str = "Alerts aggregated per feature"
143+
description: str = "Overview of the dataset, analysis and alerts."
144144
"""Description of the overview section in the report"""
145145

146146

popmon/pipeline/metrics_pipelines.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from ..base import Module, Pipeline
4444
from ..config import Settings
4545
from ..hist.hist_splitter import HistSplitter
46+
from .timing import Timing
4647

4748

4849
def get_metrics_pipeline_class(reference_type, reference):
@@ -88,6 +89,10 @@ def create_metrics_pipeline(
8889
return pipeline
8990

9091

92+
def get_timing_module(key: str) -> List[Module]:
    """Build the single-element module list that records a timestamp.

    :param str key: passed to ``Timing`` as its ``store_key``
    :return: list containing one ``Timing`` module, so it can be concatenated
        with the other module lists of a pipeline
    """
    timing = Timing(store_key=key)
    return [timing]
94+
95+
9196
def get_splitting_modules(
9297
hists_key, features, time_axis
9398
) -> List[Union[Module, Pipeline]]:
@@ -251,11 +256,13 @@ def __init__(
251256
]
252257

253258
modules = (
254-
get_splitting_modules(hists_key, settings.features, settings.time_axis)
259+
get_timing_module("start_time")
260+
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
255261
+ reference_modules
256262
+ get_trend_modules(settings.comparison.window)
257263
+ get_static_bound_modules(settings.monitoring.pull_rules)
258264
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
265+
+ get_timing_module("end_time")
259266
)
260267
super().__init__(modules)
261268

@@ -312,11 +319,13 @@ def __init__(
312319
),
313320
]
314321
modules = (
315-
get_splitting_modules(hists_key, settings.features, settings.time_axis)
322+
get_timing_module("start_time")
323+
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
316324
+ reference_modules
317325
+ get_trend_modules(settings.comparison.window)
318326
+ get_static_bound_modules(settings.monitoring.pull_rules)
319327
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
328+
+ get_timing_module("end_time")
320329
)
321330
super().__init__(modules)
322331

@@ -369,11 +378,13 @@ def __init__(
369378
]
370379

371380
modules = (
372-
get_splitting_modules(hists_key, settings.features, settings.time_axis)
381+
get_timing_module("start_time")
382+
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
373383
+ reference_modules
374384
+ get_trend_modules(settings.comparison.window)
375385
+ get_dynamic_bound_modules(settings.monitoring.pull_rules)
376386
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
387+
+ get_timing_module("end_time")
377388
)
378389
super().__init__(modules)
379390

@@ -424,10 +435,12 @@ def __init__(
424435
]
425436

426437
modules = (
427-
get_splitting_modules(hists_key, settings.features, settings.time_axis)
438+
get_timing_module("start_time")
439+
+ get_splitting_modules(hists_key, settings.features, settings.time_axis)
428440
+ reference_modules
429441
+ get_trend_modules(settings.comparison.window)
430442
+ get_dynamic_bound_modules(settings.monitoring.pull_rules)
431443
+ get_traffic_light_modules(settings.monitoring.monitoring_rules)
444+
+ get_timing_module("end_time")
432445
)
433446
super().__init__(modules)

popmon/pipeline/report.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@
3535

3636

3737
def stability_report(
38-
hists, settings: Optional[Settings] = None, reference=None, **kwargs
38+
hists,
39+
settings: Optional[Settings] = None,
40+
reference=None,
41+
**kwargs,
3942
):
4043
"""Create a data stability monitoring html report for given dict of input histograms.
4144

popmon/pipeline/report_pipelines.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,9 @@ def __init__(
191191
read_key="traffic_lights",
192192
store_key=sections_key,
193193
settings=settings.report,
194+
reference_type=settings.reference_type,
195+
time_axis=settings.time_axis,
196+
bin_specs=settings.bin_specs,
194197
),
195198
# generate section with histogram
196199
HistogramSection(

popmon/pipeline/timing.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright (c) 2022 ING Wholesale Banking Advanced Analytics
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
# this software and associated documentation files (the "Software"), to deal in
5+
# the Software without restriction, including without limitation the rights to
6+
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
7+
# the Software, and to permit persons to whom the Software is furnished to do so,
8+
# subject to the following conditions:
9+
#
10+
# The above copyright notice and this permission notice shall be included in all
11+
# copies or substantial portions of the Software.
12+
#
13+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15+
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16+
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17+
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19+
from datetime import datetime
20+
21+
from ..base import Module
22+
23+
24+
class Timing(Module):
    """Pipeline module whose transform yields the current timestamp.

    ``store_key`` is declared as the module's only output key; the module
    declares no input keys.
    """

    _input_keys = ()
    _output_keys = ("store_key",)

    def __init__(self, store_key: str, **kwargs):
        """Create the module.

        :param str store_key: key declared as this module's output
        :param kwargs: extra keyword arguments, kept on the instance as ``self.kwargs``
        """
        super().__init__()

        self.kwargs = kwargs
        self.store_key = store_key

    def transform(self):
        """Return ``datetime.now()`` (naive, local time) and log the target key."""
        timestamp = datetime.now()
        self.logger.info(f"storing time to {self.store_key}")
        return timestamp

popmon/visualization/overview_section.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1919

2020

21+
from datetime import datetime
2122
from typing import Optional
2223

2324
import numpy as np
@@ -28,7 +29,8 @@
2829
from ..config import Report
2930
from ..resources import templates_env
3031
from ..utils import filter_metrics
31-
from ..visualization.utils import _prune
32+
from ..version import version as __version__
33+
from ..visualization.utils import _prune, get_reproduction_table, get_summary_table
3234

3335

3436
class OverviewSectionGenerator(Module):
@@ -37,14 +39,17 @@ class OverviewSectionGenerator(Module):
3739
which later will be used for the report generation.
3840
"""
3941

40-
_input_keys = ("read_key", "dynamic_bounds", "store_key")
42+
_input_keys = ("read_key", "dynamic_bounds", "store_key", "start_time", "end_time")
4143
_output_keys = ("store_key",)
4244

4345
def __init__(
4446
self,
4547
read_key,
4648
store_key,
4749
settings: Report,
50+
reference_type,
51+
time_axis,
52+
bin_specs,
4853
features=None,
4954
ignore_features=None,
5055
static_bounds=None,
@@ -68,6 +73,8 @@ def __init__(
6873
super().__init__()
6974
self.read_key = read_key
7075
self.store_key = store_key
76+
self.start_time = "start_time"
77+
self.end_time = "end_time"
7178
self.dynamic_bounds = dynamic_bounds
7279
self.static_bounds = static_bounds
7380

@@ -76,6 +83,9 @@ def __init__(
7683
self.prefix = prefix
7784
self.suffices = suffices
7885
self.ignore_stat_endswith = ignore_stat_endswith or []
86+
self.reference_type = reference_type
87+
self.time_axis = time_axis
88+
self.bin_specs = bin_specs
7989

8090
self.last_n = settings.last_n
8191
self.skip_first_n = settings.skip_first_n
@@ -92,6 +102,8 @@ def transform(
92102
data_obj: dict,
93103
dynamic_bounds: Optional[dict] = None,
94104
sections: Optional[list] = None,
105+
start_time: Optional[datetime] = None,
106+
end_time: Optional[datetime] = None,
95107
):
96108
assert isinstance(data_obj, dict)
97109
if dynamic_bounds is None:
@@ -104,12 +116,17 @@ def transform(
104116
features = self.get_features(list(data_obj.keys()))
105117

106118
self.logger.info(f'Generating section "{self.section_name}"')
107-
119+
time_windows = 0
108120
values = {}
121+
offset = ""
122+
max_timestamp = ""
109123
for feature in tqdm(features, ncols=100):
110124
df = data_obj.get(feature, pd.DataFrame())
111-
fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index))
125+
time_windows = len(df.index)
126+
offset = df.index.min()
127+
max_timestamp = df.index.max()
112128

129+
fdbounds = dynamic_bounds.get(feature, pd.DataFrame(index=df.index))
113130
assert all(df.index == fdbounds.index)
114131

115132
# prepare date labels
@@ -131,11 +148,43 @@ def transform(
131148
self.skip_last_n,
132149
)
133150

151+
# Dataset summary table and Analysis Details table
152+
tables = []
153+
bin_width = (
154+
self.bin_specs[self.time_axis]["bin_width"]
155+
if self.time_axis in self.bin_specs.keys()
156+
else 0
157+
)
158+
159+
if (
160+
self.time_axis in self.bin_specs.keys()
161+
and self.bin_specs[self.time_axis]["bin_offset"] > 0
162+
):
163+
offset = datetime.utcfromtimestamp(
164+
self.bin_specs[self.time_axis]["bin_offset"] // 1e9
165+
)
166+
tables.append(
167+
get_summary_table(
168+
len(features),
169+
time_windows,
170+
self.time_axis,
171+
self.reference_type,
172+
bin_width,
173+
offset,
174+
max_timestamp,
175+
)
176+
)
177+
178+
tables.append(get_reproduction_table(start_time, end_time, __version__))
179+
180+
# overview plots
134181
plots = [_plot_metrics(values)]
135182
# filter out potential empty plots (from skip empty plots)
136183
plots = [e for e in plots if len(e["plot"])]
137184
plots = sorted(plots, key=lambda plot: plot["name"])
138185

186+
plots = tables + plots
187+
139188
sections.append(
140189
{
141190
"section_title": self.section_name,
@@ -169,10 +218,11 @@ def _plot_metrics(
169218
)
170219

171220
return {
172-
"name": "Alert frequency per Feature",
221+
"name": "Alerts",
173222
"type": "alert",
174223
"description": "",
175224
"plot": plot,
225+
"layout": "",
176226
"full_width": True,
177227
}
178228

popmon/visualization/templates/section.html

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ <h3> {{ feature.titles.get(ref, ref) }} </h3>
5555
{%- else -%}
5656
<div class="row" >
5757
{%- for metric in plots -%}
58+
<script>
59+
{%- set curr = loop.index -%}
60+
layouts["{{ section_index }}{{ curr }}-{{ metric.type }}"] = {{ metric.layout | json_plot }};
61+
</script>
5862
{%- with metric=metric -%}
5963
{%- include 'card.html' -%}
6064
{%- endwith -%}

0 commit comments

Comments
 (0)