Skip to content

Commit

Permalink
Add first version of presets and report.
Browse files Browse the repository at this point in the history
  • Loading branch information
Liraim committed Dec 11, 2024
1 parent ede9486 commit c194b4b
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 45 deletions.
83 changes: 64 additions & 19 deletions examples/metric_workbench.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,19 @@
"import pandas as pd\n",
"\n",
"from evidently import ColumnType\n",
"from evidently.model.widget import BaseWidgetInfo\n",
"\n",
"from evidently.v2.datasets import DatasetColumn\n",
"\n",
"from evidently.v2.datasets import Scorer\n",
"from evidently.v2.metrics import Metric\n",
"from evidently.v2.datasets import Dataset\n",
"from evidently.v2.metrics import MetricResult\n",
"from evidently.v2.metrics import SingleValue\n",
"from evidently.v2.metrics import SingleValueCheck"
"from evidently.v2.metrics import SingleValueCheck\n",
"from evidently.v2.metrics.base import MetricId\n",
"from evidently.v2.metrics.min import min_metric\n",
"from evidently.v2.presets import PresetResult"
]
},
{
Expand Down Expand Up @@ -123,21 +128,8 @@
" CustomColumnScorer(\"column_2\", my_scorer, alias=\"column 2 custom function\"),\n",
" CustomScorer(my_scorer2, alias=\"global custom function\"),\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd5aed998289f2f3",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:20:48.836818Z",
"start_time": "2024-12-09T18:20:48.830392Z"
}
},
"outputs": [],
"source": [
")\n",
"\n",
"dataset.as_dataframe()"
]
},
Expand All @@ -153,8 +145,6 @@
},
"outputs": [],
"source": [
"from evidently.descriptors import TextLength\n",
"\n",
"from evidently.v2.checks.numerical_checks import le, ge\n",
"from typing import Optional\n",
"from typing import List\n",
Expand Down Expand Up @@ -187,7 +177,7 @@
" return f\"Max value for {self._column_name}\"\n",
"\n",
"\n",
"result = max_metric(\"column_1\", checks=None).call(dataset, None)\n",
"result = max_metric(\"column_1\", checks=[]).call(dataset, None)\n",
"result"
]
},
Expand Down Expand Up @@ -232,6 +222,61 @@
"source": [
"results[0].value"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "83ca61bb9521db5f",
"metadata": {},
"outputs": [],
"source": [
"from evidently.v2.presets import MetricPreset\n",
"\n",
"class ColumnSummary(MetricPreset):\n",
" def __init__(self, column: str):\n",
" self._column = column\n",
"\n",
" def metrics(self) -> List[Metric]:\n",
" return [\n",
" min_metric(self._column),\n",
" max_metric(self._column),\n",
" ]\n",
" \n",
" def calculate(self, metric_results: Dict[MetricId, MetricResult]) -> PresetResult:\n",
" return PresetResult(widget=[\n",
" *metric_results[min_metric(self._column).id].widget,\n",
" *metric_results[max_metric(self._column).id].widget,\n",
" ])\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a5ae9f3e-d3b0-4a6f-8c1b-4049973e74fe",
"metadata": {},
"outputs": [],
"source": [
"ColumnSummary(\"column_1\").call(context)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "40d53c8d66c5fda4",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-11T22:47:23.058784Z",
"start_time": "2024-12-11T22:47:22.896178Z"
}
},
"outputs": [],
"source": [
"from evidently.v2.report import Report\n",
"\n",
"report = Report([min_metric(\"column_1\"), max_metric(\"column_1\")])\n",
"snapshot = report.run(dataset, None)\n",
"snapshot"
]
}
],
"metadata": {
Expand Down
23 changes: 14 additions & 9 deletions src/evidently/v2/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,7 @@ def widget(self, value: List[BaseWidgetInfo]):
self._widget = value


def render_results(results: Union[MetricResult, List[MetricResult]], html=True):
data = []
if isinstance(results, MetricResult):
data = [results]
else:
data = results
widgets = list(itertools.chain(*[item.widget for item in data]))
def render_widgets(widgets: List[BaseWidgetInfo]):
dashboard_id, dashboard_info, graphs = (
"metric_" + str(uuid.uuid4()).replace("-", ""),
DashboardInfo("Report", widgets=widgets),
Expand All @@ -61,11 +55,22 @@ def render_results(results: Union[MetricResult, List[MetricResult]], html=True):
dashboard_info=dashboard_info,
additional_graphs=graphs,
)
if html:
return HTML(inline_iframe_html_template(template_params))
return inline_iframe_html_template(template_params)


def render_results(results: Union[MetricResult, List[MetricResult]], html=True):
    """Render one metric result, or a list of them, as a single report.

    Args:
        results: a single ``MetricResult`` or a list of them. The widgets of
            all results are concatenated in order.
        html: when truthy, the rendered markup is wrapped in ``HTML``;
            otherwise the value returned by ``render_widgets`` is returned
            unchanged.

    Returns:
        ``HTML(rendered)`` or the raw value from ``render_widgets``.
    """
    items = [results] if isinstance(results, MetricResult) else results
    # Flatten the per-result widget lists into one list, preserving order.
    all_widgets = [widget for item in items for widget in item.widget]
    rendered = render_widgets(all_widgets)
    return HTML(rendered) if html else rendered


TResult = TypeVar("TResult", bound=MetricResult)

MetricReturnValue: TypeAlias = Tuple[TResult, BaseWidgetInfo]
Expand Down
5 changes: 4 additions & 1 deletion src/evidently/v2/metrics/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class GroupByMetric(Metric):
def __init__(self, metric: Metric, column_name: str, label: str):
def __init__(self, metric: Metric, column_name: str, label: object):
super().__init__(f"{metric.id}:group_by:{label}")
self._metric = metric
self._column_name = column_name
Expand All @@ -36,3 +36,6 @@ def __init__(self, metric: Metric, column_name: str):
def generate_metrics(self, context: Context) -> List[Metric]:
    """Expand the wrapped metric into one ``GroupByMetric`` per column label.

    The labels are taken from ``context.column(self._column_name).labels()``.
    """
    column_data = context.column(self._column_name)
    return [
        GroupByMetric(self._metric, self._column_name, value)
        for value in column_data.labels()
    ]

def label_metric(self, label: object) -> Metric:
    """Build the ``GroupByMetric`` of the wrapped metric for a single ``label``."""
    metric_for_label = GroupByMetric(self._metric, self._column_name, label)
    return metric_for_label
8 changes: 4 additions & 4 deletions src/evidently/v2/metrics/min.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from typing import List
from typing import Optional

from evidently.v2.checks.base import SingleValueCheck
from evidently.v2.datasets import Dataset
from evidently.v2.metrics.base import Metric
from evidently.v2.metrics.base import SingleValue
from evidently.v2.metrics import Metric
from evidently.v2.metrics import SingleValue
from evidently.v2.metrics import SingleValueCheck


class MinMetric(Metric[SingleValue]):
Expand All @@ -14,7 +14,7 @@ def __init__(self, column: str, checks: Optional[List[SingleValueCheck]] = None)

def calculate(self, current_data: Dataset, reference_data: Optional[Dataset]) -> SingleValue:
data = current_data.column(self._column)
value = data.min()
value = data.data.min()
return SingleValue(value)

def display_name(self) -> str:
Expand Down
32 changes: 32 additions & 0 deletions src/evidently/v2/presets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import abc
import dataclasses
from typing import Dict
from typing import List

from evidently.model.widget import BaseWidgetInfo
from evidently.v2.metrics import Metric
from evidently.v2.metrics import MetricResult
from evidently.v2.metrics.base import MetricId
from evidently.v2.metrics.base import render_widgets
from evidently.v2.report import Context


@dataclasses.dataclass
class PresetResult:
    """Result of a metric preset: the widgets to show for it."""

    # Widgets passed, in order, to ``render_widgets`` when the result is displayed.
    widget: List[BaseWidgetInfo]

    def _repr_html_(self):
        """Rich-display hook: return what ``render_widgets`` produces for ``self.widget``."""
        widgets = self.widget
        return render_widgets(widgets)


class MetricPreset(abc.ABC):
    """Base class for presets: a set of metrics plus a combined result.

    Subclasses implement ``metrics`` (which metrics to compute) and
    ``calculate`` (how to turn the computed results into a ``PresetResult``).

    The class derives from ``abc.ABC`` so that the ``@abc.abstractmethod``
    markers are actually enforced: a subclass that forgets one of the two
    methods fails at instantiation instead of on first use.
    """

    def call(self, context: Context) -> PresetResult:
        """Compute every metric of the preset through ``context`` and combine them.

        Each metric returned by ``metrics()`` is evaluated with
        ``context.calculate_metric``; the results, keyed by ``metric.id``,
        are handed to ``calculate``.
        """
        results = {metric.id: context.calculate_metric(metric) for metric in self.metrics()}
        return self.calculate(results)

    @abc.abstractmethod
    def metrics(self) -> List[Metric]:
        """Return the metrics this preset is built from."""
        raise NotImplementedError()

    @abc.abstractmethod
    def calculate(self, metric_results: Dict[MetricId, MetricResult]) -> PresetResult:
        """Build the preset result from the computed results, keyed by metric id."""
        raise NotImplementedError()
47 changes: 35 additions & 12 deletions src/evidently/v2/report.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
from itertools import chain
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import TypeVar

from ..base_metric import InputData
from .datasets import Dataset
from .datasets import DatasetColumn
from .metrics.base import Metric
from .metrics.base import MetricId
from .metrics.base import MetricResult
from .metrics.base import render_widgets

TResultType = TypeVar("TResultType", bound="MetricResult")

Expand All @@ -30,42 +32,63 @@ def labels(self):
class Context:
    """Calculation state shared by the metrics of one report run.

    Holds the input datasets, per-column helper data, a cache of metric
    results keyed by metric id, and a nested dict recording which metric was
    requested while which other metric was being calculated.

    ``init_dataset`` must be called before ``calculate_metric``: the input
    data is not set by ``__init__``.
    """

    _configuration: Optional["Report"]
    # Cache of computed results, keyed by metric id.
    _metrics: Dict[MetricId, MetricResult]
    # Nested dict of metric ids; top-level keys are the directly requested metrics.
    _metrics_graph: dict
    # Column name -> helper data built from the current dataset.
    _data_columns: Dict[str, ContextColumnData]
    # (current_data, reference_data) as passed to init_dataset.
    _input_data: Tuple[Dataset, Optional[Dataset]]
    # Level of _metrics_graph that newly requested metrics are attached to.
    _current_graph_level: dict

    def __init__(self):
        self._metrics = {}
        self._configuration = None
        self._data_columns = {}
        self._metrics_graph = {}
        self._current_graph_level = self._metrics_graph

    def init_dataset(self, current_data: Dataset, reference_data: Optional[Dataset]):
        """Store the input datasets and build column helpers for the current data."""
        self._input_data = (current_data, reference_data)
        # Only the column names are needed here, so iterate the keys directly.
        self._data_columns = {
            column_name: ContextColumnData(current_data.column(column_name))
            for column_name in current_data._data_definition._columns
        }

    def get_metric_result(self, metric: Metric[TResultType]) -> TResultType:
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    def column(self, column_name: str) -> ContextColumnData:
        """Return the helper data for ``column_name``.

        Raises:
            KeyError: if the column was not registered by ``init_dataset``.
        """
        return self._data_columns[column_name]

    def calculate_metric(self, metric: Metric[TResultType]) -> TResultType:
        """Return the result for ``metric``, computing and caching it on first request.

        The metric is registered in the graph under the current level, and
        that node becomes the current level while the metric is computed.
        The previous level is restored in a ``finally`` block, so an
        exception from ``metric.call`` cannot leave the graph pointing at
        the failed metric's node.
        """
        parent_level = self._current_graph_level
        # setdefault keeps an existing node when the metric was already requested at this level.
        self._current_graph_level = parent_level.setdefault(metric.id, {})
        try:
            if metric.id not in self._metrics:
                self._metrics[metric.id] = metric.call(*self._input_data)
        finally:
            self._current_graph_level = parent_level
        return self._metrics[metric.id]


class Snapshot:
    """One execution of a report: the report definition plus its calculation context."""

    _report: "Report"
    _context: Context  # stores report calculation progress

    def __init__(self, report: "Report"):
        self._report = report
        self._context = Context()

    def run(self, current_data: Dataset, reference_data: Optional[Dataset]):
        """Initialise the context with the datasets and calculate every metric of the report."""
        self._context.init_dataset(current_data, reference_data)
        for metric in self._report._metrics:
            self._context.calculate_metric(metric)

    def _repr_html_(self):
        """Rich-display hook: render the widgets of the top-level metrics of the graph, in order."""
        context = self._context
        # Only top-level keys of the metrics graph are rendered.
        widgets = chain.from_iterable(context._metrics[metric_id].widget for metric_id in context._metrics_graph)
        return render_widgets(list(widgets))


class Report:
    """A list of metrics that can be run against a dataset to produce a ``Snapshot``."""

    def __init__(self, metrics: List[Metric]):
        # Kept as given; Snapshot.run iterates this list.
        self._metrics = metrics

    def run(self, current_data: Dataset, reference_data: Optional[Dataset]) -> Snapshot:
        """Create a ``Snapshot`` for this report, run it on the datasets and return it."""
        snapshot = Snapshot(self)
        snapshot.run(current_data, reference_data)
        return snapshot

0 comments on commit c194b4b

Please sign in to comment.