Skip to content

Commit

Permalink
Add custom scorers.
Browse files Browse the repository at this point in the history
  • Loading branch information
Liraim committed Dec 9, 2024
1 parent 7644cb1 commit ede9486
Show file tree
Hide file tree
Showing 5 changed files with 227 additions and 41 deletions.
175 changes: 144 additions & 31 deletions examples/metric_workbench.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,156 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "initial_id",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:07:57.136304Z",
"start_time": "2024-12-09T18:07:57.132965Z"
}
},
"outputs": [],
"source": [
"from typing import Dict\n",
"from typing import Union\n",
"\n",
"import pandas as pd\n",
"\n",
"from evidently import ColumnType\n",
"\n",
"from evidently.v2.datasets import DatasetColumn\n",
"\n",
"from evidently.v2.datasets import Scorer\n",
"from evidently.v2.metrics import Metric\n",
"from evidently.v2.datasets import DataDefinition\n",
"from evidently.v2.datasets import Dataset\n",
"from evidently.v2.metrics import SingleValue\n",
"from evidently.v2.metrics import SingleValueCheck"
],
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "de4b011f992f1165",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:07:59.040196Z",
"start_time": "2024-12-09T18:07:59.036939Z"
}
},
"outputs": [],
"source": [
"from typing import Optional\n",
"\n",
"\n",
"class TextLengthScorer(Scorer):\n",
" def __init__(self, column_name: str, alias: Optional[str] = None):\n",
" super().__init__(alias or f\"{column_name}: Text Length\")\n",
" self._column_name = column_name\n",
"\n",
" def generate_data(self, dataset: \"Dataset\") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:\n",
" lengths = dataset.column(self._column_name).data.apply(len)\n",
" return DatasetColumn(type=ColumnType.Numerical, data=lengths)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dc99c9bcf41dd669",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:20:21.744281Z",
"start_time": "2024-12-09T18:20:21.738294Z"
}
},
"outputs": [],
"execution_count": null
"source": [
"class ToxicityScorer(Scorer):\n",
" def __init__(self, column_name: str, alias: Optional[str] = None):\n",
" super().__init__(alias or f\"{column_name}: Toxicity\")\n",
" self._column_name = column_name\n",
" \n",
" def generate_data(self, dataset: \"Dataset\") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:\n",
" from evidently.descriptors import ToxicityLLMEval\n",
" from evidently.options.base import Options\n",
"\n",
" feature = ToxicityLLMEval().feature(self._column_name)\n",
" data = feature.generate_features(dataset.as_dataframe(), None, Options())\n",
" return {\n",
" col: DatasetColumn(type=feature.get_type(f\"{feature.get_fingerprint()}.{col}\"), data=data[col])\n",
" for col in data.columns\n",
" }"
]
},
{
"cell_type": "code",
"id": "ecc120915bd08cf4",
"execution_count": null,
"id": "48fc3742208e6385",
"metadata": {},
"outputs": [],
"source": [
"from evidently.descriptors import TextLength\n",
"def my_scorer(data: DatasetColumn) -> DatasetColumn:\n",
" return DatasetColumn(type=ColumnType.Numerical, data=data.data)\n",
"\n",
"from evidently.v2.datasets import FeatureScorer\n",
"def my_scorer2(dataset: Dataset) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:\n",
" return dataset.column(\"column_1\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "279361d330bca4d8",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:20:46.958375Z",
"start_time": "2024-12-09T18:20:42.774548Z"
}
},
"outputs": [],
"source": [
"from evidently.v2.scorers import CustomColumnScorer\n",
"from evidently.v2.scorers import CustomScorer\n",
"from evidently.v2.scorers import TextLength\n",
"\n",
"data = pd.DataFrame(data={\"column_1\": [1, 2, 3, 4, -1, 5], \"column_2\": [\"a\", \"aa\", \"aaaa\", \"aaaaaaa\", \"a\", \"aa\"]})\n",
"\n",
"dataset = Dataset.from_pandas(\n",
" data,\n",
" data_definition=None,\n",
" scorers={\n",
" \"text_length\": FeatureScorer(TextLength().feature(\"column_2\")),\n",
" },\n",
" scorers=[\n",
" TextLength(\"column_2\", alias=\"column 2 length\"),\n",
" ToxicityScorer(\"column_2\"),\n",
" CustomColumnScorer(\"column_2\", my_scorer, alias=\"column 2 custom function\"),\n",
" CustomScorer(my_scorer2, alias=\"global custom function\"),\n",
" ],\n",
")"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fd5aed998289f2f3",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T18:20:48.836818Z",
"start_time": "2024-12-09T18:20:48.830392Z"
}
},
"outputs": [],
"source": [
"dataset.as_dataframe()"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9f49a28098d1cad0",
"metadata": {},
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T17:44:07.086154Z",
"start_time": "2024-12-09T17:44:07.078132Z"
}
},
"outputs": [],
"source": [
"from evidently.descriptors import TextLength\n",
"\n",
Expand Down Expand Up @@ -86,22 +186,22 @@
" def display_name(self) -> str:\n",
" return f\"Max value for {self._column_name}\"\n",
"\n",
"# usage example\n",
"data = pd.DataFrame(data={\"column_1\": [1, 2, 3, 4, -1, 5], \"column_2\": [\"a\", \"aa\", \"aaaa\", \"aaaaaaa\", \"a\", \"aa\"]})\n",
"\n",
"dataset = Dataset.from_pandas(data, data_definition=None, scorers={\n",
" \"text_length\": FeatureScorer(TextLength().feature(\"column_2\")),\n",
"})\n",
"result = max_metric(\"column_1\", checks=None).call(dataset, None)\n",
"result"
],
"outputs": [],
"execution_count": null
]
},
{
"cell_type": "code",
"id": "8ee572e1a08c1d89",
"metadata": {},
"execution_count": null,
"id": "9566651ec9d1bee1",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T17:45:16.507805Z",
"start_time": "2024-12-09T17:45:16.316855Z"
}
},
"outputs": [],
"source": [
"from evidently.v2.metrics.group_by import GroupBy\n",
"from evidently.v2.metrics.base import render_results\n",
Expand All @@ -111,14 +211,27 @@
"\n",
"context.init_dataset(dataset, None)\n",
"\n",
"metrics = GroupBy(max_metric(\"column_1\"), \"column_2\").generate_metrics(context)\n",
"metrics = GroupBy(max_metric(\"column 2 length\"), \"column_1\").generate_metrics(context)\n",
"\n",
"results = [metric.call(dataset, None) for metric in metrics]\n",
"\n",
"render_results(results)"
],
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f8e8eaadc002b5d1",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-09T17:44:36.385681Z",
"start_time": "2024-12-09T17:44:36.376478Z"
}
},
"outputs": [],
"execution_count": null
"source": [
"results[0].value"
]
}
],
"metadata": {
Expand Down
36 changes: 26 additions & 10 deletions src/evidently/v2/datasets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import abc
import dataclasses
from typing import Dict
from typing import List
from typing import Optional
from typing import Union

import pandas as pd

Expand Down Expand Up @@ -35,24 +38,34 @@ def __init__(self, type: ColumnType, data: pd.Series) -> None:


class Scorer:
def generate_data(self, dataset: "Dataset") -> Dict[str, DatasetColumn]:
def __init__(self, alias: str):
self._alias = alias

@abc.abstractmethod
def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
raise NotImplementedError()

@property
def alias(self) -> str:
return self._alias


class FeatureScorer(Scorer):
def __init__(self, feature: GeneratedFeatures):
self.feature = feature
def __init__(self, feature: GeneratedFeatures, alias: Optional[str] = None):
super().__init__(alias)
self._feature = feature
self._alias = alias

def generate_data(self, dataset: "Dataset") -> Dict[str, DatasetColumn]:
feature = self.feature.generate_features(dataset.as_dataframe(), None, Options())
def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
feature = self._feature.generate_features(dataset.as_dataframe(), None, Options())
if len(feature.columns) > 1:
return {
col: DatasetColumn(
type=self.feature.get_type(f"{self.feature.get_fingerprint()}.{col}"), data=feature[col]
type=self._feature.get_type(f"{self._feature.get_fingerprint()}.{col}"), data=feature[col]
)
for col in feature.columns
}
return {"": DatasetColumn(type=self.feature.get_type(), data=feature[feature.columns[0]])}
return DatasetColumn(type=self._feature.get_type(), data=feature[feature.columns[0]])


class Dataset:
Expand All @@ -63,12 +76,15 @@ def from_pandas(
cls,
data: pd.DataFrame,
data_definition: Optional[DataDefinition] = None,
scorers: Optional[Dict[str, Scorer]] = None,
scorers: Optional[List[Scorer]] = None,
) -> "Dataset":
dataset = PandasDataset(data, data_definition)
for key, scorer in scorers.items():
for scorer in scorers or []:
key = scorer.alias
new_column = scorer.generate_data(dataset)
if len(new_column) > 1:
if isinstance(new_column, DatasetColumn):
data[key] = new_column.data
elif len(new_column) > 1:
for col, value in new_column.items():
data[f"{key}.{col}"] = value.data
else:
Expand Down
9 changes: 9 additions & 0 deletions src/evidently/v2/scorers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from ._custom_scorers import CustomColumnScorer
from ._custom_scorers import CustomScorer
from ._text_length import TextLength

__all__ = [
"CustomColumnScorer",
"CustomScorer",
"TextLength",
]
30 changes: 30 additions & 0 deletions src/evidently/v2/scorers/_custom_scorers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from typing import Callable
from typing import Dict
from typing import Optional
from typing import Union

from evidently.v2.datasets import Dataset
from evidently.v2.datasets import DatasetColumn
from evidently.v2.datasets import Scorer


class CustomColumnScorer(Scorer):
    """Scorer that builds a new column by applying a user function to one existing column.

    Args:
        column_name: Name of the dataset column that is passed to ``func``.
        func: Callable that receives the source ``DatasetColumn`` and returns
            the derived ``DatasetColumn``.
        alias: Name under which the scorer's output is registered. When it is
            omitted (or empty), a name is derived from ``column_name`` and the
            function's name so that the alias is always a string.
    """

    def __init__(self, column_name: str, func: Callable[[DatasetColumn], DatasetColumn], alias: Optional[str] = None):
        # The base class annotates ``alias`` as ``str``; forwarding ``None``
        # would leave the scorer without a usable output name.
        # ``getattr`` covers callables without ``__name__`` (e.g. functools.partial).
        func_name = getattr(func, "__name__", type(func).__name__)
        super().__init__(alias or f"{column_name}: {func_name}")
        self._column_name = column_name
        self._func = func

    def generate_data(self, dataset: Dataset) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Look up the configured column in ``dataset`` and return ``func`` applied to it."""
        column_data = dataset.column(self._column_name)
        return self._func(column_data)


class CustomScorer(Scorer):
    """Scorer that delegates column generation to a user function taking the whole dataset.

    Args:
        func: Callable that receives the ``Dataset`` and returns either a single
            ``DatasetColumn`` or a mapping of names to ``DatasetColumn`` objects.
        alias: Name under which the scorer's output is registered. When it is
            omitted (or empty), the function's name is used so that the alias
            is always a string.
    """

    def __init__(
        self, func: Callable[[Dataset], Union[DatasetColumn, Dict[str, DatasetColumn]]], alias: Optional[str] = None
    ):
        # The base class annotates ``alias`` as ``str``; forwarding ``None``
        # would leave the scorer without a usable output name.
        # ``getattr`` covers callables without ``__name__`` (e.g. functools.partial).
        super().__init__(alias or getattr(func, "__name__", type(func).__name__))
        self._func = func

    def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Return whatever ``func`` produces for ``dataset``."""
        return self._func(dataset)
18 changes: 18 additions & 0 deletions src/evidently/v2/scorers/_text_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from typing import Dict
from typing import Optional
from typing import Union

from evidently import ColumnType
from evidently.v2.datasets import Dataset
from evidently.v2.datasets import DatasetColumn
from evidently.v2.datasets import Scorer


class TextLength(Scorer):
    """Scorer that produces a numerical column holding ``len()`` of each value in a column.

    Args:
        column_name: Name of the column whose values are measured.
        alias: Name under which the scorer's output is registered. When it is
            omitted (or empty), ``"<column_name>: Text Length"`` is used so that
            the alias is always a string.
    """

    def __init__(self, column_name: str, alias: Optional[str] = None):
        # The base class annotates ``alias`` as ``str``; forwarding ``None``
        # would leave the scorer without a usable output name.
        super().__init__(alias or f"{column_name}: Text Length")
        self._column_name: str = column_name

    def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
        """Apply ``len`` to every item of the configured column and wrap the result.

        Returns:
            A ``DatasetColumn`` of type ``ColumnType.Numerical`` with the lengths.
        """
        # NOTE(review): ``len`` raises TypeError for values without a length
        # (e.g. NaN for missing text) — confirm that inputs are always sized.
        column_items_lengths = dataset.as_dataframe()[self._column_name].apply(len)
        return DatasetColumn(type=ColumnType.Numerical, data=column_items_lengths)

0 comments on commit ede9486

Please sign in to comment.