Skip to content

Commit

Permalink
Fix column name for FeatureScorer.
Browse files Browse the repository at this point in the history
Allow multiple scorers with the same name (a numeric suffix is added automatically).
  • Loading branch information
Liraim committed Dec 12, 2024
1 parent c194b4b commit c350d95
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions src/evidently/v2/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,8 @@ def alias(self) -> str:

class FeatureScorer(Scorer):
def __init__(self, feature: GeneratedFeatures, alias: Optional[str] = None):
super().__init__(alias)
super().__init__(alias or f"{feature.as_column().display_name}")
self._feature = feature
self._alias = alias

def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
feature = self._feature.generate_features(dataset.as_dataframe(), None, Options())
Expand All @@ -68,6 +67,15 @@ def generate_data(self, dataset: "Dataset") -> Union[DatasetColumn, Dict[str, Da
return DatasetColumn(type=self._feature.get_type(), data=feature[feature.columns[0]])


def _determine_scorer_column_name(alias: str, columns: List[str]):
index = 1
key = alias
while key in columns:
key = f"{alias}_{index}"
index += 1
return key


class Dataset:
_data_definition: DataDefinition

Expand All @@ -80,7 +88,7 @@ def from_pandas(
) -> "Dataset":
dataset = PandasDataset(data, data_definition)
for scorer in scorers or []:
key = scorer.alias
key = _determine_scorer_column_name(scorer.alias, data.columns)
new_column = scorer.generate_data(dataset)
if isinstance(new_column, DatasetColumn):
data[key] = new_column.data
Expand Down

0 comments on commit c350d95

Please sign in to comment.