
Commit 5a653f9

Added examples for custom descriptors, semantic similarity; removed TextOverviewPreset (#1288)
1 parent 2bb5e6d commit 5a653f9

3 files changed: +94 −18 lines changed

examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb

+1 −1

@@ -50,7 +50,7 @@
 "from evidently.metrics import ColumnSummaryMetric, ColumnDistributionMetric, ColumnDriftMetric, DataDriftTable, TextDescriptorsDistribution, ColumnCategoryMetric\n",
 "from evidently.tests import TestColumnValueMin, TestColumnValueMean, TestCategoryShare, TestShareOfOutRangeValues\n",
 "\n",
-"from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextOverviewPreset, TextEvals\n",
+"from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextEvals\n",
 "\n",
 "from evidently.descriptors import HuggingFaceModel, HuggingFaceToxicityModel, OpenAIPrompting \n",
 "from evidently.descriptors import RegExp, BeginsWith, EndsWith, Contains, DoesNotContain, IncludesWords, ExcludesWords\n",

examples/how_to_questions/how_to_use_llm_judge_template.ipynb

+68 −11

@@ -15,7 +15,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval"
+"from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval\n",
+"from evidently.descriptors import SemanticSimilarity \n",
+"from evidently.descriptors import CustomColumnEval, CustomPairColumnEval"
 ]
 },
 {
@@ -52,7 +54,7 @@
 "\n",
 "from evidently.metrics import ColumnSummaryMetric\n",
 "\n",
-"from evidently.metric_preset import DataQualityPreset, TextOverviewPreset, TextEvals"
+"from evidently.metric_preset import DataQualityPreset, TextEvals"
 ]
 },
 {
@@ -233,7 +235,8 @@
 "source": [
 "report = Report(metrics=[\n",
 "    TextEvals(column_name=\"question\", descriptors=[\n",
-"        NegativityLLMEval(include_category=True) \n",
+"        NegativityLLMEval(include_category=True),\n",
+"        SemanticSimilarity(with_column=\"response\")\n",
 "    ]),\n",
 "    TextEvals(column_name=\"response\", descriptors=[\n",
 "        PIILLMEval(include_reasoning=False), \n",
@@ -308,6 +311,68 @@
 "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "7253dced-0c84-4e27-9c97-c4bb476ef110",
+"metadata": {},
+"source": [
+"### Custom descriptor over text data"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "c74f5f3d-56ac-42c1-b5e1-4c81411232b2",
+"metadata": {},
+"outputs": [],
+"source": [
+"def is_empty_string_callable(val1):\n",
+"    return pd.Series([\"EMPTY\" if val == \"\" else \"NON EMPTY\" for val in val1], index=val1.index)\n",
+"\n",
+"empty_string = CustomColumnEval(\n",
+"    func=is_empty_string_callable,\n",
+"    feature_type=\"cat\",\n",
+"    display_name=\"Empty response\"\n",
+")\n",
+"\n",
+"report = Report(metrics=[\n",
+"    ColumnSummaryMetric(column_name=empty_string.on(\"response\")),\n",
+"])\n",
+"\n",
+"report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+"           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+"           column_mapping=column_mapping)\n",
+"report "
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "82c8c30b-095c-4aeb-a87b-4fd637295fe7",
+"metadata": {},
+"outputs": [],
+"source": [
+"def exact_match_callable(val1, val2):\n",
+"    return pd.Series([\"MATCH\" if val else \"MISMATCH\" for val in val1 == val2])\n",
+"\n",
+"exact_match = CustomPairColumnEval(\n",
+"    func=exact_match_callable,\n",
+"    first_column=\"response\",\n",
+"    second_column=\"question\",\n",
+"    feature_type=\"cat\",\n",
+"    display_name=\"Exact match between response and question\"\n",
+")\n",
+"\n",
+"report = Report(metrics=[\n",
+"    ColumnSummaryMetric(column_name=exact_match.as_column())\n",
+"])\n",
+"\n",
+"report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+"           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+"           column_mapping=column_mapping)\n",
+"report "
+]
+},
 {
 "cell_type": "markdown",
 "id": "3806d7d8-5acf-45cb-b16b-3b4336dea6e0",
@@ -443,14 +508,6 @@
 "           column_mapping=column_mapping)\n",
 "report "
 ]
-},
-{
-"cell_type": "code",
-"execution_count": null,
-"id": "c63c0d6e-e5fc-44ec-a1cd-ef85c7585973",
-"metadata": {},
-"outputs": [],
-"source": []
 }
 ],
 "metadata": {

examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb

+25 −6

@@ -36,7 +36,7 @@
 "from evidently.report import Report\n",
 "from evidently.test_suite import TestSuite\n",
 "\n",
-"from evidently.metric_preset import TextOverviewPreset, TextEvals\n",
+"from evidently.metric_preset import TextEvals\n",
 "\n",
 "from evidently.metrics import TextDescriptorsDriftMetric\n",
 "from evidently.metrics import TextDescriptorsDistribution\n",
@@ -230,7 +230,7 @@
 "source": [
 "#NO descriptors\n",
 "text_overview_report = Report(metrics=[\n",
-"    TextOverviewPreset(column_name=\"Review_Text\")\n",
+"    TextEvals(column_name=\"Review_Text\")\n",
 "])\n",
 "\n",
 "text_overview_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
@@ -246,7 +246,8 @@
 "#NO descriptors, several columns\n",
 "\n",
 "text_overview_report = Report(metrics=[\n",
-"    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"])\n",
+"    TextEvals(column_name=\"Review_Text\"),\n",
+"    TextEvals(column_name=\"Title\"),\n",
 "])\n",
 "\n",
 "text_overview_report.run(reference_data=reviews_ref[:100], current_data=reviews_cur[:100], column_mapping=column_mapping)\n",
@@ -263,7 +264,7 @@
 "source": [
 "#WITH descriptors\n",
 "text_overview_report = Report(metrics=[\n",
-"    TextOverviewPreset(column_name=\"Review_Text\", descriptors=[\n",
+"    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
 "        OOV(),\n",
 "        NonLetterCharacterPercentage(),\n",
 "        TextLength(),\n",
@@ -287,7 +288,18 @@
 "outputs": [],
 "source": [
 "text_overview_report = Report(metrics=[\n",
-"    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"], descriptors=[\n",
+"    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
+"        OOV(),\n",
+"        NonLetterCharacterPercentage(),\n",
+"        TextLength(),\n",
+"        IncludesWords(words_list=['dress', 'gown']),\n",
+"        IncludesWords(words_list=['blouse', 'shirt']),\n",
+"        SentenceCount(),\n",
+"        WordCount(),\n",
+"        Sentiment(),\n",
+"        RegExp(reg_exp=r'.*\\?.*'),\n",
+"    ]),\n",
+"    TextEvals(column_name=\"Title\", descriptors=[\n",
 "        OOV(),\n",
 "        NonLetterCharacterPercentage(),\n",
 "        TextLength(),\n",
@@ -340,12 +352,19 @@
 "outputs": [],
 "source": [
 "summary_report = Report(metrics=[\n",
-"    ColumnSummaryMetric(column_name=SemanticSimilarity().on([\"Review_Text\", \"Title\"]))\n",
+"    ColumnSummaryMetric(column_name=SemanticSimilarity(with_column=\"Title\").on(\"Review_Text\"))\n",
 "])\n",
 "\n",
 "summary_report.run(reference_data=reviews_ref[:10], current_data=reviews_cur[:10], column_mapping=column_mapping)\n",
 "summary_report"
 ]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": []
 }
 ],
 "metadata": {
