|
15 | 15 | "metadata": {},
|
16 | 16 | "outputs": [],
|
17 | 17 | "source": [
|
18 |
| - "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval" |
| 18 | + "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval\n", |
| 19 | + "from evidently.descriptors import SemanticSimilarity \n", |
| 20 | + "from evidently.descriptors import CustomColumnEval, CustomPairColumnEval" |
19 | 21 | ]
|
20 | 22 | },
|
21 | 23 | {
|
|
52 | 54 | "\n",
|
53 | 55 | "from evidently.metrics import ColumnSummaryMetric\n",
|
54 | 56 | "\n",
|
55 |
| - "from evidently.metric_preset import DataQualityPreset, TextOverviewPreset, TextEvals" |
| 57 | + "from evidently.metric_preset import DataQualityPreset, TextEvals" |
56 | 58 | ]
|
57 | 59 | },
|
58 | 60 | {
|
|
233 | 235 | "source": [
|
234 | 236 | "report = Report(metrics=[\n",
|
235 | 237 | " TextEvals(column_name=\"question\", descriptors=[\n",
|
236 |
| - " NegativityLLMEval(include_category=True) \n", |
| 238 | + " NegativityLLMEval(include_category=True),\n", |
| 239 | + " SemanticSimilarity(with_column=\"response\")\n", |
237 | 240 | " ]),\n",
|
238 | 241 | " TextEvals(column_name=\"response\", descriptors=[\n",
|
239 | 242 | " PIILLMEval(include_reasoning=False), \n",
|
|
308 | 311 | "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
|
309 | 312 | ]
|
310 | 313 | },
|
| 314 | + { |
| 315 | + "cell_type": "markdown", |
| 316 | + "id": "7253dced-0c84-4e27-9c97-c4bb476ef110", |
| 317 | + "metadata": {}, |
| 318 | + "source": [ |
| 319 | + "### Custom descriptor over text data" |
| 320 | + ] |
| 321 | + }, |
| 322 | + { |
| 323 | + "cell_type": "code", |
| 324 | + "execution_count": null, |
| 325 | + "id": "c74f5f3d-56ac-42c1-b5e1-4c81411232b2", |
| 326 | + "metadata": {}, |
| 327 | + "outputs": [], |
| 328 | + "source": [ |
| 329 | + "def is_empty_string_callable(val1):\n", |
| 330 | + " return pd.Series([\"EMPTY\" if val == \"\" else \"NON EMPTY\" for val in val1], index=val1.index)\n", |
| 331 | + "\n", |
| 332 | + "empty_string = CustomColumnEval(\n", |
| 333 | + " func=is_empty_string_callable,\n", |
| 334 | + " feature_type=\"cat\",\n", |
| 335 | + " display_name=\"Empty response\"\n", |
| 336 | + ")\n", |
| 337 | + "\n", |
| 338 | + "report = Report(metrics=[\n", |
| 339 | + " ColumnSummaryMetric(column_name=empty_string.on(\"response\")),\n", |
| 340 | + "])\n", |
| 341 | + "\n", |
| 342 | + "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n", |
| 343 | + " current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n", |
| 344 | + " column_mapping=column_mapping)\n", |
| 345 | + "report " |
| 346 | + ] |
| 347 | + }, |
| 348 | + { |
| 349 | + "cell_type": "code", |
| 350 | + "execution_count": null, |
| 351 | + "id": "82c8c30b-095c-4aeb-a87b-4fd637295fe7", |
| 352 | + "metadata": {}, |
| 353 | + "outputs": [], |
| 354 | + "source": [ |
| 355 | + "def exact_match_callable(val1, val2):\n", |
| 356 | + " return pd.Series([\"MATCH\" if val else \"MISMATCH\" for val in val1 == val2])\n", |
| 357 | + "\n", |
| 358 | + "exact_match = CustomPairColumnEval(\n", |
| 359 | + " func=exact_match_callable,\n", |
| 360 | + " first_column=\"response\",\n", |
| 361 | + " second_column=\"question\",\n", |
| 362 | + " feature_type=\"cat\",\n", |
| 363 | + " display_name=\"Exact match between response and question\"\n", |
| 364 | + ")\n", |
| 365 | + "\n", |
| 366 | + "report = Report(metrics=[\n", |
| 367 | + " ColumnSummaryMetric(column_name=exact_match.as_column())\n", |
| 368 | + "])\n", |
| 369 | + "\n", |
| 370 | + "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n", |
| 371 | + " current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n", |
| 372 | + " column_mapping=column_mapping)\n", |
| 373 | + "report " |
| 374 | + ] |
| 375 | + }, |
311 | 376 | {
|
312 | 377 | "cell_type": "markdown",
|
313 | 378 | "id": "3806d7d8-5acf-45cb-b16b-3b4336dea6e0",
|
|
443 | 508 | " column_mapping=column_mapping)\n",
|
444 | 509 | "report "
|
445 | 510 | ]
|
446 |
| - }, |
447 |
| - { |
448 |
| - "cell_type": "code", |
449 |
| - "execution_count": null, |
450 |
| - "id": "c63c0d6e-e5fc-44ec-a1cd-ef85c7585973", |
451 |
| - "metadata": {}, |
452 |
| - "outputs": [], |
453 |
| - "source": [] |
454 | 511 | }
|
455 | 512 | ],
|
456 | 513 | "metadata": {
|
|
0 commit comments