Skip to content

Commit

Permalink
Fix SemanticSimilarity / CustomDescriptor index values. (#1120)
Browse files Browse the repository at this point in the history
Co-authored-by: Emeli Dral <[email protected]>
  • Loading branch information
Liraim and emeli-dral authored May 21, 2024
1 parent 901374a commit 0fc27a0
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 982 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
"\n",
"from evidently.metrics import ColumnSummaryMetric, ColumnDistributionMetric, ColumnDriftMetric, DataDriftTable, TextDescriptorsDistribution\n",
"\n",
"from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextOverviewPreset\n",
"from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextOverviewPreset, TextEvals\n",
"\n",
"from evidently.descriptors import HuggingFaceModel, OpenAIPrompting \n",
"from evidently.descriptors import RegExp, BeginsWith, EndsWith, Contains, DoesNotContain, IncludesWords, ExcludesWords\n",
Expand Down Expand Up @@ -159,13 +159,17 @@
"source": [
"#Built-in descriptors without parameters\n",
"report = Report(metrics=[\n",
" ColumnSummaryMetric(column_name = Sentiment(display_name=\"Question sentiment\").for_column(\"question\")),\n",
" ColumnSummaryMetric(column_name = TextLength(display_name= \"Question length\").for_column(\"question\")),\n",
" ColumnSummaryMetric(column_name = OOV(display_name= \"Question out of vocabulary words\").for_column(\"question\")),\n",
" ColumnSummaryMetric(column_name = Sentiment(display_name=\"Response sentiment\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = NonLetterCharacterPercentage(display_name=\"Non letter characters in response\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = SentenceCount(display_name=\"Sentence count in response\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = WordCount(display_name=\"Word count in response\").for_column(\"response\")),\n",
" TextEvals(column_name=\"question\", descriptors=[\n",
" Sentiment(display_name=\"Question sentiment\"),\n",
" TextLength(display_name= \"Question length\"),\n",
" OOV(display_name= \"Question out of vocabulary words\")\n",
" ]),\n",
" TextEvals(column_name=\"response\", descriptors=[\n",
" Sentiment(display_name=\"Response sentiment\"),\n",
" NonLetterCharacterPercentage(display_name=\"Non letter characters in response\"),\n",
" SentenceCount(display_name=\"Sentence count in response\"),\n",
" WordCount(display_name=\"Word count in response\")\n",
" ])\n",
"])\n",
"\n",
"report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)], \n",
Expand All @@ -183,17 +187,17 @@
"source": [
"#Built-in descriptors with parameters\n",
"report = Report(metrics=[\n",
" ColumnSummaryMetric(column_name = BeginsWith(display_name=\"'How' question\", prefix=\"How\").for_column(\"question\")),\n",
" ColumnSummaryMetric(column_name = EndsWith(display_name=\"Assisrance might be needed\", suffix=\"for assistance.\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = RegExp(reg_exp=r\"^I\", display_name= \"Question begins with 'I'\").for_column(\"question\")), \n",
" ColumnSummaryMetric(column_name = IncludesWords(words_list=['invoice', 'salary'],\n",
" display_name=\"Questions about invoices and salary\").for_column(\"question\")),\n",
" ColumnSummaryMetric(column_name = ExcludesWords(words_list=['wrong', 'mistake'], \n",
" display_name=\"Responses without mention of mistakes\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = Contains(items=['medical leave'], \n",
" display_name=\"contains 'medical leave'\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = DoesNotContain(items=['employee portal'], \n",
" display_name=\"does not contain 'employee portal'\").for_column(\"response\")),\n",
" TextEvals(column_name=\"question\", descriptors=[\n",
" BeginsWith(display_name=\"'How' question\", prefix=\"How\"),\n",
" RegExp(reg_exp=r\"^I\", display_name= \"Question begins with 'I'\"),\n",
" IncludesWords(words_list=['invoice', 'salary'], display_name=\"Questions about invoices and salary\")\n",
" ]),\n",
" TextEvals(column_name=\"response\", descriptors=[\n",
" EndsWith(display_name=\"Assisrance might be needed\", suffix=\"for assistance.\"),\n",
" ExcludesWords(words_list=['wrong', 'mistake'], display_name=\"Responses without mention of mistakes\"),\n",
" Contains(items=['medical leave'], display_name=\"contains 'medical leave'\"),\n",
" DoesNotContain(items=['employee portal'], display_name=\"does not contain 'employee portal'\")\n",
" ])\n",
"])\n",
"\n",
"report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)], \n",
Expand Down Expand Up @@ -260,17 +264,11 @@
"#Descriptors with external models\n",
"#to run the OpenAIPrompting descriptor, make sure you set the environment variable with your OpenAI token \n",
"report = Report(metrics=[\n",
" ColumnSummaryMetric(column_name = HuggingFaceModel(\"toxicity\", \"DaNLP/da-electra-hatespeech-detection\", {\"module_type\": \"measurement\"}, {\"toxic_label\": \"offensive\"}, \"toxicity\", display_name=\"Hugging Face Toxicity for response\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = OpenAIPrompting(prompt=pii_prompt, \n",
" prompt_replace_string=\"REPLACE\", \n",
" model=\"gpt-3.5-turbo-instruct\", \n",
" feature_type=\"num\",\n",
" display_name=\"PII for response (by gpt3.5)\").for_column(\"response\")),\n",
" ColumnSummaryMetric(column_name = OpenAIPrompting(prompt=negativity_prompt, \n",
" prompt_replace_string=\"REPLACE\", \n",
" model=\"gpt-3.5-turbo-instruct\", \n",
" feature_type=\"cat\",\n",
" display_name=\"Negativity for response (by gpt3.5)\").for_column(\"response\")),\n",
" TextEvals(column_name=\"response\", descriptors=[\n",
" HuggingFaceModel(\"toxicity\", \"DaNLP/da-electra-hatespeech-detection\", {\"module_type\": \"measurement\"}, {\"toxic_label\": \"offensive\"}, \"toxicity\", display_name=\"Hugging Face Toxicity for response\"),\n",
" OpenAIPrompting(prompt=pii_prompt, prompt_replace_string=\"REPLACE\", model=\"gpt-3.5-turbo-instruct\", feature_type=\"num\", display_name=\"PII for response (by gpt3.5)\"),\n",
" OpenAIPrompting(prompt=negativity_prompt, prompt_replace_string=\"REPLACE\", model=\"gpt-3.5-turbo-instruct\", feature_type=\"cat\", display_name=\"Negativity for response (by gpt3.5)\") \n",
" ])\n",
"])\n",
"\n",
"report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)], \n",
Expand All @@ -280,6 +278,31 @@
"report "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4838ba6b-7591-4186-b281-5d7ada978635",
"metadata": {},
"outputs": [],
"source": [
"#Descriptors with external models\n",
"#to run the OpenAIPrompting descriptor, make sure you set the environment variable with your OpenAI token \n",
"\n",
"report = Report(metrics=[\n",
" TextEvals(column_name=\"response\", descriptors=[\n",
" OpenAIPrompting(prompt=pii_prompt, prompt_replace_string=\"REPLACE\", \n",
" model=\"gpt-4o\", feature_type=\"num\", context_column=\"question\",\n",
" display_name=\"PII for response with question in context (by gpt4o)\"),\n",
" ])\n",
"])\n",
"\n",
"report.run(reference_data=assistant_logs[:10],\n",
" current_data=assistant_logs[10:20],\n",
" column_mapping=column_mapping)\n",
"\n",
"report "
]
},
{
"cell_type": "markdown",
"id": "ba4ac83b-4d07-4050-95fa-45009ab5aa1d",
Expand Down Expand Up @@ -309,6 +332,14 @@
"#current dataset enriched with descriptors\n",
"report.datasets()[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "225d87c3-703d-4ba8-b71f-16495f5e924d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
"from evidently.metrics import TextDescriptorsCorrelationMetric\n",
"from evidently.metrics import ColumnDriftMetric\n",
"\n",
"from evidently.descriptors import TextLength, TriggerWordsPresence, OOV, NonLetterCharacterPercentage, SentenceCount, WordCount, Sentiment, RegExp"
"from evidently.descriptors import TextLength, IncludesWords, OOV, NonLetterCharacterPercentage, SentenceCount, WordCount, Sentiment, RegExp, SemanticSimilarity"
]
},
{
Expand Down Expand Up @@ -154,8 +154,8 @@
"report = Report(metrics=[\n",
" TextDescriptorsDriftMetric(\"Review_Text\", descriptors={\n",
" \"Review Text Length\" : TextLength(),\n",
" \"Reviews about Dress\" : TriggerWordsPresence(words_list=['dress', 'gown']),\n",
" \"Review about Blouses\" : TriggerWordsPresence(words_list=['blouse', 'shirt']),\n",
" \"Reviews about Dress\" : IncludesWords(words_list=['dress', 'gown']),\n",
" \"Review about Blouses\" : IncludesWords(words_list=['blouse', 'shirt']),\n",
" \"Review Sentence Count\" : SentenceCount(),\n",
" \"Review Word Count\" : WordCount(),\n",
" \"Review Sentiment\" : Sentiment(),\n",
Expand Down Expand Up @@ -211,7 +211,7 @@
" SentenceCount(),\n",
" WordCount(),\n",
" Sentiment(),\n",
" TriggerWordsPresence(words_list=['blouse', 'shirt']),\n",
" IncludesWords(words_list=['blouse', 'shirt']),\n",
" ]\n",
" )\n",
"])\n",
Expand All @@ -220,30 +220,6 @@
"text_evals_report"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text_evals_report = Report(metrics=[\n",
" TextEvals(columns=[\"Review_Text\", \"Title\"], descriptors=[\n",
" OOV(),\n",
" NonLetterCharacterPercentage(),\n",
" TextLength(),\n",
" TriggerWordsPresence(words_list=['dress', 'gown']),\n",
" TriggerWordsPresence(words_list=['blouse', 'shirt']),\n",
" SentenceCount(),\n",
" WordCount(),\n",
" Sentiment(),\n",
" RegExp(reg_exp=r'.*\\?.*'),\n",
" ])\n",
"])\n",
"\n",
"text_overview_report.run(reference_data=reviews_ref[:100], current_data=reviews_cur[:100], column_mapping=column_mapping)\n",
"text_overview_report"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -321,6 +297,7 @@
" WordCount(),\n",
" Sentiment(),\n",
" RegExp(reg_exp=r'.*\\?.*'),\n",
" SemanticSimilarity(columns=[\"Review_Text\", \"Title\"])\n",
" ])\n",
"])\n",
"\n",
Expand Down
Loading

0 comments on commit 0fc27a0

Please sign in to comment.