From 5a653f97c76d702701d05492867ed72a51ff2298 Mon Sep 17 00:00:00 2001
From: Emeli Dral
Date: Fri, 6 Sep 2024 22:12:51 +0100
Subject: [PATCH] Added examples for custom descriptors, semantic similarity;
 removed TextOverviewPreset (#1288)

---
 ...o_evaluate_llm_with_text_descriptors.ipynb |  2 +-
 .../how_to_use_llm_judge_template.ipynb       | 79 ++++++++++++++++---
 ...descriptors_in_text_specific_metrics.ipynb | 31 ++++++--
 3 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb b/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
index b4da931bc1..6ed6daf5f3 100644
--- a/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
+++ b/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
@@ -50,7 +50,7 @@
     "from evidently.metrics import ColumnSummaryMetric, ColumnDistributionMetric, ColumnDriftMetric, DataDriftTable, TextDescriptorsDistribution, ColumnCategoryMetric\n",
     "from evidently.tests import TestColumnValueMin, TestColumnValueMean, TestCategoryShare, TestShareOfOutRangeValues\n",
     "\n",
-    "from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextOverviewPreset, TextEvals\n",
+    "from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextEvals\n",
     "\n",
     "from evidently.descriptors import HuggingFaceModel, HuggingFaceToxicityModel, OpenAIPrompting \n",
     "from evidently.descriptors import RegExp, BeginsWith, EndsWith, Contains, DoesNotContain, IncludesWords, ExcludesWords\n",
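
Note: TextOverviewPreset simply disappears from this notebook's imports; the equivalent report is built from TextEvals directly. A minimal sketch of the replacement pattern follows; the toy DataFrame, column name, and descriptor choice are illustrative and not part of this patch.

import pandas as pd

from evidently.report import Report
from evidently.metric_preset import TextEvals
from evidently.descriptors import TextLength, Sentiment

# Toy data for illustration; any DataFrame with a text column works.
data = pd.DataFrame({"response": ["Thanks, that helps!", "No comment.", ""]})

report = Report(metrics=[
    # TextEvals(column_name=...) takes over the role of the removed
    # TextOverviewPreset(column_name=...).
    TextEvals(column_name="response", descriptors=[
        TextLength(),
        Sentiment(),
    ]),
])
# reference_data is optional; passing None evaluates the current data only.
report.run(reference_data=None, current_data=data)
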
diff --git a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
index 202330f5d6..d6f57a6ea5 100644
--- a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
+++ b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
@@ -15,7 +15,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval"
+    "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval\n",
+    "from evidently.descriptors import SemanticSimilarity \n",
+    "from evidently.descriptors import CustomColumnEval, CustomPairColumnEval"
    ]
   },
   {
@@ -52,7 +54,7 @@
     "\n",
     "from evidently.metrics import ColumnSummaryMetric\n",
     "\n",
-    "from evidently.metric_preset import DataQualityPreset, TextOverviewPreset, TextEvals"
+    "from evidently.metric_preset import DataQualityPreset, TextEvals"
    ]
   },
   {
@@ -233,7 +235,8 @@
    "source": [
     "report = Report(metrics=[\n",
     "    TextEvals(column_name=\"question\", descriptors=[\n",
-    "        NegativityLLMEval(include_category=True) \n",
+    "        NegativityLLMEval(include_category=True),\n",
+    "        SemanticSimilarity(with_column=\"response\")\n",
     "    ]),\n",
     "    TextEvals(column_name=\"response\", descriptors=[\n",
     "        PIILLMEval(include_reasoning=False), \n",
@@ -308,6 +311,68 @@
     "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "7253dced-0c84-4e27-9c97-c4bb476ef110",
+   "metadata": {},
+   "source": [
+    "### Custom descriptor over text data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c74f5f3d-56ac-42c1-b5e1-4c81411232b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def is_empty_string_callable(val1):\n",
+    "    return pd.Series([\"EMPTY\" if val == \"\" else \"NON EMPTY\" for val in val1], index=val1.index)\n",
+    "\n",
+    "empty_string = CustomColumnEval(\n",
+    "    func=is_empty_string_callable,\n",
+    "    feature_type=\"cat\",\n",
+    "    display_name=\"Empty response\"\n",
+    ")\n",
+    "\n",
+    "report = Report(metrics=[\n",
+    "    ColumnSummaryMetric(column_name=empty_string.on(\"response\")),\n",
+    "])\n",
+    "\n",
+    "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+    "           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+    "           column_mapping=column_mapping)\n",
+    "report "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82c8c30b-095c-4aeb-a87b-4fd637295fe7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def exact_match_callable(val1, val2):\n",
+    "    return pd.Series([\"MATCH\" if val else \"MISMATCH\" for val in val1 == val2])\n",
+    "\n",
+    "exact_match = CustomPairColumnEval(\n",
+    "    func=exact_match_callable,\n",
+    "    first_column=\"response\",\n",
+    "    second_column=\"question\",\n",
+    "    feature_type=\"cat\",\n",
+    "    display_name=\"Exact match between response and question\"\n",
+    ")\n",
+    "\n",
+    "report = Report(metrics=[\n",
+    "    ColumnSummaryMetric(column_name=exact_match.as_column())\n",
+    "])\n",
+    "\n",
+    "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+    "           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+    "           column_mapping=column_mapping)\n",
+    "report "
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "3806d7d8-5acf-45cb-b16b-3b4336dea6e0",
    "metadata": {},
@@ -443,14 +508,6 @@
     "           column_mapping=column_mapping)\n",
     "report "
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c63c0d6e-e5fc-44ec-a1cd-ef85c7585973",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
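
Note: the custom descriptors added above follow a small contract: CustomColumnEval wraps a callable from one pandas Series to a Series of the same length, and CustomPairColumnEval wraps a callable over two Series. A self-contained sketch, with a toy frame standing in for the notebook's assistant_logs:

import pandas as pd

from evidently.report import Report
from evidently.metrics import ColumnSummaryMetric
from evidently.descriptors import CustomColumnEval, CustomPairColumnEval

def is_empty_string_callable(col: pd.Series) -> pd.Series:
    # One categorical label per row, preserving the input index.
    return pd.Series(["EMPTY" if v == "" else "NON EMPTY" for v in col], index=col.index)

def exact_match_callable(col1: pd.Series, col2: pd.Series) -> pd.Series:
    # Element-wise comparison of the two columns.
    return pd.Series(["MATCH" if v else "MISMATCH" for v in col1 == col2])

empty_string = CustomColumnEval(
    func=is_empty_string_callable,
    feature_type="cat",
    display_name="Empty response",
)

exact_match = CustomPairColumnEval(
    func=exact_match_callable,
    first_column="response",
    second_column="question",
    feature_type="cat",
    display_name="Exact match between response and question",
)

# Toy stand-in for assistant_logs.
data = pd.DataFrame({"question": ["Hi", "Ping"], "response": ["Hi", ""]})

report = Report(metrics=[
    ColumnSummaryMetric(column_name=empty_string.on("response")),  # single column
    ColumnSummaryMetric(column_name=exact_match.as_column()),      # column pair
])
report.run(reference_data=None, current_data=data)
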
diff --git a/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb b/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
index 440a939473..25af717ed1 100644
--- a/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
+++ b/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
@@ -36,7 +36,7 @@
     "from evidently.report import Report\n",
     "from evidently.test_suite import TestSuite\n",
     "\n",
-    "from evidently.metric_preset import TextOverviewPreset, TextEvals\n",
+    "from evidently.metric_preset import TextEvals\n",
     "\n",
     "from evidently.metrics import TextDescriptorsDriftMetric\n",
     "from evidently.metrics import TextDescriptorsDistribution\n",
@@ -230,7 +230,7 @@
    "source": [
     "#NO descriptors\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(column_name=\"Review_Text\")\n",
+    "    TextEvals(column_name=\"Review_Text\")\n",
     "])\n",
     "\n",
     "text_overview_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
@@ -246,7 +246,8 @@
     "#NO descriptors, several columns\n",
     "\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"])\n",
+    "    TextEvals(column_name=\"Review_Text\"),\n",
+    "    TextEvals(column_name=\"Title\"),\n",
     "])\n",
     "\n",
     "text_overview_report.run(reference_data=reviews_ref[:100], current_data=reviews_cur[:100], column_mapping=column_mapping)\n",
@@ -263,7 +264,7 @@
    "source": [
     "#WITH descriptors\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(column_name=\"Review_Text\", descriptors=[\n",
+    "    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
     "        OOV(),\n",
     "        NonLetterCharacterPercentage(),\n",
     "        TextLength(),\n",
@@ -287,7 +288,18 @@
    "outputs": [],
    "source": [
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"], descriptors=[\n",
+    "    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
+    "        OOV(),\n",
+    "        NonLetterCharacterPercentage(),\n",
+    "        TextLength(),\n",
+    "        IncludesWords(words_list=['dress', 'gown']),\n",
+    "        IncludesWords(words_list=['blouse', 'shirt']),\n",
+    "        SentenceCount(),\n",
+    "        WordCount(),\n",
+    "        Sentiment(),\n",
+    "        RegExp(reg_exp=r'.*\\?.*'),\n",
+    "    ]),\n",
+    "    TextEvals(column_name=\"Title\", descriptors=[\n",
     "        OOV(),\n",
     "        NonLetterCharacterPercentage(),\n",
     "        TextLength(),\n",
@@ -340,12 +352,19 @@
    "outputs": [],
    "source": [
     "summary_report = Report(metrics=[\n",
-    "    ColumnSummaryMetric(column_name=SemanticSimilarity().on([\"Review_Text\", \"Title\"]))\n",
+    "    ColumnSummaryMetric(column_name=SemanticSimilarity(with_column=\"Title\").on(\"Review_Text\"))\n",
     "])\n",
     "\n",
     "summary_report.run(reference_data=reviews_ref[:10], current_data=reviews_cur[:10], column_mapping=column_mapping)\n",
     "summary_report"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
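
Note: the last hunk swaps the old two-column call, SemanticSimilarity().on(["Review_Text", "Title"]), for a pairwise form: the partner column moves into with_column and .on() receives a single column. A sketch on toy data; the descriptor relies on a sentence-embedding model that is typically downloaded on first use, so the first run needs network access.

import pandas as pd

from evidently.report import Report
from evidently.metrics import ColumnSummaryMetric
from evidently.descriptors import SemanticSimilarity

# Toy stand-in for the reviews dataset used in the notebook.
data = pd.DataFrame({
    "Title": ["Great dress", "Runs small"],
    "Review_Text": ["Loved this dress, it fits perfectly.", "The shirt was tiny on me."],
})

report = Report(metrics=[
    # Scores each Review_Text row against the same row of Title.
    ColumnSummaryMetric(column_name=SemanticSimilarity(with_column="Title").on("Review_Text")),
])
report.run(reference_data=None, current_data=data)
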