From 5a653f97c76d702701d05492867ed72a51ff2298 Mon Sep 17 00:00:00 2001
From: Emeli Dral
Date: Fri, 6 Sep 2024 22:12:51 +0100
Subject: [PATCH] Added examples for custom descriptors, semantic similarity;
 removed TextOverviewPreset (#1288)

---
 ...o_evaluate_llm_with_text_descriptors.ipynb |  2 +-
 .../how_to_use_llm_judge_template.ipynb       | 79 ++++++++++++++++---
 ...descriptors_in_text_specific_metrics.ipynb | 31 ++++++--
 3 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb b/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
index b4da931bc1..6ed6daf5f3 100644
--- a/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
+++ b/examples/how_to_questions/how_to_evaluate_llm_with_text_descriptors.ipynb
@@ -50,7 +50,7 @@
     "from evidently.metrics import ColumnSummaryMetric, ColumnDistributionMetric, ColumnDriftMetric, DataDriftTable, TextDescriptorsDistribution, ColumnCategoryMetric\n",
     "from evidently.tests import TestColumnValueMin, TestColumnValueMean, TestCategoryShare, TestShareOfOutRangeValues\n",
     "\n",
-    "from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextOverviewPreset, TextEvals\n",
+    "from evidently.metric_preset import DataDriftPreset, DataQualityPreset, TextEvals\n",
     "\n",
     "from evidently.descriptors import HuggingFaceModel, HuggingFaceToxicityModel, OpenAIPrompting \n",
     "from evidently.descriptors import RegExp, BeginsWith, EndsWith, Contains, DoesNotContain, IncludesWords, ExcludesWords\n",
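
Note: TextOverviewPreset simply disappears from this notebook's imports; the equivalent report is built from TextEvals directly. A minimal sketch of the replacement pattern follows; the toy DataFrame, column name, and descriptor choice are illustrative and not part of this patch.

import pandas as pd

from evidently.report import Report
from evidently.metric_preset import TextEvals
from evidently.descriptors import TextLength, Sentiment

# Toy data for illustration; any DataFrame with a text column works.
data = pd.DataFrame({"response": ["Thanks, that helps!", "No comment.", ""]})

report = Report(metrics=[
    # TextEvals(column_name=...) takes over the role of the removed
    # TextOverviewPreset(column_name=...).
    TextEvals(column_name="response", descriptors=[
        TextLength(),
        Sentiment(),
    ]),
])
# reference_data is optional; passing None evaluates the current data only.
report.run(reference_data=None, current_data=data)
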
diff --git a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
index 202330f5d6..d6f57a6ea5 100644
--- a/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
+++ b/examples/how_to_questions/how_to_use_llm_judge_template.ipynb
@@ -15,7 +15,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval"
+    "from evidently.descriptors import LLMEval, NegativityLLMEval, PIILLMEval, DeclineLLMEval, BiasLLMEval, ToxicityLLMEval, ContextQualityLLMEval\n",
+    "from evidently.descriptors import SemanticSimilarity \n",
+    "from evidently.descriptors import CustomColumnEval, CustomPairColumnEval"
    ]
   },
   {
@@ -52,7 +54,7 @@
     "\n",
     "from evidently.metrics import ColumnSummaryMetric\n",
     "\n",
-    "from evidently.metric_preset import DataQualityPreset, TextOverviewPreset, TextEvals"
+    "from evidently.metric_preset import DataQualityPreset, TextEvals"
    ]
   },
   {
@@ -233,7 +235,8 @@
    "source": [
     "report = Report(metrics=[\n",
     "    TextEvals(column_name=\"question\", descriptors=[\n",
-    "        NegativityLLMEval(include_category=True) \n",
+    "        NegativityLLMEval(include_category=True),\n",
+    "        SemanticSimilarity(with_column=\"response\")\n",
     "    ]),\n",
     "    TextEvals(column_name=\"response\", descriptors=[\n",
     "        PIILLMEval(include_reasoning=False), \n",
@@ -308,6 +311,68 @@
     "print(ContextQualityLLMEval(question=\"question\").get_template().get_prompt_template())"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "7253dced-0c84-4e27-9c97-c4bb476ef110",
+   "metadata": {},
+   "source": [
+    "### Custom descriptor over text data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c74f5f3d-56ac-42c1-b5e1-4c81411232b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def is_empty_string_callable(val1):\n",
+    "    return pd.Series([\"EMPTY\" if val == \"\" else \"NON EMPTY\" for val in val1], index=val1.index)\n",
+    "\n",
+    "empty_string = CustomColumnEval(\n",
+    "    func=is_empty_string_callable,\n",
+    "    feature_type=\"cat\",\n",
+    "    display_name=\"Empty response\"\n",
+    ")\n",
+    "\n",
+    "report = Report(metrics=[\n",
+    "    ColumnSummaryMetric(column_name=empty_string.on(\"response\")),\n",
+    "])\n",
+    "\n",
+    "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+    "           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+    "           column_mapping=column_mapping)\n",
+    "report "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82c8c30b-095c-4aeb-a87b-4fd637295fe7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def exact_match_callable(val1, val2):\n",
+    "    return pd.Series([\"MATCH\" if val else \"MISMATCH\" for val in val1 == val2])\n",
+    "\n",
+    "exact_match = CustomPairColumnEval(\n",
+    "    func=exact_match_callable,\n",
+    "    first_column=\"response\",\n",
+    "    second_column=\"question\",\n",
+    "    feature_type=\"cat\",\n",
+    "    display_name=\"Exact match between response and question\"\n",
+    ")\n",
+    "\n",
+    "report = Report(metrics=[\n",
+    "    ColumnSummaryMetric(column_name=exact_match.as_column())\n",
+    "])\n",
+    "\n",
+    "report.run(reference_data=assistant_logs[datetime(2024, 4, 8) : datetime(2024, 4, 9)][:10], \n",
+    "           current_data=assistant_logs[datetime(2024, 4, 9) : datetime(2024, 4, 10)][:10], \n",
+    "           column_mapping=column_mapping)\n",
+    "report "
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "3806d7d8-5acf-45cb-b16b-3b4336dea6e0",
    "metadata": {},
@@ -443,14 +508,6 @@
     "           column_mapping=column_mapping)\n",
     "report "
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c63c0d6e-e5fc-44ec-a1cd-ef85c7585973",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
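
Note: the custom descriptors added above follow a small contract: CustomColumnEval wraps a callable from one pandas Series to a Series of the same length, and CustomPairColumnEval wraps a callable over two Series. A self-contained sketch, with a toy frame standing in for the notebook's assistant_logs:

import pandas as pd

from evidently.report import Report
from evidently.metrics import ColumnSummaryMetric
from evidently.descriptors import CustomColumnEval, CustomPairColumnEval

def is_empty_string_callable(col: pd.Series) -> pd.Series:
    # One categorical label per row, preserving the input index.
    return pd.Series(["EMPTY" if v == "" else "NON EMPTY" for v in col], index=col.index)

def exact_match_callable(col1: pd.Series, col2: pd.Series) -> pd.Series:
    # Element-wise comparison of the two columns.
    return pd.Series(["MATCH" if v else "MISMATCH" for v in col1 == col2])

empty_string = CustomColumnEval(
    func=is_empty_string_callable,
    feature_type="cat",
    display_name="Empty response",
)

exact_match = CustomPairColumnEval(
    func=exact_match_callable,
    first_column="response",
    second_column="question",
    feature_type="cat",
    display_name="Exact match between response and question",
)

# Toy stand-in for assistant_logs.
data = pd.DataFrame({"question": ["Hi", "Ping"], "response": ["Hi", ""]})

report = Report(metrics=[
    ColumnSummaryMetric(column_name=empty_string.on("response")),  # single column
    ColumnSummaryMetric(column_name=exact_match.as_column()),      # column pair
])
report.run(reference_data=None, current_data=data)
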
diff --git a/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb b/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
index 440a939473..25af717ed1 100644
--- a/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
+++ b/examples/how_to_questions/how_to_use_text_descriptors_in_text_specific_metrics.ipynb
@@ -36,7 +36,7 @@
     "from evidently.report import Report\n",
     "from evidently.test_suite import TestSuite\n",
     "\n",
-    "from evidently.metric_preset import TextOverviewPreset, TextEvals\n",
+    "from evidently.metric_preset import TextEvals\n",
     "\n",
     "from evidently.metrics import TextDescriptorsDriftMetric\n",
     "from evidently.metrics import TextDescriptorsDistribution\n",
@@ -230,7 +230,7 @@
    "source": [
     "#NO descriptors\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(column_name=\"Review_Text\")\n",
+    "    TextEvals(column_name=\"Review_Text\")\n",
     "])\n",
     "\n",
     "text_overview_report.run(reference_data=reviews_ref, current_data=reviews_cur, column_mapping=column_mapping)\n",
@@ -246,7 +246,8 @@
     "#NO descriptors, several columns\n",
     "\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"])\n",
+    "    TextEvals(column_name=\"Review_Text\"),\n",
+    "    TextEvals(column_name=\"Title\"),\n",
     "])\n",
     "\n",
     "text_overview_report.run(reference_data=reviews_ref[:100], current_data=reviews_cur[:100], column_mapping=column_mapping)\n",
@@ -263,7 +264,7 @@
    "source": [
     "#WITH descriptors\n",
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(column_name=\"Review_Text\", descriptors=[\n",
+    "    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
     "        OOV(),\n",
     "        NonLetterCharacterPercentage(),\n",
     "        TextLength(),\n",
@@ -287,7 +288,18 @@
    "outputs": [],
    "source": [
     "text_overview_report = Report(metrics=[\n",
-    "    TextOverviewPreset(columns=[\"Review_Text\", \"Title\"], descriptors=[\n",
+    "    TextEvals(column_name=\"Review_Text\", descriptors=[\n",
+    "        OOV(),\n",
+    "        NonLetterCharacterPercentage(),\n",
+    "        TextLength(),\n",
+    "        IncludesWords(words_list=['dress', 'gown']),\n",
+    "        IncludesWords(words_list=['blouse', 'shirt']),\n",
+    "        SentenceCount(),\n",
+    "        WordCount(),\n",
+    "        Sentiment(),\n",
+    "        RegExp(reg_exp=r'.*\\?.*'),\n",
+    "    ]),\n",
+    "    TextEvals(column_name=\"Title\", descriptors=[\n",
     "        OOV(),\n",
     "        NonLetterCharacterPercentage(),\n",
     "        TextLength(),\n",
@@ -340,12 +352,19 @@
    "outputs": [],
    "source": [
     "summary_report = Report(metrics=[\n",
-    "    ColumnSummaryMetric(column_name=SemanticSimilarity().on([\"Review_Text\", \"Title\"]))\n",
+    "    ColumnSummaryMetric(column_name=SemanticSimilarity(with_column=\"Title\").on(\"Review_Text\"))\n",
     "])\n",
     "\n",
     "summary_report.run(reference_data=reviews_ref[:10], current_data=reviews_cur[:10], column_mapping=column_mapping)\n",
     "summary_report"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
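
Note: the last hunk swaps the old two-column call, SemanticSimilarity().on(["Review_Text", "Title"]), for a pairwise form: the partner column moves into with_column and .on() receives a single column. A sketch on toy data; the descriptor relies on a sentence-embedding model that is typically downloaded on first use, so the first run needs network access.

import pandas as pd

from evidently.report import Report
from evidently.metrics import ColumnSummaryMetric
from evidently.descriptors import SemanticSimilarity

# Toy stand-in for the reviews dataset used in the notebook.
data = pd.DataFrame({
    "Title": ["Great dress", "Runs small"],
    "Review_Text": ["Loved this dress, it fits perfectly.", "The shirt was tiny on me."],
})

report = Report(metrics=[
    # Scores each Review_Text row against the same row of Title.
    ColumnSummaryMetric(column_name=SemanticSimilarity(with_column="Title").on("Review_Text")),
])
report.run(reference_data=None, current_data=data)
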