updated the report and test fixes
pmittaldev committed Oct 16, 2024
1 parent a50c9e2 commit ff9d543
Showing 5 changed files with 97 additions and 53 deletions.
@@ -45,7 +45,7 @@
"outputs": [],
"source": [
"report = Report(metrics=[\n",
" ROUGESummaryMetric(column_name=\"summary\", rouge_n=1)\n",
" ROUGESummaryMetric(column_name=\"summary\", rouge_n=2)\n",
"])"
]
},
@@ -75,11 +75,27 @@
"source": [
"report.as_dict()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"report.as_dataframe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "evidently",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -97,5 +113,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
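For context, the notebook changes in this file switch the metric to ROUGE-2 and add an as_dataframe() cell. Put together, the cells amount to the following minimal workflow; the toy data below is borrowed from the tests later in this commit, and everything else mirrors the notebook and test imports:

    import pandas as pd

    from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
    from evidently.report.report import Report

    # Toy current/reference summaries, taken from the test fixtures in this commit.
    current_df = pd.DataFrame({"summary": ["hello there", "general kenobi"]})
    reference_df = pd.DataFrame({"summary": ["hello there", "no de"]})

    # The updated notebook cell uses ROUGE-2; the tests below use rouge_n=1.
    report = Report(metrics=[ROUGESummaryMetric(column_name="summary", rouge_n=2)])
    report.run(current_data=current_df, reference_data=reference_df)

    report.as_dict()       # nested dict including per_row_scores and summary_score
    report.as_dataframe()  # the newly added cell: tabular view of the same results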
12 changes: 12 additions & 0 deletions src/evidently/metrics/_registry.py
@@ -138,6 +138,13 @@
"evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetric",
"evidently:metric:DatasetSummaryMetric",
)

register_type_alias(
Metric,
"evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetric",
"evidently:metric:ROUGESummaryMetric",
)

register_type_alias(
Metric,
"evidently.metrics.data_quality.column_category_metric.ColumnCategoryMetric",
@@ -570,6 +577,11 @@
"evidently.metrics.data_integrity.dataset_summary_metric.DatasetSummaryMetricResult",
"evidently:metric_result:DatasetSummaryMetricResult",
)
register_type_alias(
MetricResult,
"evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetricResult",
"evidently:metric_result:ROUGESummaryMetricResult",
)
register_type_alias(
MetricResult,
"evidently.metrics.data_quality.column_category_metric.CategoryStat",
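For readers new to the registry module: the two new register_type_alias calls bind the metric and result classes to stable string aliases ("evidently:metric:ROUGESummaryMetric" and "evidently:metric_result:ROUGESummaryMetricResult") so that serialized reports can refer to them by alias. The snippet below is a minimal conceptual sketch of how such an alias lookup can work; it is an illustration, not evidently's actual registry implementation.

    import importlib

    # Illustrative alias table: stable alias -> fully qualified class path.
    _TYPE_ALIASES = {
        "evidently:metric:ROUGESummaryMetric": (
            "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetric"
        ),
        "evidently:metric_result:ROUGESummaryMetricResult": (
            "evidently.metrics.data_integrity.rouge_summary_metric.ROUGESummaryMetricResult"
        ),
    }

    def resolve_alias(alias: str) -> type:
        """Resolve a registered alias back to the class it points to."""
        module_path, _, class_name = _TYPE_ALIASES[alias].rpartition(".")
        return getattr(importlib.import_module(module_path), class_name)

    # resolve_alias("evidently:metric:ROUGESummaryMetric") -> the ROUGESummaryMetric class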
56 changes: 37 additions & 19 deletions src/evidently/metrics/data_integrity/rouge_summary_metric.py
@@ -1,31 +1,37 @@
from typing import List
from typing import Union

import evaluate
import pandas as pd

from evidently.base_metric import ColumnName
from evidently.base_metric import InputData
from evidently.base_metric import Metric
from evidently.base_metric import MetricResult
from evidently.core import IncludeTags
from evidently.model.widget import BaseWidgetInfo
from evidently.options.base import AnyOptions
from evidently.renderers.base_renderer import MetricRenderer
from evidently.renderers.base_renderer import default_renderer
from evidently.renderers.html_widgets import header_text
from evidently.renderers.html_widgets import table_data
from evidently.renderers.html_widgets import text_widget


class ROUGESummaryMetricResult(MetricResult):
class Config:
type_alias = "evidently:metric_result:ROUGESummaryMetricResult"
field_tags = {
"current": {IncludeTags.Current},
"reference": {IncludeTags.Reference},
"rouge_type": {IncludeTags.Parameter},
"value": {IncludeTags.Parameter},
"per_row_scores": {IncludeTags.Parameter},
"summary_score": {IncludeTags.Parameter},
}

current: list
reference: list
rouge_type: str
score: dict
per_row_scores: list
summary_score: float


class ROUGESummaryMetric(Metric[ROUGESummaryMetricResult]):
@@ -36,50 +42,62 @@ class Config:
column_name: str
rouge_n: int

def __init__(self, column_name: Union[str, ColumnName], rouge_n: int):
def __init__(self, column_name: str, rouge_n: int, options: AnyOptions = None):
self.column_name = column_name
self.rouge_n = rouge_n
super().__init__()
super().__init__(options=options)

def _calculate_summary_rouge(self, current_data: pd.Series, reference_data: pd.Series):
def _calculate_summary_rouge(self, current: pd.Series, reference: pd.Series):
rouge_evaluator = evaluate.load("rouge")

predictions = current_data.astype(str).tolist()
references = reference_data.astype(str).tolist()
current = current.astype(str).tolist()
reference = reference.astype(str).tolist()

rouge_scores = rouge_evaluator.compute(
rouge_types=[f"rouge{self.rouge_n}"], predictions=predictions, references=references, use_aggregator=False
rouge_types=[f"rouge{self.rouge_n}"], predictions=current, references=reference, use_aggregator=False
)

per_row_rouge_scores = rouge_scores[f"rouge{self.rouge_n}"]

summary_rouge_score = sum(per_row_rouge_scores) / len(per_row_rouge_scores)

return per_row_rouge_scores, summary_rouge_score
return per_row_rouge_scores, summary_rouge_score, current, reference

def calculate(self, data: InputData) -> MetricResult:
def calculate(self, data: InputData) -> ROUGESummaryMetricResult:
if data.current_data is None or data.reference_data is None:
raise ValueError("The current data or the reference data is None.")
if len(data.current_data[self.column_name]) == 0 or len(data.reference_data[self.column_name]) == 0:
raise ValueError("The current data or the reference data is empty.")

per_row_rouge_scores, summary_rouge_score = self._calculate_summary_rouge(
per_row_rouge_scores, summary_rouge_score, current, reference = self._calculate_summary_rouge(
data.current_data[self.column_name], data.reference_data[self.column_name]
)

result = ROUGESummaryMetricResult(
rouge_type=f"ROUGE-{self.rouge_n}",
score={"per_row_scores": per_row_rouge_scores, "summary_score": summary_rouge_score},
per_row_scores=per_row_rouge_scores,
summary_score=summary_rouge_score,
current=current,
reference=reference,
)
return result


@default_renderer(wrap_type=ROUGESummaryMetric)
class ROUGESummaryMetricRenderer(MetricRenderer):
@staticmethod
def _get_table(metric, n: int = 2) -> BaseWidgetInfo:
column_names = ["Metric", "Value"]
rows = ([metric.rouge_type, metric.score],)
def _get_table(metric) -> BaseWidgetInfo:
column_names = ["Metric", "current", "reference", "score"]
rows = []
for i in range(len(metric.current)):
rows.append([metric.rouge_type, metric.current[i], metric.reference[i], metric.per_row_scores[i]])
return table_data(title="", column_names=column_names, data=rows)

def render_html(self, obj: ROUGESummaryMetricResult) -> List[BaseWidgetInfo]:
def render_html(self, obj: ROUGESummaryMetric) -> List[BaseWidgetInfo]:
metric = obj.get_result()
return [header_text(label="ROUGE Metric"), self._get_table(metric)]
return [
header_text(label="ROUGE Metric"),
self._get_table(metric),
text_widget(text=f"{metric.summary_score}", title="Overall ROUGE score"),
]
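As a standalone reference, the refactored _calculate_summary_rouge boils down to the following calls into the evaluate library; the example strings and expected numbers come from this commit's tests, and the rest is a sketch of the same logic outside the Metric class:

    import evaluate

    rouge_n = 1
    current = ["hello there", "general kenobi"]   # candidate summaries
    reference = ["hello there", "no de"]          # reference summaries

    rouge_evaluator = evaluate.load("rouge")
    scores = rouge_evaluator.compute(
        rouge_types=[f"rouge{rouge_n}"],
        predictions=current,
        references=reference,
        use_aggregator=False,  # one score per row instead of a single aggregate
    )

    per_row_scores = scores[f"rouge{rouge_n}"]                 # [1.0, 0.0]
    summary_score = sum(per_row_scores) / len(per_row_scores)  # 0.5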
38 changes: 7 additions & 31 deletions tests/metrics/data_interity/test_dataset_rouge_summary_metric.py
@@ -4,37 +4,7 @@
import pytest

from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetricResult
from evidently.report.report import Report
from tests.conftest import smart_assert_equal


@pytest.mark.parametrize(
"current_df, reference_df, metric, expected_result",
(
(
pd.DataFrame(
{
"summary": ["hello there", "general kenobi"],
}
),
pd.DataFrame({"summary": ["hello there", "no de"]}),
ROUGESummaryMetric(column_name="summary", rouge_n=1),
ROUGESummaryMetricResult(rouge_type="ROUGE-1", score={"per_row_scores": [1.0, 0.0], "summary_score": 0.5}),
),
),
)
def test_rouge_summary_metric_success(
current_df: pd.DataFrame,
reference_df: pd.DataFrame,
metric,
expected_result: ROUGESummaryMetricResult,
) -> None:
report = Report(metrics=[metric])

report.run(current_data=current_df, reference_data=reference_df)

smart_assert_equal(metric.get_result(), expected_result)


@pytest.mark.parametrize(
@@ -48,7 +18,13 @@ def test_rouge_summary_metric_success(
),
pd.DataFrame({"summary": ["hello there", "no de"]}),
ROUGESummaryMetric(column_name="summary", rouge_n=1),
{"rouge_type": "ROUGE-1", "score": {"per_row_scores": [1.0, 0.0], "summary_score": 0.5}},
{
"current": ["hello there", "general kenobi"],
"reference": ["hello there", "no de"],
"rouge_type": "ROUGE-1",
"per_row_scores": [1.0, 0.0],
"summary_score": 0.5,
},
),
),
)
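The remaining test body is truncated above. As a rough sketch only (not the actual test in this commit), the updated result fields can be exercised like this, reusing the fixtures and expected values from the parametrization:

    import pandas as pd

    from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
    from evidently.report.report import Report

    metric = ROUGESummaryMetric(column_name="summary", rouge_n=1)
    report = Report(metrics=[metric])
    report.run(
        current_data=pd.DataFrame({"summary": ["hello there", "general kenobi"]}),
        reference_data=pd.DataFrame({"summary": ["hello there", "no de"]}),
    )

    result = metric.get_result()
    assert result.rouge_type == "ROUGE-1"
    assert result.per_row_scores == [1.0, 0.0]
    assert result.summary_score == 0.5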
22 changes: 22 additions & 0 deletions tests/multitest/metrics/data_integrity.py
@@ -16,6 +16,7 @@
from evidently.metrics.data_integrity.column_summary_metric import NumericCharacteristics
from evidently.metrics.data_integrity.dataset_missing_values_metric import DatasetMissingValuesMetric
from evidently.metrics.data_integrity.dataset_summary_metric import DatasetSummaryMetric
from evidently.metrics.data_integrity.rouge_summary_metric import ROUGESummaryMetric
from tests.multitest.conftest import AssertExpectedResult
from tests.multitest.conftest import Error
from tests.multitest.conftest import NoopOutcome
@@ -206,6 +207,27 @@ def dataset_summary_metric():
)


@metric
def rouge_summary_metric():
return TestMetric(
name="rouge_summary_metric",
metric=ROUGESummaryMetric(column_name="summary", rouge_n=1),
fingerprint="bfc616f760b973d2cbfbf0540c7b2c71",
outcomes=NoopOutcome(),
datasets=[
TestDataset(
"rouge_summary_metric_data",
current=pd.DataFrame(
{
"summary": ["hello there", "general kenobi"],
}
),
reference=pd.DataFrame({"summary": ["hello there", "no de"]}),
),
],
)


@metric
def column_reg_exp_metric():
return TestMetric(