New reports improvements. (#1414)
Liraim authored Jan 13, 2025
1 parent d7c11a1 commit a0b17ef
Showing 6 changed files with 121 additions and 25 deletions.
31 changes: 19 additions & 12 deletions src/evidently/calculations/classification_performance.py
@@ -101,9 +101,12 @@ def get_prediction_data(
target = data_columns.utility_columns.target

if isinstance(prediction, list) and len(prediction) > 2:
pred_data = data[prediction].idxmax(axis=1)
if is_integer_dtype(data[target]):
pred_data = pred_data.apply(lambda x: int(x) if x is not None else None)
# list of columns with prediction probas, should be same as target labels
return PredictionData(
predictions=data[prediction].idxmax(axis=1),
predictions=pred_data,
prediction_probas=data[prediction],
labels=prediction,
)
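
The added lines above keep integer targets comparable with the labels produced by idxmax. A minimal standalone sketch of that behavior, using made-up column names rather than the library's API:

```python
import pandas as pd
from pandas.api.types import is_integer_dtype

# Hypothetical frame: probability columns are named after integer class labels,
# but as strings, while the target column itself is integer-typed.
data = pd.DataFrame(
    {
        "target": [0, 2, 1],
        "0": [0.7, 0.1, 0.2],
        "1": [0.2, 0.2, 0.6],
        "2": [0.1, 0.7, 0.2],
    }
)
proba_columns = ["0", "1", "2"]

# idxmax(axis=1) returns the *column name* with the highest probability per row.
pred_data = data[proba_columns].idxmax(axis=1)

# Cast the names back to int so predictions compare equal to the integer target.
if is_integer_dtype(data["target"]):
    pred_data = pred_data.apply(lambda x: int(x) if x is not None else None)

print(pred_data.tolist())  # [0, 2, 1]
```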
@@ -154,25 +157,29 @@ def get_prediction_data(
and is_float_dtype(data[prediction])
):
pos_label = _check_pos_labels(pos_label, labels)
if prediction not in labels:
raise ValueError(
"No prediction for the target labels were found. "
"Consider to rename columns with the prediction to match target labels."
)

neg_label = None
if prediction in labels and pos_label != prediction:
neg_label = prediction
# get negative label for binary classification
labels = pd.Series(labels)
neg_label = labels[labels != pos_label].iloc[0]
if pos_label == prediction:
pos_preds = data[prediction]

else:
if neg_label is None:
for label in labels:
if label != pos_label:
neg_label = label
if neg_label is None:
raise ValueError("Failed to determine negative label")
if prediction in labels and neg_label == prediction:
neg_preds = data[prediction]
pos_preds = data[prediction].apply(lambda x: 1.0 - x)
else:
pos_preds = data[prediction]
neg_preds = data[prediction].apply(lambda x: 1.0 - x)

prediction_probas = pd.DataFrame.from_dict(
{
pos_label: pos_preds,
neg_label: pos_preds.apply(lambda x: 1.0 - x),
neg_label: neg_preds,
}
)
predictions = threshold_probability_labels(prediction_probas, pos_label, neg_label, threshold)
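
A rough, self-contained reading of the reworked binary-probability branch: the negative label is simply the other entry of the label list, and whichever class the single float column does not describe receives the complement probability. The helper and branch layout below are illustrative, not the exact library code:

```python
import pandas as pd

labels = pd.Series(["a", "b"])  # hypothetical binary label list
pos_label = "a"
neg_label = labels[labels != pos_label].iloc[0]  # -> "b", as in the hunk above

def binary_probas(prob: pd.Series, column_label: str) -> pd.DataFrame:
    """Build a {pos_label, neg_label} probability frame from one float column."""
    if column_label == pos_label:
        pos_preds, neg_preds = prob, prob.apply(lambda x: 1.0 - x)
    else:
        neg_preds, pos_preds = prob, prob.apply(lambda x: 1.0 - x)
    return pd.DataFrame({pos_label: pos_preds, neg_label: neg_preds})

# The raw column holds P("b"), so "a" gets the complement.
print(binary_probas(pd.Series([0.1, 0.8, 0.2]), column_label="b"))
```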
32 changes: 30 additions & 2 deletions src/evidently/future/metric_types.py
@@ -70,6 +70,30 @@ def widget(self, value: List[BaseWidgetInfo]):
def tests(self) -> Dict["BoundTest", "MetricTestResult"]:
return self._tests or {}

def to_dict(self):
config = self._metric.metric.dict() # type: ignore[attr-defined]
config_items = []
type = None
for field, value in config.items():
if field == "type":
type = value.split(":")[-1]
continue
elif value is None:
continue
elif isinstance(value, list):
if len(value) > 0:
config_items.append(f"{field}={','.join(str(x) for x in value)}")
continue
elif isinstance(value, dict):
continue
else:
config_items.append(f"{field}={str(value)}")
return {
"id": self._metric.id,
"metric_id": f"{type}({','.join(config_items)})",
"value": self.dict(),
}

@abc.abstractmethod
def dict(self) -> object:
raise NotImplementedError()
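
The new to_dict() renders the metric configuration into a compact metric_id string. A standalone sketch of that rendering, with a made-up config dict and an assumed "evidently:metric_v2:" style type prefix:

```python
def render_metric_id(config: dict) -> str:
    """Turn a metric config into a string like 'Accuracy(probas_threshold=0.5,labels=a,b)'."""
    metric_type = None
    items = []
    for field, value in config.items():
        if field == "type":
            metric_type = value.split(":")[-1]  # keep only the short class name
        elif value is None or isinstance(value, dict):
            continue  # empty and nested values are skipped
        elif isinstance(value, list):
            if value:
                items.append(f"{field}={','.join(str(x) for x in value)}")
        else:
            items.append(f"{field}={value}")
    return f"{metric_type}({','.join(items)})"

print(render_metric_id({"type": "evidently:metric_v2:Accuracy", "probas_threshold": 0.5, "labels": ["a", "b"]}))
# -> Accuracy(probas_threshold=0.5,labels=a,b)
```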
@@ -228,12 +252,16 @@ def get_default_render(title: str, result: TResult) -> List[BaseWidgetInfo]:
counter(
title=title,
size=WidgetSize.FULL,
counters=[CounterData(label="", value=str(result.value))],
counters=[CounterData(label="", value=f"{result.value:0.3f}")],
),
]
if isinstance(result, ByLabelValue):
return [
table_data(title=title, column_names=["Label", "Value"], data=[(k, v) for k, v in result.values.items()])
table_data(
title=title,
column_names=["Label", "Value"],
data=[(k, f"{v:0.3f}") for k, v in result.values.items()],
)
]
if isinstance(result, CountValue):
return [
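
The render change replaces raw str() output with fixed three-decimal formatting, e.g.:

```python
value = 0.8571428571428571
print(str(value))       # 0.8571428571428571
print(f"{value:0.3f}")  # 0.857
```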
18 changes: 14 additions & 4 deletions src/evidently/future/metrics/classification.py
@@ -13,6 +13,7 @@
from evidently.future.metric_types import SingleValueMetric
from evidently.future.metrics._legacy import LegacyMetricCalculation
from evidently.future.report import Context
from evidently.metric_results import Label
from evidently.metrics import ClassificationDummyMetric
from evidently.metrics import ClassificationQualityByClass as _ClassificationQualityByClass
from evidently.metrics.classification_performance.classification_dummy_metric import ClassificationDummyMetricResults
@@ -70,6 +71,15 @@ def get_tests(self, value: ByLabelValue) -> Generator[MetricTestResult, None, None]:
for test in tests:
yield test.to_test()(self, label_value)

def _relabel(self, context: "Context", label: Label):
classification = context.data_definition.get_classification("default")
if classification is None:
return label
labels = classification.labels
if labels is not None:
return labels[label]
return label


class F1ByLabel(ClassificationQualityByLabel):
pass
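
A simplified illustration of what _relabel enables: mapping raw class ids from the legacy per-class result onto human-readable names before building ByLabelValue. Treating classification.labels as an id-to-name mapping is an assumption of this sketch:

```python
raw_f1_by_class = {0: 0.91, 1: 0.78}  # hypothetical legacy result keyed by class id
labels = {0: "cat", 1: "dog"}         # hypothetical classification.labels mapping

# Fall back to the raw id when no mapping is available, as _relabel does.
relabeled = {labels.get(k, k): v for k, v in raw_f1_by_class.items()}
print(relabeled)  # {'cat': 0.91, 'dog': 0.78}
```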
@@ -82,7 +92,7 @@ def calculate_value(
legacy_result: ClassificationQualityByClassResult,
render: List[BaseWidgetInfo],
) -> ByLabelValue:
return ByLabelValue({k: v.f1 for k, v in legacy_result.current.metrics.items()})
return ByLabelValue({self._relabel(context, k): v.f1 for k, v in legacy_result.current.metrics.items()})

def display_name(self) -> str:
return "F1 by Label metric"
@@ -100,7 +110,7 @@ def calculate_value(
render: List[BaseWidgetInfo],
) -> ByLabelValue:
return ByLabelValue(
{k: v.precision for k, v in legacy_result.current.metrics.items()},
{self._relabel(context, k): v.precision for k, v in legacy_result.current.metrics.items()},
)

def display_name(self) -> str:
@@ -119,7 +129,7 @@ def calculate_value(
render: List[BaseWidgetInfo],
) -> ByLabelValue:
return ByLabelValue(
{k: v.recall for k, v in legacy_result.current.metrics.items()},
{self._relabel(context, k): v.recall for k, v in legacy_result.current.metrics.items()},
)

def display_name(self) -> str:
@@ -138,7 +148,7 @@ def calculate_value(
render: List[BaseWidgetInfo],
) -> ByLabelValue:
value = ByLabelValue(
{k: v.roc_auc for k, v in legacy_result.current.metrics.items()},
{self._relabel(context, k): v.roc_auc for k, v in legacy_result.current.metrics.items()},
)
value.widget = render
value.widget[0].params["counters"][0]["label"] = self.display_name()
4 changes: 3 additions & 1 deletion src/evidently/future/presets/classification.py
@@ -98,7 +98,9 @@ def metrics(self) -> List[Metric]:

def calculate(self, metric_results: Dict[MetricId, MetricResult]) -> PresetResult:
metric = RocAucByLabel(probas_threshold=self._probas_threshold, k=self._k)
return PresetResult(metric_results[metric.to_calculation().id].widget)
widget = metric_results[metric.to_calculation().id].widget[:]
widget[0].params["counters"][0]["label"] = "Classification Quality by Label"
return PresetResult(widget)


class ClassificationDummyQuality(MetricContainer):
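
The preset now takes a copy of the widget list and overrides the first counter's label instead of returning the metric's widget untouched. Roughly, with a made-up dict standing in for BaseWidgetInfo:

```python
# Hypothetical widget payload shaped like the params accessed in calculate().
widgets = [{"params": {"counters": [{"label": "ROC AUC by Label metric", "value": "0.9"}]}}]

widget = widgets[:]  # copy of the list, as in the preset's calculate()
widget[0]["params"]["counters"][0]["label"] = "Classification Quality by Label"
print(widget[0]["params"]["counters"][0]["label"])
```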
20 changes: 14 additions & 6 deletions src/evidently/future/report.py
@@ -102,11 +102,19 @@ def get_legacy_metric(self, metric: LegacyMetric[T]) -> Tuple[T, List[BaseWidgetInfo]]:
classification = self._input_data[0]._data_definition.get_classification("default")
reference = self._input_data[1].as_dataframe() if self._input_data[1] is not None else None
current = self._input_data[0].as_dataframe()
prediction: Optional[Union[str, List[str]]]
if classification is not None:
if isinstance(classification.prediction_probas, list):
prediction = classification.prediction_probas
elif classification.prediction_probas not in current.columns:
prediction = classification.prediction_labels
else:
prediction = classification.prediction_probas
else:
prediction = None
mapping = ColumnMapping(
target=classification.target if classification is not None else None,
prediction=(classification.prediction_probas or classification.prediction_labels)
if classification is not None
else None,
prediction=prediction,
pos_label=classification.pos_label if isinstance(classification, BinaryClassification) else None,
target_names=classification.labels if classification is not None else None,
)
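
A hedged sketch of the new prediction-column resolution in get_legacy_metric: prefer probability columns, but fall back to the label column when the single probas column is not present in the current frame. The helper name and frame are hypothetical:

```python
from typing import List, Optional, Union

import pandas as pd

def resolve_prediction(
    current: pd.DataFrame,
    prediction_probas: Optional[Union[str, List[str]]],
    prediction_labels: Optional[str],
) -> Optional[Union[str, List[str]]]:
    if isinstance(prediction_probas, list):
        return prediction_probas  # multiclass: list of proba columns
    if prediction_probas is None or prediction_probas not in current.columns:
        return prediction_labels  # fall back to the label column
    return prediction_probas      # single proba column that exists

df = pd.DataFrame({"pred": ["a", "b"], "proba": [0.9, 0.2]})
print(resolve_prediction(df, "proba", "pred"))    # proba
print(resolve_prediction(df, "missing", "pred"))  # pred
```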
@@ -204,10 +212,10 @@ def _repr_html_(self):

def dict(self) -> dict:
return {
"metrics": {
metric: self.context.get_metric_result(metric).dict() # type: ignore[attr-defined]
"metrics": [
self.context.get_metric_result(metric).to_dict() # type: ignore[attr-defined]
for metric in self.context._metrics_graph.keys()
},
],
"tests": {
test.get_fingerprint(): test_result.dict()
for metric in self.context._metrics_graph.keys()
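
An illustrative (not exact) shape of Report.dict() after this change: metric results become a list of to_dict() entries instead of a metric-id keyed mapping, while tests keep their fingerprint keys:

```python
example_report_dict = {
    "metrics": [
        {
            "id": "<metric calculation id>",      # placeholder, not a real id
            "metric_id": "F1ByLabel()",
            "value": {"cat": 0.91, "dog": 0.78},  # hypothetical ByLabel payload
        },
    ],
    "tests": {},  # unchanged: keyed by test fingerprint
}
```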
41 changes: 41 additions & 0 deletions tests/calculations/test_classification_performance.py
@@ -5,7 +5,10 @@

from evidently.calculations.classification_performance import calculate_confusion_by_classes
from evidently.calculations.classification_performance import calculate_metrics
from evidently.calculations.classification_performance import get_prediction_data
from evidently.metric_results import ConfusionMatrix
from evidently.metric_results import DatasetColumns
from evidently.metric_results import DatasetUtilityColumns
from evidently.metric_results import PredictionData
from evidently.pipeline.column_mapping import ColumnMapping

@@ -58,3 +61,41 @@ def test_calculate_metrics():
assert actual_result.rate_plots_data.fpr == [pytest.approx(v) for v in [0.0, 0.0, 0.4, 0.4, 0.6, 0.6, 1.0]]
assert actual_result.rate_plots_data.fnr == [pytest.approx(v) for v in [1.0, 0.8, 0.8, 0.2, 0.2, 0.0, 0.0]]
assert actual_result.rate_plots_data.tnr == [pytest.approx(v) for v in [1.0, 1.0, 0.6, 0.6, 0.4, 0.4, 0.0]]


@pytest.mark.parametrize(
"dataframe,target,prediction,target_names,pos_label,expected",
(
(
pd.DataFrame(data={"col": ["a", "b", "b", "a", "b"], "prob": [0.1, 0.1, 0.1, 0.8, 0.2]}),
"col",
"prob",
["a", "b"],
"a",
{"a": [0.1, 0.1, 0.1, 0.8, 0.2], "b": [0.9, 0.9, 0.9, 0.2, 0.8]},
),
(
pd.DataFrame(data={"col": ["a", "b", "b", "a", "b"], "prob": [0.1, 0.1, 0.1, 0.8, 0.2]}),
"col",
"prob",
["a", "b"],
"a",
{"a": [0.1, 0.1, 0.1, 0.8, 0.2], "b": [0.9, 0.9, 0.9, 0.2, 0.8]},
),
),
)
def test_get_prediction_data(dataframe, target, prediction, target_names, pos_label, expected):
data = get_prediction_data(
dataframe,
DatasetColumns(
utility_columns=DatasetUtilityColumns(target=target, prediction=prediction),
target_names=target_names,
num_feature_names=[],
cat_feature_names=[],
text_feature_names=[],
datetime_feature_names=[],
),
pos_label=pos_label,
)
for label in target_names:
assert np.allclose(data.prediction_probas[label], expected[label], atol=1e-6)
