Skip to content

Commit

Permalink
Add UniqueValueCount and new DistributionPanel.
Browse files Browse the repository at this point in the history
  • Loading branch information
Liraim committed Jan 10, 2025
1 parent d3c2bf1 commit d84c97b
Show file tree
Hide file tree
Showing 5 changed files with 2,102 additions and 2,631 deletions.
33 changes: 25 additions & 8 deletions examples/future_dashboads.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,25 @@
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2025-01-08T13:01:54.904648Z",
"start_time": "2025-01-08T13:01:51.766554Z"
"end_time": "2025-01-09T23:58:58.734191Z",
"start_time": "2025-01-09T23:58:58.566108Z"
}
},
"source": [
"from random import randint\n",
"\n",
"from evidently.ui.dashboards.reports import DistributionPanel\n",
"from evidently.future.datasets import BinaryClassification\n",
"from evidently.future.metrics import PrecisionByLabel\n",
"from evidently.future.metrics.column_statistics import CategoryCount\n",
"from evidently.ui.dashboards import CounterAgg\n",
"from evidently.ui.dashboards import PanelValue\n",
"from evidently.ui.dashboards import ReportFilter\n",
"import datetime\n",
"import pandas as pd\n",
"\n",
"from evidently.future.metrics import MeanValue\n",
"from evidently.future.metrics import UniqueValueCount\n",
"from evidently.future.datasets import Dataset\n",
"from evidently.future.datasets import DataDefinition\n",
"from evidently.future.datasets import ColumnInfo\n",
Expand All @@ -31,14 +38,19 @@
"from evidently.ui.workspace import Workspace\n",
"\n",
"def create_snapshot(i):\n",
" df = pd.DataFrame({\"col\": list(range(i + 5))})\n",
" df = pd.DataFrame({\n",
" \"col\": list(randint(0, 5) for _ in range(i + 5)),\n",
" \"target\": list(x % 2 for x in range(i + 5)),\n",
" \"prediction\": list(x / 10 for x in range(i + 5)),\n",
" })\n",
" dataset = Dataset.from_pandas(\n",
" df,\n",
" data_definition=DataDefinition(\n",
" numerical_features=[\"col\"],\n",
" classifications=[BinaryClassification()]\n",
" ),\n",
" )\n",
" report = Report([MeanValue(column=\"col\", tests=[lte(4)])])\n",
" report = Report([MeanValue(column=\"col\", tests=[lte(4)]), UniqueValueCount(column=\"col\")])\n",
" snapshot_v2 = report.run(dataset, None)\n",
"\n",
" snapshot_v1 = snapshot_v2_to_v1(snapshot_v2)\n",
Expand All @@ -64,20 +76,25 @@
" value=PanelValue(field_path=\"value\", metric_args={\"metric.metric_id\": \"2e5caa9690281e02cf243c736d687782\"}),\n",
" filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),\n",
" ))\n",
" project.dashboard.add_panel(DistributionPanel(\n",
" title=\"Distr\",\n",
" value=PanelValue(field_path=\"values\", metric_args={\"metric.type\": \"evidently:metric_v2:UniqueValueCount\"}),\n",
" filter=ReportFilter(metadata_values={}, tag_values=[], include_test_suites=True),\n",
" ))\n",
" # project.dashboard.add_panel(SingleValueDashboardPanel(metric_id=\"mean:col\"))\n",
" project.save()\n",
"\n",
"for i in range(10):\n",
" project.add_snapshot(create_snapshot(i))"
" project.add_snapshot(create_snapshot(i)) "
],
"outputs": [],
"execution_count": 1
"execution_count": 2
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2025-01-08T13:01:54.911704Z",
"start_time": "2025-01-08T13:01:54.908667Z"
"end_time": "2025-01-09T23:56:41.749035100Z",
"start_time": "2025-01-09T23:53:00.368240Z"
}
},
"cell_type": "code",
Expand Down
4,590 changes: 1,969 additions & 2,621 deletions examples/list_metrics.ipynb

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions src/evidently/future/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .column_statistics import MinValue
from .column_statistics import QuantileValue
from .column_statistics import StdValue
from .column_statistics import UniqueValueCount
from .column_statistics import ValueDrift
from .dataset_statistics import AlmostConstantColumnsCount
from .dataset_statistics import AlmostDuplicatedColumnsCount
Expand Down Expand Up @@ -53,6 +54,7 @@
"MinValue",
"QuantileValue",
"StdValue",
"UniqueValueCount",
# dataset statistics metrics
"ColumnCount",
"RowCount",
Expand Down
22 changes: 20 additions & 2 deletions src/evidently/future/metrics/column_statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
from evidently.calculations.stattests import PossibleStatTestType
from evidently.future.datasets import Dataset
from evidently.future.datasets import DatasetColumn
from evidently.future.metric_types import ByLabelCalculation
from evidently.future.metric_types import ByLabelMetric
from evidently.future.metric_types import ByLabelValue
from evidently.future.metric_types import CountCalculation
from evidently.future.metric_types import CountMetric
from evidently.future.metric_types import CountValue
Expand Down Expand Up @@ -322,6 +325,10 @@ class LegacyDriftedColumnsMetric(
Generic[TMetric],
abc.ABC,
):
pass


class DriftedColumnCalculation(LegacyDriftedColumnsMetric[DriftedColumnsCount]):
def legacy_metric(self) -> DatasetDriftMetric:
return DatasetDriftMetric(
columns=self.metric.columns,
Expand All @@ -338,8 +345,6 @@ def legacy_metric(self) -> DatasetDriftMetric:
per_column_stattest_threshold=self.metric.per_column_stattest_threshold,
)


class DriftedColumnCalculation(LegacyDriftedColumnsMetric[DriftedColumnsCount]):
def calculate_value(
self, context: Context, legacy_result: DatasetDriftMetricResults, render: List[BaseWidgetInfo]
) -> CountValue:
Expand All @@ -348,3 +353,16 @@ def calculate_value(

def display_name(self) -> str:
return "Count of Drifted Columns"


class UniqueValueCount(ByLabelMetric):
    """By-label metric configuration that targets a single dataset column.

    The paired ``UniqueValueCountCalculation`` reads ``column`` to pick the
    column whose values are counted.
    """

    # Name of the column whose distinct values are counted.
    column: str


class UniqueValueCountCalculation(ByLabelCalculation[UniqueValueCount]):
    """Calculation backing ``UniqueValueCount``: occurrences of each value in a column."""

    def calculate(self, current_data: Dataset, reference_data: Optional[Dataset]) -> ByLabelValue:
        """Count how many times each value occurs in the configured column.

        Args:
            current_data: dataset whose column ``self.metric.column`` is counted.
            reference_data: accepted for interface compatibility; not used here.

        Returns:
            A ``ByLabelValue`` built from a ``{value: count}`` mapping.
        """
        # NOTE(review): assumes as_dataframe() returns a pandas DataFrame, so the
        # counts follow pandas' ``value_counts`` defaults — confirm.
        column_values = current_data.as_dataframe()[self.metric.column]
        counts_by_value = column_values.value_counts().to_dict()
        return ByLabelValue(counts_by_value)

    def display_name(self) -> str:
        """Return the human-readable name of this metric."""
        return "Unique Value Count"
86 changes: 86 additions & 0 deletions src/evidently/ui/dashboards/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,10 @@
from plotly import graph_objs as go

from evidently.base_metric import Metric
from evidently.future.metric_types import Value
from evidently.metric_results import Distribution
from evidently.metric_results import HistogramData
from evidently.metric_results import Label
from evidently.model.widget import BaseWidgetInfo
from evidently.pydantic_utils import autoregister
from evidently.renderers.html_widgets import CounterData
Expand Down Expand Up @@ -229,3 +231,87 @@ async def build(
fig.update_layout(barmode=self.barmode.value)

return plotly_figure(title=self.title, figure=fig, size=self.size)


@autoregister
class DistributionPanel(DashboardPanel):
    """Dashboard panel that plots a by-label metric value as bars over time.

    Every label found in the loaded data points becomes one bar trace; the
    x axis carries the data point timestamps and ``barmode`` is applied to the
    figure layout.
    """

    class Config:
        type_alias = "evidently:dashboard_panel:DistributionPanel"

    # Selects the metric (and the field of its result) that is plotted.
    value: PanelValue
    # Forwarded to the plotly layout as ``barmode``.
    barmode: HistBarMode = HistBarMode.STACK

    @assign_panel_id
    async def build(
        self,
        data_storage: "DataStorage",
        project_id: ProjectID,
        timestamp_start: Optional[datetime.datetime],
        timestamp_end: Optional[datetime.datetime],
    ) -> BaseWidgetInfo:
        """Load the data points for ``self.value`` and render them as a bar chart.

        Args:
            data_storage: storage queried through ``load_points_as_type``.
            project_id: project whose data points are loaded.
            timestamp_start: lower time bound forwarded to the storage query.
            timestamp_end: upper time bound forwarded to the storage query.

        Returns:
            The widget produced by ``plotly_figure`` for the built figure.

        Raises:
            ValueError: if no metric matches ``self.value`` or more than one does.
        """
        label_values: DataPointsAsType[dict] = await data_storage.load_points_as_type(
            dict,  # type: ignore[arg-type]
            project_id,
            self.filter,
            [self.value],
            timestamp_start,
            timestamp_end,
        )
        # Only one PanelValue was requested, so only the first result is relevant.
        results = label_values[0]
        if not results:
            raise ValueError(f"Cannot build hist from {self.value}")
        if len(results) > 1:
            raise ValueError(f"Ambiguious metrics for {self.value}")
        # Exactly one entry is left at this point.
        metric, points = next(iter(results.items()))
        fingerprint = metric.get_fingerprint()

        # Single pass over the points: collect the x axis, the per-point
        # label->value mappings and the union of all labels seen.
        timestamps: List[datetime.datetime] = []
        snapshot_ids: List[SnapshotID] = []
        per_point_values: List[Dict[Label, Value]] = []
        labels: Set[Label] = set()
        for point in points:
            timestamps.append(point.timestamp)
            snapshot_ids.append(point.snapshot_id)
            per_point_values.append(point.value)
            labels.update(point.value.keys())

        try:
            labels_sorted = sorted(labels)
        except TypeError:
            # Labels of different types (e.g. int and str) cannot be compared
            # with each other; order them by their string form instead of
            # failing the whole panel.
            labels_sorted = sorted(labels, key=str)

        hovertemplate = "<b>{name}: %{{y}}</b><br><b>Timestamp: %{{x}}</b>"
        fig = go.Figure(
            data=[
                go.Bar(
                    name=label,
                    x=timestamps,
                    # A label missing from a point yields None for that position.
                    y=[point_values.get(label) for point_values in per_point_values],
                    hovertemplate=hovertemplate.format(name=label),
                    customdata=[
                        {"metric_fingerprint": fingerprint, "snapshot_id": str(snapshot_id)}
                        for snapshot_id in snapshot_ids
                    ],
                )
                for label in labels_sorted
            ]
        )
        # Change the bar mode
        fig.update_layout(barmode=self.barmode.value)

        return plotly_figure(title=self.title, figure=fig, size=self.size)

0 comments on commit d84c97b

Please sign in to comment.