Patch summary (free text ahead of the first "diff --git" header; not part of any hunk):

  * base_metric.py: use isinstance(value, ColumnName) instead of
    issubclass(type(value), ColumnName) in required_features.
  * calculation_engine/engine.py: bind TInputData to GenericInputData, annotate
    the execute_metrics context parameter as "Context" (imported under
    TYPE_CHECKING only), hand generated features to context.set_features(...)
    instead of assigning context.features directly, and annotate the return
    type of generate_additional_features.
  * calculation_engine/python_engine.py, spark/engine.py: add return
    annotations to generate_additional_features.
  * suite/base_suite.py: add the RunMetadata model, Context.run_metadata,
    Context.set_features (stores the features and records one
    FeatureDefinition per generated feature in run_metadata.descriptors), and
    carry run_metadata through ContextPayload.from_context.
  * utils/data_preprocessing.py: add the FeatureDefinition model.

Review notes (to confirm with the author):

  * Context.set_features returns early when features is None, so it does not
    reset context.features the way the previous direct assignment did; the
    Spark engine's generate_additional_features returns None. Confirm that
    keeping any earlier value is intended.
  * feature_class is dereferenced under "type: ignore[union-attr]", which
    suggests it may be None for some column names; in that case
    feature_class.feature_type would raise AttributeError. Verify that every
    generated feature's feature_name() carries a feature_class.

diff --git a/src/evidently/base_metric.py b/src/evidently/base_metric.py
index 6608da0fa9..0a84cbbd3b 100644
--- a/src/evidently/base_metric.py
+++ b/src/evidently/base_metric.py
@@ -283,7 +283,7 @@ def required_features(self, data_definition: DataDefinition) -> List["GeneratedF
         for field, value in sorted(self.__dict__.items(), key=lambda x: x[0]):
             if field in ["context"]:
                 continue
-            if issubclass(type(value), ColumnName) and value.feature_class is not None:
+            if isinstance(value, ColumnName) and value.feature_class is not None:
                 required_features.append(value.feature_class)
         return required_features
 
diff --git a/src/evidently/calculation_engine/engine.py b/src/evidently/calculation_engine/engine.py
index e3cad0772b..b51fc4fc5e 100644
--- a/src/evidently/calculation_engine/engine.py
+++ b/src/evidently/calculation_engine/engine.py
@@ -1,6 +1,7 @@
 import abc
 import functools
 import logging
+from typing import TYPE_CHECKING
 from typing import Dict
 from typing import Generic
 from typing import List
@@ -19,8 +20,11 @@
 from evidently.features.generated_features import GeneratedFeature
 from evidently.utils.data_preprocessing import DataDefinition
 
+if TYPE_CHECKING:
+    from evidently.suite.base_suite import Context
+
 TMetricImplementation = TypeVar("TMetricImplementation", bound=MetricImplementation)
-TInputData = TypeVar("TInputData")
+TInputData = TypeVar("TInputData", bound=GenericInputData)
 
 
 class Engine(Generic[TMetricImplementation, TInputData]):
@@ -34,10 +38,10 @@ def set_metrics(self, metrics):
     def set_tests(self, tests):
         self.tests = tests
 
-    def execute_metrics(self, context, data: GenericInputData):
+    def execute_metrics(self, context: "Context", data: GenericInputData):
         calculations: Dict[Metric, Union[ErrorResult, MetricResult]] = {}
         converted_data = self.convert_input_data(data)
-        context.features = self.generate_additional_features(converted_data)
+        context.set_features(self.generate_additional_features(converted_data))
         context.data = converted_data
         for metric, calculation in self.get_metric_execution_iterator():
             if calculation not in calculations:
@@ -65,7 +69,7 @@ def get_data_definition(
         raise NotImplementedError()
 
     @abc.abstractmethod
-    def generate_additional_features(self, data: TInputData):
+    def generate_additional_features(self, data: TInputData) -> Optional[Dict[tuple, GeneratedFeature]]:
         raise NotImplementedError
 
     def get_metric_implementation(self, metric):
diff --git a/src/evidently/calculation_engine/python_engine.py b/src/evidently/calculation_engine/python_engine.py
index 8a185bf341..d9364df50c 100644
--- a/src/evidently/calculation_engine/python_engine.py
+++ b/src/evidently/calculation_engine/python_engine.py
@@ -54,10 +54,10 @@ def get_data_definition(
             raise ValueError("PandasEngine works only with pd.DataFrame input data")
         return create_data_definition(reference_data, current_data, column_mapping, categorical_features_cardinality)
 
-    def generate_additional_features(self, data: PythonInputData):
+    def generate_additional_features(self, data: PythonInputData) -> Dict[tuple, GeneratedFeature]:
         curr_additional_data = None
         ref_additional_data = None
-        features = {}
+        features: Dict[tuple, GeneratedFeature] = {}
         for metric, calculation in self.get_metric_execution_iterator():
             try:
                 required_features = metric.required_features(data.data_definition)
diff --git a/src/evidently/spark/engine.py b/src/evidently/spark/engine.py
index 178153c25a..b55e0fcd16 100644
--- a/src/evidently/spark/engine.py
+++ b/src/evidently/spark/engine.py
@@ -1,4 +1,5 @@
 import abc
+from typing import Dict
 from typing import Generic
 from typing import List
 from typing import Optional
@@ -16,6 +17,7 @@
 from evidently.calculation_engine.engine import Engine
 from evidently.calculation_engine.metric_implementation import MetricImplementation
 from evidently.core import ColumnType
+from evidently.features.generated_features import GeneratedFeature
 from evidently.spark.base import SparkDataFrame
 from evidently.spark.base import SparkSeries
 from evidently.spark.base import create_data_definition_spark
@@ -127,7 +129,7 @@ def get_data_definition(
     ):
         return create_data_definition_spark(current_data, reference_data, column_mapping)
 
-    def generate_additional_features(self, data: SparkInputData):
+    def generate_additional_features(self, data: SparkInputData) -> Optional[Dict[tuple, GeneratedFeature]]:
         pass
 
 
diff --git a/src/evidently/suite/base_suite.py b/src/evidently/suite/base_suite.py
index 306fed3574..8fe3e01671 100644
--- a/src/evidently/suite/base_suite.py
+++ b/src/evidently/suite/base_suite.py
@@ -47,6 +47,7 @@
 from evidently.utils.dashboard import save_data_file
 from evidently.utils.dashboard import save_lib_files
 from evidently.utils.data_preprocessing import DataDefinition
+from evidently.utils.data_preprocessing import FeatureDefinition
 
 USE_UJSON = False
 
@@ -91,6 +92,10 @@ def _discover_dependencies(test: Union[Metric, Test]) -> Iterator[Tuple[str, Uni
             yield field_name, field
 
 
+class RunMetadata(BaseModel):
+    descriptors: Dict[str, FeatureDefinition] = {}
+
+
 @dataclasses.dataclass
 class Context:
     """Pipeline execution context tracks pipeline execution and lifecycle"""
@@ -106,6 +111,7 @@ class Context:
     features: Optional[Dict[tuple, GeneratedFeature]] = None
     options: Options = Options()
     data_definition: Optional["DataDefinition"] = None
+    run_metadata: RunMetadata = dataclasses.field(default_factory=RunMetadata)
 
     def get_data_definition(
         self,
@@ -125,6 +131,20 @@ def get_data_definition(
     def get_datasets(self):
         return self.engine.form_datasets(self.data, self.features, self.data_definition)
 
+    def set_features(self, features: Optional[Dict[tuple, GeneratedFeature]]):
+        if features is None:
+            return
+        self.features = features
+        for feature in features.values():
+            feature_name = feature.feature_name()
+            feature_class = feature_name.feature_class
+            self.run_metadata.descriptors[feature_name.name] = FeatureDefinition(
+                feature_name=feature_name.name,
+                display_name=feature_name.display_name,
+                feature_type=feature_class.feature_type,  # type: ignore[union-attr]
+                feature_class=feature_class.__class__.__name__,
+            )
+
 
 class ContextPayload(BaseModel):
     metrics: List[Metric]
@@ -133,6 +153,7 @@ class ContextPayload(BaseModel):
     test_results: List[TestResult]
     options: Options = Options()
     data_definition: Optional[DataDefinition]
+    run_metadata: RunMetadata = RunMetadata()
 
     @classmethod
     def from_context(cls, context: Context):
@@ -143,6 +164,7 @@ def from_context(cls, context: Context):
             test_results=list(context.test_results.values()),
             options=context.options,
             data_definition=context.data_definition,
+            run_metadata=context.run_metadata,
         )
 
     def to_context(self) -> Context:
diff --git a/src/evidently/utils/data_preprocessing.py b/src/evidently/utils/data_preprocessing.py
index d8a433f7f4..4c84f9d652 100644
--- a/src/evidently/utils/data_preprocessing.py
+++ b/src/evidently/utils/data_preprocessing.py
@@ -35,6 +35,13 @@ def __init__(self, column_name: str, column_type: ColumnType):
         super().__init__(column_name=column_name, column_type=column_type)
 
 
+class FeatureDefinition(BaseModel):
+    feature_name: str
+    display_name: Optional[str]
+    feature_type: ColumnType
+    feature_class: str
+
+
 class PredictionColumns(BaseModel):
     predicted_values: Optional[ColumnDefinition] = None
     prediction_probas: Optional[List[ColumnDefinition]] = None