diff --git a/evidently/tabs/prob_classification_performance_tab.py b/evidently/tabs/prob_classification_performance_tab.py index f338ad365c..839662818b 100644 --- a/evidently/tabs/prob_classification_performance_tab.py +++ b/evidently/tabs/prob_classification_performance_tab.py @@ -15,13 +15,14 @@ from evidently.widgets.prob_class_prod_metrics_matrix_widget import ProbClassProdMetricsMatrixWidget from evidently.widgets.prob_class_ref_prediction_cloud_widget import ProbClassRefPredictionCloudWidget from evidently.widgets.prob_class_prod_prediction_cloud_widget import ProbClassProdPredictionCloudWidget +from evidently.widgets.prob_class_ref_pred_distr_widget import ProbClassRefPredDistrWidget +from evidently.widgets.prob_class_prod_pred_distr_widget import ProbClassProdPredDistrWidget from evidently.widgets.prob_class_ref_roc_curve_widget import ProbClassRefRocCurveWidget from evidently.widgets.prob_class_prod_roc_curve_widget import ProbClassProdRocCurveWidget from evidently.widgets.prob_class_ref_pr_curve_widget import ProbClassRefPRCurveWidget from evidently.widgets.prob_class_prod_pr_curve_widget import ProbClassProdPRCurveWidget from evidently.widgets.prob_class_ref_pr_table_widget import ProbClassRefPRTableWidget from evidently.widgets.prob_class_prod_pr_table_widget import ProbClassProdPRTableWidget -#from evidently.widgets.tabs_widget import TabsWidget from evidently.widgets.prob_class_confusion_based_feature_distr_table_widget import ProbClassConfusionBasedFeatureDistrTable from evidently.widgets.widget import Widget @@ -42,6 +43,8 @@ def _get_widgets(self) -> List[Widget]: ProbClassProdMetricsMatrixWidget("Current: Quality Metrics by Class"), ProbClassRefPredictionCloudWidget("Reference: Class Separation Quality"), ProbClassProdPredictionCloudWidget("Current: Class Separation Quality"), + ProbClassRefPredDistrWidget("Reference: Probability Distribution"), + ProbClassProdPredDistrWidget("Current: Probability Distribution"), 
class ProbClassProdPredDistrWidget(Widget):
    """Per-class distribution of predicted probabilities on the current data.

    For every class column listed under ``prediction`` the widget plots the
    predicted probability of that class, split into rows whose target equals
    the class and all remaining rows ("other"). Each class gets its own tab
    in a ``tabbed_graph`` widget.
    """

    def __init__(self, title: str):
        super().__init__()
        self.title = title

    def get_info(self) -> BaseWidgetInfo:
        """Return the widget info produced by the last ``calculate`` call.

        No error is raised when nothing could be built: the stored value is
        returned as is, and it is ``None`` whenever ``calculate`` found no
        production data, no target or no usable prediction columns.
        """
        return self.wi

    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the tabbed probability-distribution widget from ``production_data``.

        Args:
            reference_data: only inspected for the default ``target`` /
                ``prediction`` column names when ``column_mapping`` is empty.
            production_data: frame the plots are built from; may be ``None``.
                It is not modified - cleaning is done on a copy.
            column_mapping: optional mapping with ``target`` and ``prediction``
                entries; ``prediction`` is expected to be a collection of
                per-class probability column names.

        The result is stored in ``self.wi``. It is set to ``None`` when the
        production data, the target or the per-class prediction columns are
        missing, or when no rows are left to plot after cleaning.
        """
        target_column, prediction_column = self._resolve_columns(reference_data, column_mapping)

        # A single column name (e.g. the default 'prediction') cannot be split
        # into per-class probabilities, so it is treated as "not available"
        # instead of being iterated character by character.
        if (production_data is None or target_column is None
                or prediction_column is None or isinstance(prediction_column, str)):
            self.wi = None
            return

        # Clean a copy: rows with inf/NaN in any column are dropped, but the
        # caller's frame is left untouched.
        cleaned = production_data.replace([np.inf, -np.inf], np.nan).dropna(axis=0, how='any')

        graphs = []
        for label in list(prediction_column):
            graph = self._build_graph(cleaned, target_column, label)
            if graph is not None:
                graphs.append(graph)

        if not graphs:
            # Nothing to show (no class columns or no rows left after cleaning).
            self.wi = None
            return

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="tabbed_graph",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            size=1,
            params={
                "graphs": graphs
            },
            additionalGraphs=[],
        )

    @staticmethod
    def _resolve_columns(reference_data, column_mapping):
        """Return ``(target, prediction)`` from the mapping or from default column names."""
        if column_mapping:
            return column_mapping.get('target'), column_mapping.get('prediction')

        columns = reference_data.columns
        target_column = 'target' if 'target' in columns else None
        prediction_column = 'prediction' if 'prediction' in columns else None
        return target_column, prediction_column

    @staticmethod
    def _build_graph(data, target_column, label):
        """Build the tab description for one class, or ``None`` if there are no rows to plot."""
        is_label = data[target_column] == label

        # (legend name, colour, probabilities of `label`) for rows of the
        # class itself and for all other rows.
        groups = [
            (str(label), red, data.loc[is_label, label]),
            ("other", grey, data.loc[~is_label, label]),
        ]
        # An empty series cannot be binned by create_distplot, so a group
        # without rows is left out of the figure rather than failing the widget.
        groups = [group for group in groups if len(group[2]) > 0]
        if not groups:
            return None

        pred_distr = ff.create_distplot(
            [values for _, _, values in groups],
            [name for name, _, _ in groups],
            colors=[colour for _, colour, _ in groups],
            bin_size=0.05,
            show_curve=False,
            show_rug=True,
        )

        pred_distr.update_layout(
            xaxis_title="Probability",
            yaxis_title="Share",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )

        pred_distr_json = json.loads(pred_distr.to_json())

        return {
            "id": "tab_" + str(label),
            "title": str(label),
            "graph": {
                "data": pred_distr_json["data"],
                "layout": pred_distr_json["layout"],
            }
        }
class ProbClassRefPredDistrWidget(Widget):
    """Per-class distribution of predicted probabilities on the reference data.

    For every class column listed under ``prediction`` the widget plots the
    predicted probability of that class, split into rows whose target equals
    the class and all remaining rows ("other"). Each class gets its own tab
    in a ``tabbed_graph`` widget.
    """

    def __init__(self, title: str):
        super().__init__()
        self.title = title

    def get_info(self) -> BaseWidgetInfo:
        """Return the widget info produced by the last ``calculate`` call.

        Raises:
            ValueError: if ``calculate`` did not produce a widget.
        """
        if self.wi:
            return self.wi
        raise ValueError("No prediction or target data provided")

    def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
        """Build the tabbed probability-distribution widget from ``reference_data``.

        Args:
            reference_data: frame the plots are built from. It is not
                modified - cleaning is done on a copy.
            production_data: only checked for presence; the widget takes
                size 1 when it is given and size 2 otherwise.
            column_mapping: optional mapping with ``target`` and ``prediction``
                entries; ``prediction`` is expected to be a collection of
                per-class probability column names.

        The result is stored in ``self.wi``. It is set to ``None`` when the
        target or the per-class prediction columns are missing, or when no
        rows are left to plot after cleaning.
        """
        target_column, prediction_column = self._resolve_columns(reference_data, column_mapping)

        # A single column name (e.g. the default 'prediction') cannot be split
        # into per-class probabilities, so it is treated as "not available"
        # instead of being iterated character by character.
        if (target_column is None or prediction_column is None
                or isinstance(prediction_column, str)):
            self.wi = None
            return

        # Clean a copy: rows with inf/NaN in any column are dropped, but the
        # caller's frame is left untouched.
        cleaned = reference_data.replace([np.inf, -np.inf], np.nan).dropna(axis=0, how='any')

        graphs = []
        for label in list(prediction_column):
            graph = self._build_graph(cleaned, target_column, label)
            if graph is not None:
                graphs.append(graph)

        if not graphs:
            # Nothing to show (no class columns or no rows left after cleaning).
            self.wi = None
            return

        self.wi = BaseWidgetInfo(
            title=self.title,
            type="tabbed_graph",
            details="",
            alertStats=AlertStats(),
            alerts=[],
            alertsPosition="row",
            insights=[],
            # Half width next to the production widget, full width when alone.
            size=1 if production_data is not None else 2,
            params={
                "graphs": graphs
            },
            additionalGraphs=[],
        )

    @staticmethod
    def _resolve_columns(reference_data, column_mapping):
        """Return ``(target, prediction)`` from the mapping or from default column names."""
        if column_mapping:
            return column_mapping.get('target'), column_mapping.get('prediction')

        columns = reference_data.columns
        target_column = 'target' if 'target' in columns else None
        prediction_column = 'prediction' if 'prediction' in columns else None
        return target_column, prediction_column

    @staticmethod
    def _build_graph(data, target_column, label):
        """Build the tab description for one class, or ``None`` if there are no rows to plot."""
        is_label = data[target_column] == label

        # (legend name, colour, probabilities of `label`) for rows of the
        # class itself and for all other rows.
        groups = [
            (str(label), red, data.loc[is_label, label]),
            ("other", grey, data.loc[~is_label, label]),
        ]
        # An empty series cannot be binned by create_distplot, so a group
        # without rows is left out of the figure rather than failing the widget.
        groups = [group for group in groups if len(group[2]) > 0]
        if not groups:
            return None

        pred_distr = ff.create_distplot(
            [values for _, _, values in groups],
            [name for name, _, _ in groups],
            colors=[colour for _, colour, _ in groups],
            bin_size=0.05,
            show_curve=False,
            show_rug=True,
        )

        pred_distr.update_layout(
            xaxis_title="Probability",
            yaxis_title="Share",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )

        pred_distr_json = json.loads(pred_distr.to_json())

        return {
            "id": "tab_" + str(label),
            "title": str(label),
            "graph": {
                "data": pred_distr_json["data"],
                "layout": pred_distr_json["layout"],
            }
        }
a/evidently/widgets/reg_prod_quality_metrics_widget.py +++ b/evidently/widgets/reg_prod_quality_metrics_widget.py @@ -68,23 +68,14 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, me = np.mean(production_data[prediction_column] - production_data[target_column]) sde = np.std(production_data[prediction_column] - production_data[target_column], ddof = 1) - abs_err = list(map(lambda x : abs(x[0] - x[1]), - zip(production_data[prediction_column], production_data[target_column]))) + abs_err = np.abs(production_data[prediction_column] - production_data[target_column]) mae = np.mean(abs_err) sdae = np.std(abs_err, ddof = 1) - abs_perc_err = list(map(lambda x : 100*abs(x[0] - x[1])/x[0], - zip(production_data[prediction_column], production_data[target_column]))) + abs_perc_err = 100.*np.abs(production_data[prediction_column] - production_data[target_column])/production_data[target_column] mape = np.mean(abs_perc_err) sdape = np.std(abs_perc_err, ddof = 1) - #sqrt_err = list(map(lambda x : (x[0] - x[1])**2, - # zip(production_data[prediction_column], production_data[target_column]))) - #mse = np.mean(sqrt_err) - #sdse = np.std(sqrt_err, ddof = 1) - - #error_norm_json = json.loads(error_norm.to_json()) - self.wi = BaseWidgetInfo( title=self.title, type="counter", diff --git a/evidently/widgets/reg_ref_quality_metrics_widget.py b/evidently/widgets/reg_ref_quality_metrics_widget.py index 3d73d071b6..a5341bf846 100644 --- a/evidently/widgets/reg_ref_quality_metrics_widget.py +++ b/evidently/widgets/reg_ref_quality_metrics_widget.py @@ -66,24 +66,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, #calculate quality metrics me = np.mean(reference_data[prediction_column] - reference_data[target_column]) sde = np.std(reference_data[prediction_column] - reference_data[target_column], ddof = 1) - - abs_err = list(map(lambda x : abs(x[0] - x[1]), - zip(reference_data[target_column], 
reference_data[prediction_column]))) + + abs_err = np.abs(reference_data[prediction_column] - reference_data[target_column]) mae = np.mean(abs_err) sdae = np.std(abs_err, ddof = 1) - abs_perc_err = list(map(lambda x : 100*abs(x[0] - x[1])/x[0], - zip(reference_data[target_column], reference_data[prediction_column]))) + abs_perc_err = 100.*np.abs(reference_data[prediction_column] - reference_data[target_column])/reference_data[target_column] mape = np.mean(abs_perc_err) sdape = np.std(abs_perc_err, ddof = 1) - #sqrt_err = list(map(lambda x : (x[0] - x[1])**2, - # zip(reference_data[target_column], reference_data[prediction_column]))) - #mse = np.mean(sqrt_err) - #sdse = np.std(sqrt_err, ddof = 1) - - #error_norm_json = json.loads(error_norm.to_json()) - self.wi = BaseWidgetInfo( title="Reference: Model Quality (+/- std)", type="counter",