diff --git a/.gitignore b/.gitignore
index c5f32eeaa1..aa9833b22d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,5 @@ evidently/examples/.DS_Store
dist
build
MANIFEST
+
+__pycache__
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000..d10e8fefd2
--- /dev/null
+++ b/config.json
@@ -0,0 +1,22 @@
+{
+ "data_format": {
+ "separator": ",",
+ "header": true,
+ "date_column": "dteday"
+ },
+ "column_mapping" : {},
+ "profile_sections": ["data_drift"],
+ "pretty_print": true,
+ "sampling": {
+ "reference": {
+ "type": "none",
+ "n": 1,
+ "ratio": 0.1
+ },
+ "current": {
+ "type": "nth",
+ "n": 2,
+ "ratio": 0.1
+ }
+ }
+}
\ No newline at end of file
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000000..f79d65ee38
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,18 @@
+data_format:
+ separator: ","
+ header: true
+ date_column: "dteday"
+column_mapping: {}
+profile_sections:
+ - "data_drift"
+pretty_print: true
+sampling:
+ reference:
+ type: "simple" # could be "none", "simple", "random"
+ n: 5 # used with simple sampling, number of rows to skip
+ ratio: 0.1 # used with random sampling, part of data to take from chunk
+ random_seed: 4 # used with random sampling, used as seed for random generator
+ current:
+ type: "nth" # could be "none", "nth", "random" (TODO: confirm accepted names against the loader)
+ n: 5 # used with simple sampling, number of rows to skip
+ ratio: 0.1 # used with random sampling, part of data to take from chunk
\ No newline at end of file
diff --git a/evidently/__main__.py b/evidently/__main__.py
index fee7d9b33e..d09d4758d7 100644
--- a/evidently/__main__.py
+++ b/evidently/__main__.py
@@ -1,12 +1,16 @@
import argparse
import json
+import logging
import os
import sys
from typing import Dict, List
from dataclasses import dataclass
+import yaml
+
from evidently.runner.dashboard_runner import DashboardRunnerOptions, DashboardRunner
+from evidently.runner.loader import SamplingOptions
from evidently.runner.profile_runner import ProfileRunner, ProfileRunnerOptions
from evidently.runner.runner import DataOptions
@@ -18,10 +22,17 @@ class DataFormatOptions:
date_column: str
+@dataclass
+class Sampling:
+ """Sampling options for the reference dataset and for the current dataset."""
+ reference: SamplingOptions
+ current: SamplingOptions
+
+
@dataclass
class CalculateOptions:
data_format: DataFormatOptions
column_mapping: Dict[str, str]
+ sampling: Sampling
@dataclass
@@ -35,22 +46,42 @@ class ProfileOptions(CalculateOptions):
pretty_print: bool = False
+def __get_not_none(d, key, default):
+ """Return the value stored under ``key`` in ``d``, or ``default`` if it is missing or ``None``."""
+ value = d.get(key)
+ return value if value is not None else default
+
+
def calculate_dashboard(config: str, reference: str, current: str, output_path: str, report_name: str, **_kv):
with open(config) as f_config:
- opts_data = json.load(f_config)
+ if config.endswith(".yaml") or config.endswith(".yml"):
+ opts_data = yaml.load(f_config, Loader=yaml.SafeLoader)
+ elif config.endswith(".json"):
+ opts_data = json.load(f_config)
+ else:
+ raise Exception(f"config .{config.split('.')[-1]} not supported")
+
+ sampling = __get_not_none(opts_data, "sampling", {})
+ ref_sampling = __get_not_none(sampling, "reference", {})
+ cur_sampling = __get_not_none(sampling, "current", {})
+
opts = DashboardOptions(data_format=DataFormatOptions(**opts_data["data_format"]),
column_mapping=opts_data["column_mapping"],
- dashboard_tabs=opts_data["dashboard_tabs"])
+ dashboard_tabs=opts_data["dashboard_tabs"],
+ sampling=Sampling(
+ reference=SamplingOptions(**ref_sampling),
+ current=SamplingOptions(**cur_sampling),
+ ))
runner = DashboardRunner(DashboardRunnerOptions(
reference_data_path=reference,
reference_data_options=DataOptions(date_column=opts.data_format.date_column,
separator=opts.data_format.separator,
header=opts.data_format.header),
- production_data_path=current,
- production_data_options=DataOptions(date_column=opts.data_format.date_column,
- separator=opts.data_format.separator,
- header=opts.data_format.header),
+ reference_data_sampling=opts.sampling.reference,
+ current_data_path=current,
+ current_data_options=DataOptions(date_column=opts.data_format.date_column,
+ separator=opts.data_format.separator,
+ header=opts.data_format.header),
+ current_data_sampling=opts.sampling.current,
dashboard_tabs=opts.dashboard_tabs,
column_mapping=opts.column_mapping,
output_path=os.path.join(output_path, report_name),
@@ -60,21 +91,37 @@ def calculate_dashboard(config: str, reference: str, current: str, output_path:
def calculate_profile(config: str, reference: str, current: str, output_path: str, report_name: str, **_kv):
with open(config) as f_config:
- opts_data = json.load(f_config)
+ if config.endswith(".yaml") or config.endswith(".yml"):
+ opts_data = yaml.load(f_config, Loader=yaml.SafeLoader)
+ elif config.endswith(".json"):
+ opts_data = json.load(f_config)
+ else:
+ raise Exception(f"config .{config.split('.')[-1]} not supported")
+
+ sampling = __get_not_none(opts_data, "sampling", {})
+ ref_sampling = __get_not_none(sampling, "reference", {})
+ cur_sampling = __get_not_none(sampling, "current", {})
+
opts = ProfileOptions(data_format=DataFormatOptions(**opts_data["data_format"]),
column_mapping=opts_data["column_mapping"],
profile_parts=opts_data["profile_sections"],
- pretty_print=opts_data["pretty_print"])
+ pretty_print=opts_data["pretty_print"],
+ sampling=Sampling(
+ reference=SamplingOptions(**ref_sampling),
+ current=SamplingOptions(**cur_sampling),
+ ))
runner = ProfileRunner(ProfileRunnerOptions(
reference_data_path=reference,
reference_data_options=DataOptions(date_column=opts.data_format.date_column,
separator=opts.data_format.separator,
header=opts.data_format.header),
- production_data_path=current,
- production_data_options=DataOptions(date_column=opts.data_format.date_column,
- separator=opts.data_format.separator,
- header=opts.data_format.header),
+ reference_data_sampling=opts.sampling.reference,
+ current_data_path=current,
+ current_data_options=DataOptions(date_column=opts.data_format.date_column,
+ separator=opts.data_format.separator,
+ header=opts.data_format.header),
+ current_data_sampling=opts.sampling.current,
profile_parts=opts.profile_parts,
column_mapping=opts.column_mapping,
output_path=os.path.join(output_path, report_name),
@@ -92,10 +139,13 @@ def _add_default_parameters(configurable_parser, default_output_name: str):
configurable_parser.add_argument("--reference", dest="reference", required=True, help="Path to reference data")
configurable_parser.add_argument("--current", dest="current", help="Path to current data")
configurable_parser.add_argument("--output_path", dest="output_path", required=True, help="Path to store report")
- configurable_parser.add_argument("--report_name", dest="report_name", default=default_output_name, help="Report name")
+ configurable_parser.add_argument("--report_name", dest="report_name", default=default_output_name,
+ help="Report name")
configurable_parser.add_argument("--config", dest="config", required=True, help="Path to configuration")
+logging.basicConfig(level=logging.INFO)
+
parser = argparse.ArgumentParser()
parsers = parser.add_subparsers()
diff --git a/evidently/analyzers/cat_target_drift_analyzer.py b/evidently/analyzers/cat_target_drift_analyzer.py
index d301b51a55..165dbeaa07 100644
--- a/evidently/analyzers/cat_target_drift_analyzer.py
+++ b/evidently/analyzers/cat_target_drift_analyzer.py
@@ -10,7 +10,7 @@
class CatTargetDriftAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -49,25 +49,25 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
reference_data.dropna(axis=0, how='any', inplace=True)
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
ref_feature_vc = reference_data[target_column].value_counts()
- prod_feature_vc = production_data[target_column].value_counts()
+ current_feature_vc = current_data[target_column].value_counts()
keys = set(list(reference_data[target_column].unique()) +
- list(production_data[target_column].unique()))
+ list(current_data[target_column].unique()))
ref_feature_dict = dict.fromkeys(keys, 0)
for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
ref_feature_dict[key] = item
- prod_feature_dict = dict.fromkeys(keys, 0)
- for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
- prod_feature_dict[key] = item
+ current_feature_dict = dict.fromkeys(keys, 0)
+ for key, item in zip(current_feature_vc.index, current_feature_vc.values):
+ current_feature_dict[key] = item
f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
- f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
+ f_obs = [value[1] for value in sorted(current_feature_dict.items())]
target_p_value = chisquare(f_exp, f_obs)[1]
result['metrics']["target_name"] = target_column
@@ -80,25 +80,25 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
reference_data.dropna(axis=0, how='any', inplace=True)
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
ref_feature_vc = reference_data[prediction_column].value_counts()
- prod_feature_vc = production_data[prediction_column].value_counts()
+ current_feature_vc = current_data[prediction_column].value_counts()
keys = set(list(reference_data[prediction_column].unique()) +
- list(production_data[prediction_column].unique()))
+ list(current_data[prediction_column].unique()))
ref_feature_dict = dict.fromkeys(keys, 0)
for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
ref_feature_dict[key] = item
- prod_feature_dict = dict.fromkeys(keys, 0)
- for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
- prod_feature_dict[key] = item
+ current_feature_dict = dict.fromkeys(keys, 0)
+ for key, item in zip(current_feature_vc.index, current_feature_vc.values):
+ current_feature_dict[key] = item
f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
- f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
+ f_obs = [value[1] for value in sorted(current_feature_dict.items())]
pred_p_value = chisquare(f_exp, f_obs)[1]
result['metrics']["prediction_name"] = prediction_column
diff --git a/evidently/analyzers/classification_performance_analyzer.py b/evidently/analyzers/classification_performance_analyzer.py
index f06ce395d3..2d721d55f1 100644
--- a/evidently/analyzers/classification_performance_analyzer.py
+++ b/evidently/analyzers/classification_performance_analyzer.py
@@ -10,7 +10,7 @@
from sklearn import metrics
class ClassificationPerformanceAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -83,18 +83,18 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics']['reference']['confusion_matrix']['labels'] = labels
result['metrics']['reference']['confusion_matrix']['values'] = conf_matrix.tolist()
- if production_data is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
result['metrics']['current'] = {}
- accuracy_score = metrics.accuracy_score(production_data[target_column], production_data[prediction_column])
- avg_precision = metrics.precision_score(production_data[target_column], production_data[prediction_column],
+ accuracy_score = metrics.accuracy_score(current_data[target_column], current_data[prediction_column])
+ avg_precision = metrics.precision_score(current_data[target_column], current_data[prediction_column],
average='macro')
- avg_recall = metrics.recall_score(production_data[target_column], production_data[prediction_column],
+ avg_recall = metrics.recall_score(current_data[target_column], current_data[prediction_column],
average='macro')
- avg_f1 = metrics.f1_score(production_data[target_column], production_data[prediction_column],
+ avg_f1 = metrics.f1_score(current_data[target_column], current_data[prediction_column],
average='macro')
result['metrics']['current']['accuracy'] = accuracy_score
@@ -103,15 +103,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics']['current']['f1'] = avg_f1
#calculate class support and metrics matrix
- metrics_matrix = metrics.classification_report(production_data[target_column], production_data[prediction_column],
+ metrics_matrix = metrics.classification_report(current_data[target_column], current_data[prediction_column],
output_dict=True)
result['metrics']['current']['metrics_matrix'] = metrics_matrix
#calculate confusion matrix
- conf_matrix = metrics.confusion_matrix(production_data[target_column],
- production_data[prediction_column])
- labels = target_names if target_names else sorted(set(production_data[target_column]))
+ conf_matrix = metrics.confusion_matrix(current_data[target_column],
+ current_data[prediction_column])
+ labels = target_names if target_names else sorted(set(current_data[target_column]))
result['metrics']['current']['confusion_matrix'] = {}
result['metrics']['current']['confusion_matrix']['labels'] = labels
diff --git a/evidently/analyzers/data_drift_analyzer.py b/evidently/analyzers/data_drift_analyzer.py
index 4da35782e3..9dfb15e581 100644
--- a/evidently/analyzers/data_drift_analyzer.py
+++ b/evidently/analyzers/data_drift_analyzer.py
@@ -10,7 +10,7 @@
class DataDriftAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -47,37 +47,37 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics'] = {}
for feature_name in num_feature_names:
result['metrics'][feature_name] = dict(
- prod_small_hist=[t.tolist() for t in np.histogram(production_data[feature_name][np.isfinite(production_data[feature_name])],
+ current_small_hist=[t.tolist() for t in np.histogram(current_data[feature_name][np.isfinite(current_data[feature_name])],
bins=10, density=True)],
ref_small_hist=[t.tolist() for t in np.histogram(reference_data[feature_name][np.isfinite(reference_data[feature_name])],
bins=10, density=True)],
feature_type='num',
- p_value=ks_2samp(reference_data[feature_name], production_data[feature_name])[1]
+ p_value=ks_2samp(reference_data[feature_name], current_data[feature_name])[1]
)
for feature_name in cat_feature_names:
ref_feature_vc = reference_data[feature_name][np.isfinite(reference_data[feature_name])].value_counts()
- prod_feature_vc = production_data[feature_name][np.isfinite(production_data[feature_name])].value_counts()
+ current_feature_vc = current_data[feature_name][np.isfinite(current_data[feature_name])].value_counts()
keys = set(list(reference_data[feature_name][np.isfinite(reference_data[feature_name])].unique()) +
- list(production_data[feature_name][np.isfinite(production_data[feature_name])].unique()))
+ list(current_data[feature_name][np.isfinite(current_data[feature_name])].unique()))
ref_feature_dict = dict.fromkeys(keys, 0)
for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
ref_feature_dict[key] = item
- prod_feature_dict = dict.fromkeys(keys, 0)
- for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
- prod_feature_dict[key] = item
+ current_feature_dict = dict.fromkeys(keys, 0)
+ for key, item in zip(current_feature_vc.index, current_feature_vc.values):
+ current_feature_dict[key] = item
f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
- f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
+ f_obs = [value[1] for value in sorted(current_feature_dict.items())]
# CHI2 to be implemented for cases with different categories
p_value = chisquare(f_exp, f_obs)[1]
result['metrics'][feature_name] = dict(
- prod_small_hist=[t.tolist() for t in np.histogram(production_data[feature_name][np.isfinite(production_data[feature_name])],
+ current_small_hist=[t.tolist() for t in np.histogram(current_data[feature_name][np.isfinite(current_data[feature_name])],
bins=10, density=True)],
ref_small_hist=[t.tolist() for t in np.histogram(reference_data[feature_name][np.isfinite(reference_data[feature_name])],
bins=10, density=True)],
diff --git a/evidently/analyzers/num_target_drift_analyzer.py b/evidently/analyzers/num_target_drift_analyzer.py
index 47a5159724..e133b324d7 100644
--- a/evidently/analyzers/num_target_drift_analyzer.py
+++ b/evidently/analyzers/num_target_drift_analyzer.py
@@ -10,7 +10,7 @@
class NumTargetDriftAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -47,28 +47,28 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target
if target_column is not None:
#drift
- target_p_value = ks_2samp(reference_data[target_column], production_data[target_column])[1]
+ target_p_value = ks_2samp(reference_data[target_column], current_data[target_column])[1]
result['metrics']["target_name"] = target_column
result['metrics']["target_type"] = 'num'
result['metrics']["target_drift"] = target_p_value
#corr
ref_target_corr = reference_data[num_feature_names + [target_column]].corr()[target_column]
- curr_target_corr = production_data[num_feature_names + [target_column]].corr()[target_column]
+ curr_target_corr = current_data[num_feature_names + [target_column]].corr()[target_column]
target_corr = {'reference':ref_target_corr.to_dict(), 'current':curr_target_corr.to_dict()}
result['metrics']['target_correlations'] = target_corr
#prediction
if prediction_column is not None:
#drift
- pred_p_value = ks_2samp(reference_data[prediction_column], production_data[prediction_column])[1]
+ pred_p_value = ks_2samp(reference_data[prediction_column], current_data[prediction_column])[1]
result['metrics']["prediction_name"] = prediction_column
result['metrics']["prediction_type"] = 'num'
result['metrics']["prediction_drift"] = pred_p_value
#corr
ref_pred_corr = reference_data[num_feature_names + [prediction_column]].corr()[prediction_column]
- curr_pred_corr = production_data[num_feature_names + [prediction_column]].corr()[prediction_column]
+ curr_pred_corr = current_data[num_feature_names + [prediction_column]].corr()[prediction_column]
prediction_corr = {'reference':ref_pred_corr.to_dict(), 'current':curr_pred_corr.to_dict()}
result['metrics']['prediction_correlations'] = prediction_corr
diff --git a/evidently/analyzers/prob_classification_performance_analyzer.py b/evidently/analyzers/prob_classification_performance_analyzer.py
index 473a92ef0c..5ffad9e9a2 100644
--- a/evidently/analyzers/prob_classification_performance_analyzer.py
+++ b/evidently/analyzers/prob_classification_performance_analyzer.py
@@ -10,7 +10,7 @@
from sklearn import metrics, preprocessing
class ProbClassificationPerformanceAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -171,15 +171,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
pr_table.append([top, int(count), prob, int(tp), int(fp), precision, recall])
result['metrics']['reference']['pr_curve'][label] = pr_table
- if production_data is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
- binaraized_target = binaraizer.transform(production_data[target_column])
+ binaraized_target = binaraizer.transform(current_data[target_column])
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
@@ -191,15 +191,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
roc_auc = metrics.roc_auc_score(binaraized_target, array_prediction, average='macro')
log_loss = metrics.log_loss(binaraized_target, array_prediction)
else:
- roc_auc = metrics.roc_auc_score(binaraized_target, production_data[prediction_column[0]]) #problem!!!
- log_loss = metrics.log_loss(binaraized_target, production_data[prediction_column[0]]) #problem!!!
+ roc_auc = metrics.roc_auc_score(binaraized_target, current_data[prediction_column[0]]) #problem!!!
+ log_loss = metrics.log_loss(binaraized_target, current_data[prediction_column[0]]) #problem!!!
- accuracy_score = metrics.accuracy_score(production_data[target_column], prediction_labels)
- avg_precision = metrics.precision_score(production_data[target_column], prediction_labels,
+ accuracy_score = metrics.accuracy_score(current_data[target_column], prediction_labels)
+ avg_precision = metrics.precision_score(current_data[target_column], prediction_labels,
average='macro')
- avg_recall = metrics.recall_score(production_data[target_column], prediction_labels,
+ avg_recall = metrics.recall_score(current_data[target_column], prediction_labels,
average='macro')
- avg_f1 = metrics.f1_score(production_data[target_column], prediction_labels,
+ avg_f1 = metrics.f1_score(current_data[target_column], prediction_labels,
average='macro')
result['metrics']['current']['accuracy'] = accuracy_score
@@ -210,7 +210,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics']['current']['log_loss'] = log_loss
#calculate class support and metrics matrix
- metrics_matrix = metrics.classification_report(production_data[target_column], prediction_labels,
+ metrics_matrix = metrics.classification_report(current_data[target_column], prediction_labels,
output_dict=True)
result['metrics']['current']['metrics_matrix'] = metrics_matrix
@@ -219,7 +219,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics']['current']['roc_aucs'] = roc_aucs.tolist()
#calculate confusion matrix
- conf_matrix = metrics.confusion_matrix(production_data[target_column],
+ conf_matrix = metrics.confusion_matrix(current_data[target_column],
prediction_labels)
result['metrics']['current']['confusion_matrix'] = {}
@@ -229,20 +229,20 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#calulate ROC and PR curves, PR table
if len(prediction_column) <= 2:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = ['target']
- fpr, tpr, thrs = metrics.roc_curve(binaraized_target, production_data[prediction_column[0]])
+ fpr, tpr, thrs = metrics.roc_curve(binaraized_target, current_data[prediction_column[0]])
result['metrics']['current']['roc_curve'] = {'fpr':fpr.tolist(), 'tpr':tpr.tolist(), 'thrs':thrs.tolist()}
- pr, rcl, thrs = metrics.precision_recall_curve(binaraized_target, production_data[prediction_column[0]])
+ pr, rcl, thrs = metrics.precision_recall_curve(binaraized_target, current_data[prediction_column[0]])
result['metrics']['current']['pr_curve'] = {'pr':pr.tolist(), 'rcl':rcl.tolist(), 'thrs':thrs.tolist()}
pr_table = []
step_size = 0.05
binded = list(zip(binaraized_target['target'].tolist(),
- production_data[prediction_column[0]].tolist()))
+ current_data[prediction_column[0]].tolist()))
binded.sort(key = lambda item: item[1], reverse = True)
data_size = len(binded)
target_class_size = sum([x[0] for x in binded])
@@ -260,23 +260,23 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
else:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = prediction_column
result['metrics']['current']['roc_curve'] = {}
result['metrics']['current']['pr_curve'] = {}
for label in prediction_column:
- fpr, tpr, thrs = metrics.roc_curve(binaraized_target[label], production_data[label])
+ fpr, tpr, thrs = metrics.roc_curve(binaraized_target[label], current_data[label])
result['metrics']['current']['roc_curve'][label] = {'fpr':fpr.tolist(), 'tpr':tpr.tolist(), 'thrs':thrs.tolist()}
- pr, rcl, thrs = metrics.precision_recall_curve(binaraized_target[label], production_data[label])
+ pr, rcl, thrs = metrics.precision_recall_curve(binaraized_target[label], current_data[label])
result['metrics']['current']['pr_curve'][label] = {'pr':pr.tolist(), 'rcl':rcl.tolist(), 'thrs':thrs.tolist()}
pr_table = []
step_size = 0.05
binded = list(zip(binaraized_target[label].tolist(),
- production_data[label].tolist()))
+ current_data[label].tolist()))
binded.sort(key = lambda item: item[1], reverse = True)
data_size = len(binded)
target_class_size = sum([x[0] for x in binded])
diff --git a/evidently/analyzers/regression_performance_analyzer.py b/evidently/analyzers/regression_performance_analyzer.py
index 4da39e5235..1e3df96260 100644
--- a/evidently/analyzers/regression_performance_analyzer.py
+++ b/evidently/analyzers/regression_performance_analyzer.py
@@ -10,7 +10,7 @@
from sklearn import metrics
class RegressionPerformanceAnalyzer(Analyzer):
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping):
result = dict()
if column_mapping:
date_column = column_mapping.get('datetime')
@@ -97,19 +97,19 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
result['metrics']['reference']['underperformance']['underestimation'] = {'mean_error':float(mae_under), 'std_error':float(sd_under)}
result['metrics']['reference']['underperformance']['overestimation'] = {'mean_error':float(mae_over), 'std_error':float(sd_over)}
- if production_data is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#calculate quality metrics
- me = np.mean(production_data[prediction_column] - production_data[target_column])
- sde = np.std(production_data[prediction_column] - production_data[target_column], ddof = 1)
+ me = np.mean(current_data[prediction_column] - current_data[target_column])
+ sde = np.std(current_data[prediction_column] - current_data[target_column], ddof = 1)
- abs_err = np.abs(production_data[prediction_column] - production_data[target_column])
+ abs_err = np.abs(current_data[prediction_column] - current_data[target_column])
mae = np.mean(abs_err)
sdae = np.std(abs_err, ddof = 1)
- abs_perc_err = 100.*np.abs(production_data[prediction_column] - production_data[target_column])/production_data[target_column]
+ abs_perc_err = 100.*np.abs(current_data[prediction_column] - current_data[target_column])/current_data[target_column]
mape = np.mean(abs_perc_err)
sdape = np.std(abs_perc_err, ddof = 1)
@@ -117,8 +117,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
'error_std':float(sde), 'abs_error_std':float(sdae), 'abs_perc_error_std':float(sdape)}
#error normality
- prod_error = production_data[prediction_column] - production_data[target_column]
- qq_lines = probplot(prod_error, dist="norm", plot=None)
+ current_error = current_data[prediction_column] - current_data[target_column]
+ qq_lines = probplot(current_error, dist="norm", plot=None)
theoretical_q_x = np.linspace(qq_lines[0][0][0], qq_lines[0][0][-1], 100)
qq_dots = [t.tolist() for t in qq_lines[0]]
@@ -128,23 +128,23 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
'order_statistic_medians':[float(x) for x in qq_dots[1]], 'slope':float(qq_line[0]), 'intercept':float(qq_line[1]), 'r':float(qq_line[2])}
#underperformance metrics
- prod_quantile_5 = np.quantile(prod_error, .05)
- prod_quantile_95 = np.quantile(prod_error, .95)
+ current_quantile_5 = np.quantile(current_error, .05)
+ current_quantile_95 = np.quantile(current_error, .95)
- prod_mae = np.mean(prod_error)
- prod_mae_under = np.mean(prod_error[prod_error <= prod_quantile_5])
- prod_mae_exp = np.mean(prod_error[(prod_error > prod_quantile_5) & (prod_error < prod_quantile_95)])
- prod_mae_over = np.mean(prod_error[prod_error >= prod_quantile_95])
+ current_mae = np.mean(current_error)
+ current_mae_under = np.mean(current_error[current_error <= current_quantile_5])  # mean error at or below the 5% error quantile
+ current_mae_exp = np.mean(current_error[(current_error > current_quantile_5) & (current_error < current_quantile_95)])
+ current_mae_over = np.mean(current_error[current_error >= current_quantile_95])
- prod_sd = np.std(prod_error, ddof = 1)
- prod_sd_under = np.std(prod_error[prod_error <= prod_quantile_5], ddof = 1)
- prod_sd_exp = np.std(prod_error[(prod_error > prod_quantile_5) & (prod_error < prod_quantile_95)], ddof = 1)
- prod_sd_over = np.std(prod_error[prod_error >= prod_quantile_95], ddof = 1)
+ current_sd = np.std(current_error, ddof = 1)
+ current_sd_under = np.std(current_error[current_error <= current_quantile_5], ddof = 1)
+ current_sd_exp = np.std(current_error[(current_error > current_quantile_5) & (current_error < current_quantile_95)], ddof = 1)
+ current_sd_over = np.std(current_error[current_error >= current_quantile_95], ddof = 1)
result['metrics']['current']['underperformance'] = {}
- result['metrics']['current']['underperformance']['majority'] = {'mean_error':float(prod_mae_exp), 'std_error':float(prod_sd_exp)}
- result['metrics']['current']['underperformance']['underestimation'] = {'mean_error':float(prod_mae_under), 'std_error':float(prod_sd_under)}
- result['metrics']['current']['underperformance']['overestimation'] = {'mean_error':float(prod_mae_over), 'std_error':float(prod_sd_over)}
+ result['metrics']['current']['underperformance']['majority'] = {'mean_error':float(current_mae_exp), 'std_error':float(current_sd_exp)}
+ result['metrics']['current']['underperformance']['underestimation'] = {'mean_error':float(current_mae_under), 'std_error':float(current_sd_under)}
+ result['metrics']['current']['underperformance']['overestimation'] = {'mean_error':float(current_mae_over), 'std_error':float(current_sd_over)}
#error bias table
error_bias = {}
@@ -157,15 +157,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
ref_over_value = np.mean(reference_data[error >= quantile_95][feature_name])
ref_range_value = 0 if ref_over_value == ref_under_value else 100*abs(ref_over_value - ref_under_value)/(np.max(reference_data[feature_name]) - np.min(reference_data[feature_name]))
- prod_overal_value = np.mean(production_data[feature_name])
- prod_under_value = np.mean(production_data[prod_error <= prod_quantile_5][feature_name])
- prod_expected_value = np.mean(production_data[(prod_error > prod_quantile_5) & (prod_error < prod_quantile_95)][feature_name])
- prod_over_value = np.mean(production_data[prod_error >= prod_quantile_95][feature_name])
- prod_range_value = 0 if prod_over_value == prod_under_value else 100*abs(prod_over_value - prod_under_value)/(np.max(production_data[feature_name]) - np.min(production_data[feature_name]))
+ current_overal_value = np.mean(current_data[feature_name])
+ current_under_value = np.mean(current_data[current_error <= current_quantile_5][feature_name])
+ current_expected_value = np.mean(current_data[(current_error > current_quantile_5) & (current_error < current_quantile_95)][feature_name])
+ current_over_value = np.mean(current_data[current_error >= current_quantile_95][feature_name])
+ current_range_value = 0 if current_over_value == current_under_value else 100*abs(current_over_value - current_under_value)/(np.max(current_data[feature_name]) - np.min(current_data[feature_name]))
error_bias[feature_name] = {'feature_type':feature_type, 'ref_majority':float(ref_expected_value), 'ref_under':float(ref_under_value),
- 'ref_over':float(ref_over_value), 'ref_range':float(ref_range_value),'prod_majority':float(prod_expected_value), 'prod_under':float(prod_under_value),
- 'prod_over':float(prod_over_value), 'prod_range':float(prod_range_value)}
+ 'ref_over':float(ref_over_value), 'ref_range':float(ref_range_value),'current_majority':float(current_expected_value), 'current_under':float(current_under_value),
+ 'current_over':float(current_over_value), 'current_range':float(current_range_value)}
for feature_name in cat_feature_names:
feature_type = 'cat'
@@ -176,15 +176,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
ref_range_value = 1 if (ref_overal_value != ref_under_value) or (ref_over_value != ref_overal_value) \
or (ref_under_value != ref_overal_value) else 0
- prod_overal_value = production_data[feature_name].value_counts().idxmax()
- prod_under_value = production_data[prod_error <= prod_quantile_5][feature_name].value_counts().idxmax()
- prod_over_value = production_data[prod_error >= prod_quantile_95][feature_name].value_counts().idxmax()
- prod_range_value = 1 if (prod_overal_value != prod_under_value) or (prod_over_value != prod_overal_value) \
- or (prod_under_value != prod_overal_value) else 0
+ current_overal_value = current_data[feature_name].value_counts().idxmax()
+ current_under_value = current_data[current_error <= current_quantile_5][feature_name].value_counts().idxmax()
+ current_over_value = current_data[current_error >= current_quantile_95][feature_name].value_counts().idxmax()
+ current_range_value = 1 if (current_overal_value != current_under_value) or (current_over_value != current_overal_value) \
+ or (current_under_value != current_overal_value) else 0
error_bias[feature_name] = {'feature_type':feature_type, 'ref_majority':float(ref_overal_value), 'ref_under':float(ref_under_value),
- 'ref_over':float(ref_over_value), 'ref_range':float(ref_range_value),'prod_majority':float(prod_overal_value), 'prod_under':float(prod_under_value),
- 'prod_over':float(prod_over_value), 'prod_range':float(prod_range_value)}
+ 'ref_over':float(ref_over_value), 'ref_range':float(ref_range_value),'current_majority':float(current_overal_value), 'current_under':float(current_under_value),
+ 'current_over':float(current_over_value), 'current_range':float(current_range_value)}
result['metrics']['error_bias'] = error_bias
diff --git a/evidently/dashboard/dashboard.py b/evidently/dashboard/dashboard.py
index 7d8ace1a8d..89f3a8e5c7 100644
--- a/evidently/dashboard/dashboard.py
+++ b/evidently/dashboard/dashboard.py
@@ -17,6 +17,7 @@
from evidently.model.dashboard import DashboardInfo
from evidently.pipeline.pipeline import Pipeline
from evidently.tabs.base_tab import Tab
+from evidently.utils import NumpyEncoder
@dataclasses.dataclass()
@@ -27,7 +28,7 @@ class TemplateParams:
def __dashboard_info_to_json(di: DashboardInfo):
- return json.dumps(asdict(di))
+ return json.dumps(asdict(di), cls=NumpyEncoder)
def inline_template(params: TemplateParams):
@@ -136,11 +137,11 @@ def get_analyzers(self):
def calculate(self,
reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame,
+ current_data: pandas.DataFrame,
column_mapping: dict = None):
- self.execute(reference_data, production_data, column_mapping)
+ self.execute(reference_data, current_data, column_mapping)
for tab in self.tabsData:
- tab.calculate(reference_data, production_data, column_mapping, self.analyzers_results)
+ tab.calculate(reference_data, current_data, column_mapping, self.analyzers_results)
def __render(self, template: typing.Callable[[TemplateParams], str]):
dashboard_id = "evidently_dashboard_" + str(uuid.uuid4()).replace("-", "")
@@ -159,7 +160,7 @@ def _json(self):
dashboard_id = "evidently_dashboard_" + str(uuid.uuid4()).replace("-", "")
tab_widgets = [t.info() for t in self.tabsData]
di = DashboardInfo(dashboard_id, [item for tab in tab_widgets for item in tab if item is not None])
- return json.dumps(asdict(di))
+ return json.dumps(asdict(di), cls=NumpyEncoder)
def _save_to_json(self, filename):
parent_dir = os.path.dirname(filename)
diff --git a/evidently/examples/bicycle_demand_monitoring.ipynb b/evidently/examples/bicycle_demand_monitoring.ipynb
index bc1e8642ca..ea9e34046d 100644
--- a/evidently/examples/bicycle_demand_monitoring.ipynb
+++ b/evidently/examples/bicycle_demand_monitoring.ipynb
@@ -429,7 +429,7 @@
"outputs": [],
"source": [
"reference = raw_data.loc['2011-01-01 00:00:00':'2011-01-28 23:00:00']\n",
- "production = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00']"
+ "current = raw_data.loc['2011-01-29 00:00:00':'2011-02-28 23:00:00']"
]
},
{
@@ -652,7 +652,7 @@
"outputs": [],
"source": [
"ref_prediction = regressor.predict(reference[numerical_features + categorical_features])\n",
- "prod_prediction = regressor.predict(production[numerical_features + categorical_features])"
+ "current_prediction = regressor.predict(current[numerical_features + categorical_features])"
]
},
{
@@ -662,7 +662,7 @@
"outputs": [],
"source": [
"reference['prediction'] = ref_prediction\n",
- "production['prediction'] = prod_prediction"
+ "current['prediction'] = current_prediction"
]
},
{
@@ -717,22 +717,22 @@
"}\n",
"\n",
"\n",
"\n",
- "
Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -754,7 +754,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#regression_perfomance_dashboard.save('regression_performance_at_training.html')"
+ "#regression_perfomance_dashboard.save('reports/regression_performance_at_training.html')"
]
},
{
@@ -770,7 +770,7 @@
"metadata": {},
"outputs": [],
"source": [
- "regression_perfomance_dashboard.calculate(reference, production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
+ "regression_perfomance_dashboard.calculate(reference, current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -795,22 +795,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -832,7 +832,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#regression_perfomance_dashboard.save('regression_performance_after_week1.html')"
+ "#regression_perfomance_dashboard.save('reports/regression_performance_after_week1.html')"
]
},
{
@@ -842,7 +842,7 @@
"outputs": [],
"source": [
"target_drift_dashboard = Dashboard(tabs=[NumTargetDriftTab])\n",
- "target_drift_dashboard.calculate(reference, production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
+ "target_drift_dashboard.calculate(reference, current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -867,22 +867,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -904,7 +904,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#target_drift_dashboard.save('target_drift_after_week1.html')"
+ "#target_drift_dashboard.save('reports/target_drift_after_week1.html')"
]
},
{
@@ -920,7 +920,7 @@
"metadata": {},
"outputs": [],
"source": [
- "regression_perfomance_dashboard.calculate(reference, production.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], \n",
+ "regression_perfomance_dashboard.calculate(reference, current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -945,22 +945,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -982,7 +982,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#regression_perfomance_dashboard.save('regression_performance_after_week2.html')"
+ "#regression_perfomance_dashboard.save('reports/regression_performance_after_week2.html')"
]
},
{
@@ -991,7 +991,7 @@
"metadata": {},
"outputs": [],
"source": [
- "target_drift_dashboard.calculate(reference, production.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], \n",
+ "target_drift_dashboard.calculate(reference, current.loc['2011-02-07 00:00:00':'2011-02-14 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -1016,22 +1016,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -1053,7 +1053,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#target_drift_dashboard.save('target_drift_after_week2.html')"
+ "#target_drift_dashboard.save('reports/target_drift_after_week2.html')"
]
},
{
@@ -1069,7 +1069,7 @@
"metadata": {},
"outputs": [],
"source": [
- "regression_perfomance_dashboard.calculate(reference, production.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'], \n",
+ "regression_perfomance_dashboard.calculate(reference, current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -1094,22 +1094,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -1131,7 +1131,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#regression_perfomance_dashboard.save('regression_performance_after_week3.html')"
+ "#regression_perfomance_dashboard.save('reports/regression_performance_after_week3.html')"
]
},
{
@@ -1140,7 +1140,7 @@
"metadata": {},
"outputs": [],
"source": [
- "target_drift_dashboard.calculate(reference, production.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'], \n",
+ "target_drift_dashboard.calculate(reference, current.loc['2011-02-15 00:00:00':'2011-02-21 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
@@ -1165,22 +1165,22 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
@@ -1202,7 +1202,7 @@
"metadata": {},
"outputs": [],
"source": [
- "#target_drift_dashboard.save('target_drift_after_week3.html')"
+ "#target_drift_dashboard.save('reports/target_drift_after_week3.html')"
]
},
{
@@ -1225,18 +1225,18 @@
},
{
"cell_type": "code",
- "execution_count": 37,
+ "execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"data_drift_dashboard = Dashboard(tabs=[DataDriftTab])\n",
- "data_drift_dashboard.calculate(reference, production.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
+ "data_drift_dashboard.calculate(reference, current.loc['2011-01-29 00:00:00':'2011-02-07 23:00:00'], \n",
" column_mapping=column_mapping)"
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -1253,29 +1253,29 @@
"}\n",
"\n",
"\n",
"\n",
- "Loading...
\n",
+ "Loading...
\n",
"\n"
],
"text/plain": [
""
]
},
- "execution_count": 38,
+ "execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
@@ -1286,11 +1286,11 @@
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
- "#data_drift_dashboard.save(\"data_drift_dashboard_after_week1.html\")"
+ "#data_drift_dashboard.save(\"reports/data_drift_dashboard_after_week1.html\")"
]
}
],
diff --git a/evidently/model_profile/model_profile.py b/evidently/model_profile/model_profile.py
index 0c38311155..e48e275b1a 100644
--- a/evidently/model_profile/model_profile.py
+++ b/evidently/model_profile/model_profile.py
@@ -1,10 +1,13 @@
import json
-import pandas
+
+import pandas
+import numpy as np
from datetime import datetime
from typing import List, Type
from evidently.pipeline.pipeline import Pipeline
from evidently.profile_sections.base_profile_section import ProfileSection
+from evidently.utils import NumpyEncoder
class Profile(Pipeline):
@@ -14,17 +17,17 @@ def __init__(self, sections: List[Type[ProfileSection]]):
def calculate(self,
reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame,
+ current_data: pandas.DataFrame,
column_mapping: dict = None):
- self.execute(reference_data, production_data, column_mapping)
+ self.execute(reference_data, current_data, column_mapping)
def get_analyzers(self):
return list(set([analyzer for tab in self.parts for analyzer in tab.analyzers()]))
def json(self):
- return json.dumps(self.object())
+ return json.dumps(self.object(), cls=NumpyEncoder)
def object(self):
result = dict([(part.part_id(), part.calculate(self.analyzers_results)) for part in self.parts])
result["timestamp"] = str(datetime.now())
- return result
+ return result
\ No newline at end of file
diff --git a/evidently/pipeline/pipeline.py b/evidently/pipeline/pipeline.py
index f005d3b082..89a2e1113a 100644
--- a/evidently/pipeline/pipeline.py
+++ b/evidently/pipeline/pipeline.py
@@ -12,7 +12,7 @@ def get_analyzers(self):
def execute(self,
reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame,
+ current_data: pandas.DataFrame,
column_mapping: dict = None):
for analyzer in self.get_analyzers():
- self.analyzers_results[analyzer] = analyzer().calculate(reference_data, production_data, column_mapping)
+ self.analyzers_results[analyzer] = analyzer().calculate(reference_data, current_data, column_mapping)
diff --git a/evidently/profile/__pycache__/__init__.cpython-36.pyc b/evidently/profile/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 6dff6567cb..0000000000
Binary files a/evidently/profile/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/evidently/profile/__pycache__/data_drift_profile.cpython-36.pyc b/evidently/profile/__pycache__/data_drift_profile.cpython-36.pyc
deleted file mode 100644
index 3ffa9dbe71..0000000000
Binary files a/evidently/profile/__pycache__/data_drift_profile.cpython-36.pyc and /dev/null differ
diff --git a/evidently/profile/__pycache__/profile.cpython-36.pyc b/evidently/profile/__pycache__/profile.cpython-36.pyc
deleted file mode 100644
index b1d72af241..0000000000
Binary files a/evidently/profile/__pycache__/profile.cpython-36.pyc and /dev/null differ
diff --git a/evidently/runner/dashboard_runner.py b/evidently/runner/dashboard_runner.py
index 1a74ee44c7..05435bba9f 100644
--- a/evidently/runner/dashboard_runner.py
+++ b/evidently/runner/dashboard_runner.py
@@ -28,7 +28,7 @@ def __init__(self, options: DashboardRunnerOptions):
self.options = options
def run(self):
- (reference_data, production_data) = self._parse_data()
+ (reference_data, current_data) = self._parse_data()
tabs = []
@@ -39,5 +39,5 @@ def run(self):
tabs.append(tab_class)
dashboard = Dashboard(tabs=tabs)
- dashboard.calculate(reference_data, production_data, self.options.column_mapping)
+ dashboard.calculate(reference_data, current_data, self.options.column_mapping)
dashboard.save(self.options.output_path + ".html")
diff --git a/evidently/runner/loader.py b/evidently/runner/loader.py
new file mode 100644
index 0000000000..6066a19599
--- /dev/null
+++ b/evidently/runner/loader.py
@@ -0,0 +1,90 @@
+import dataclasses
+import logging
+import random
+from typing import Callable, Union, Optional, List
+
+import pandas as pd
+
+
+@dataclasses.dataclass
+class SamplingOptions:  # row-sampling settings read by _skiprows when a CSV file is loaded
+ type: str = "none"  # sampling mode: "none" (keep every row), "nth" or "random"
+ random_seed: int = 1  # seed of the generator used by "random" sampling
+ ratio: float = 1.0  # "random" sampling: a row is kept when a uniform draw falls below this value
+ n: int = 1  # "nth" sampling: rows are selected by row index modulo n; must be >= 1
+
+
+@dataclasses.dataclass
+class DataOptions:  # parsing settings for one CSV input file
+ date_column: str  # column the loader parses as dates; a falsy value turns date parsing off
+ separator: str  # field delimiter, handed to pandas as ``sep``
+ # whether the csv file contains a header row
+ header: bool
+ # should be a list of names, or None if columns should be inferred from data
+ column_names: Optional[List[str]]
+
+ def __init__(self, date_column: str = "datetime", separator=",", header=True, column_names=None):  # every option has a default
+ self.date_column = date_column
+ self.header = header
+ self.separator = separator
+ self.column_names = column_names
+
+
+def _skiprows(sampling_options: SamplingOptions) -> Union[Callable[[int], bool], None]:
+    """Return the pandas ``skiprows`` predicate for the sampling type; None means keep all rows."""
+    if sampling_options.type == "nth":
+        if sampling_options.n < 1:
+            raise Exception("nth sampling should have 'n' parameter >= 1")
+        return __simple(sampling_options)
+    if sampling_options.type == "random":
+        return RandomizedSkipRows(sampling_options.ratio, sampling_options.random_seed).skiprows
+    if sampling_options.type != "none":  # unknown type: keep loading every row (None), but say so
+        logging.warning("unknown sampling type '%s', no sampling applied", sampling_options.type)
+
+
+def __simple(sampling_options: SamplingOptions):
+    """Build a ``skiprows`` predicate that keeps row 0 and every n-th row after it.
+
+    The returned function maps a row index to True when that row must be skipped.
+    """
+    def func(row_idx):
+        # Kept rows are 0, 1, 1 + n, 1 + 2n, ...; the "- 1" makes n == 1 keep every row.
+        return row_idx != 0 and (row_idx - 1) % sampling_options.n != 0
+    return func
+
+
+class DataLoader:
+    """Reads a delimited text file into a pandas DataFrame, optionally sampling rows."""
+    def __init__(self):
+        pass
+
+    def load(self, filename: str, data_options: DataOptions, sampling_options: SamplingOptions = None):
+        """Load ``filename`` with ``pd.read_csv``; omitting ``sampling_options`` disables sampling."""
+        if sampling_options is None:
+            sampling_options = SamplingOptions("none", 0, 0)
+        date_columns = [data_options.date_column] if data_options.date_column else False
+        header_row = 0 if data_options.header else None
+        row_filter = _skiprows(sampling_options)
+        return pd.read_csv(filename, header=header_row, sep=data_options.separator,
+                           skiprows=row_filter, parse_dates=date_columns)
+
+
+CHUNK_SIZE = 1000
+
+
+class RandomizedSkipRows:
+    """Stateful ``skiprows`` predicate: a data row is kept when a uniform draw is below ``ratio``."""
+    def __init__(self, ratio: float, random_seed: int):
+        self.random = random.Random(random_seed)
+        self.ratio = ratio
+        self.selected_rows = self._select()
+
+    def skiprows(self, row_index: int):  # True means "skip this row"; row 0 is never skipped
+        if row_index == 0:
+            return False
+        if row_index % CHUNK_SIZE == 0:  # first row of a new chunk: draw a fresh set of decisions
+            self.selected_rows = self._select()
+        return self.selected_rows[row_index % CHUNK_SIZE]
+
+    def _select(self):  # one skip decision (True = skip) per position in a chunk
+        return [not self.random.random() < self.ratio for _ in range(CHUNK_SIZE)]
diff --git a/evidently/runner/profile_runner.py b/evidently/runner/profile_runner.py
index 3b8e354342..979141d7cd 100644
--- a/evidently/runner/profile_runner.py
+++ b/evidently/runner/profile_runner.py
@@ -10,6 +10,7 @@
from evidently.profile_sections.prob_classification_performance_profile_section import ProbClassificationPerformanceProfileSection
from evidently.profile_sections.regression_performance_profile_section import RegressionPerformanceProfileSection
from evidently.runner.runner import RunnerOptions, Runner
+from evidently.utils import NumpyEncoder
@dataclass
@@ -27,13 +28,14 @@ class ProfileRunnerOptions(RunnerOptions):
)
+
class ProfileRunner(Runner):
def __init__(self, options: ProfileRunnerOptions):
super().__init__(options)
self.options = options
def run(self):
- (reference_data, production_data) = self._parse_data()
+ (reference_data, current_data) = self._parse_data()
parts = []
@@ -44,10 +46,10 @@ def run(self):
parts.append(part_class)
profile = Profile(sections=parts)
- profile.calculate(reference_data, production_data, self.options.column_mapping)
+ profile.calculate(reference_data, current_data, self.options.column_mapping)
output_path = self.options.output_path \
if self.options.output_path.endswith(".json") \
else self.options.output_path + ".json"
with open(output_path, 'w') as f:
- json.dump(profile.object(), f, indent=2 if self.options.pretty_print else None)
+ json.dump(profile.object(), f, indent=2 if self.options.pretty_print else None, cls=NumpyEncoder)
diff --git a/evidently/runner/runner.py b/evidently/runner/runner.py
index 297108b72d..02f20d9364 100644
--- a/evidently/runner/runner.py
+++ b/evidently/runner/runner.py
@@ -1,31 +1,19 @@
+import logging
from typing import Optional, List, Dict
from dataclasses import dataclass
-import pandas as pd
-
-
-class DataOptions:
- date_column: str
- separator: str
- # is csv file contains header row
- header: bool
- # should be list of names, or None if columns should be inferred from data
- column_names: Optional[List[str]]
-
- def __init__(self, date_column: str = "datetime", separator=",", header=True, column_names=None):
- self.date_column = date_column
- self.header = header
- self.separator = separator
- self.column_names = column_names
+from evidently.runner.loader import DataLoader, SamplingOptions, DataOptions
@dataclass
class RunnerOptions:
reference_data_path: str
reference_data_options: DataOptions
- production_data_path: Optional[str]
- production_data_options: Optional[DataOptions]
+ reference_data_sampling: Optional[SamplingOptions]
+ current_data_path: Optional[str]
+ current_data_options: Optional[DataOptions]
+ current_data_sampling: Optional[SamplingOptions]
column_mapping: Dict[str, str]
output_path: str
@@ -35,23 +23,18 @@ def __init__(self, options: RunnerOptions):
self.options = options
def _parse_data(self):
- ref_parse_dates = [self.options.reference_data_options.date_column] \
- if self.options.reference_data_options.date_column \
- else False
- reference_data = pd.read_csv(self.options.reference_data_path,
- header=0 if self.options.reference_data_options.header else None,
- sep=self.options.reference_data_options.separator,
- parse_dates=ref_parse_dates)
-
- if self.options.production_data_path:
- prod_parse_dates = [self.options.production_data_options.date_column] \
- if self.options.production_data_options.date_column \
- else False
- production_data = pd.read_csv(self.options.production_data_path,
- header=0 if self.options.production_data_options.header else None,
- sep=self.options.production_data_options.separator,
- parse_dates=prod_parse_dates)
+ loader = DataLoader()
+
+ reference_data = loader.load(self.options.reference_data_path,
+ self.options.reference_data_options,
+ self.options.reference_data_sampling)
+ logging.info(f"reference dataset loaded: {len(reference_data)} rows")
+ if self.options.current_data_path:
+ current_data = loader.load(self.options.current_data_path,
+ self.options.current_data_options,
+ self.options.current_data_sampling)
+ logging.info(f"current dataset loaded: {len(current_data)} rows")
else:
- production_data = None
+ current_data = None
- return reference_data, production_data
+ return reference_data, current_data
diff --git a/evidently/tabs/base_tab.py b/evidently/tabs/base_tab.py
index 168d457ae0..f8dc1db7a9 100644
--- a/evidently/tabs/base_tab.py
+++ b/evidently/tabs/base_tab.py
@@ -22,12 +22,12 @@ def analyzers(self) -> List[Type[Analyzer]]:
return list(set([analyzer for widget in self.widgets for analyzer in widget.analyzers()]))
def calculate(self, reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame,
+ current_data: pandas.DataFrame,
column_mapping: Dict,
analyzers_results: Dict):
self.widgets = self._get_widgets()
for widget in self.widgets:
- widget.calculate(reference_data, production_data, column_mapping, analyzers_results)
+ widget.calculate(reference_data, current_data, column_mapping, analyzers_results)
def info(self) -> List[BaseWidgetInfo]:
return [w.get_info() for w in self.widgets]
diff --git a/evidently/utils/__init__.py b/evidently/utils/__init__.py
new file mode 100644
index 0000000000..64d6a69536
--- /dev/null
+++ b/evidently/utils/__init__.py
@@ -0,0 +1,22 @@
+import json
+import numpy as np
+
+
+_integer_types = (np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64)
+_float_types = (np.float16, np.float32, np.float64)  # np.float_ (an alias of float64) no longer exists in NumPy 2.0
+
+
+class NumpyEncoder(json.JSONEncoder):
+    """JSON encoder that converts NumPy scalars and arrays into plain Python values."""
+    def default(self, obj):  # json calls this only for objects it cannot serialize itself
+        if isinstance(obj, _integer_types):
+            return int(obj)
+        elif isinstance(obj, _float_types):
+            return float(obj)
+        elif isinstance(obj, np.ndarray):
+            return obj.tolist()
+        elif isinstance(obj, np.bool_):
+            return bool(obj)
+        elif isinstance(obj, np.void):  # np.void scalars are written as null
+            return None
+        return json.JSONEncoder.default(self, obj)  # anything else: the base class raises TypeError
diff --git a/evidently/widgets/cat_prediction_drift_widget.py b/evidently/widgets/cat_prediction_drift_widget.py
index b76c52b2d1..7d664de5e2 100644
--- a/evidently/widgets/cat_prediction_drift_widget.py
+++ b/evidently/widgets/cat_prediction_drift_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -66,31 +66,31 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
reference_data.dropna(axis=0, how='any', inplace=True)
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#ref_feature_vc = reference_data[prediction_column][np.isfinite(reference_data[prediction_column])].value_counts()
- #prod_feature_vc = production_data[prediction_column][np.isfinite(production_data[prediction_column])].value_counts()
+ #current_feature_vc = current_data[prediction_column][np.isfinite(current_data[prediction_column])].value_counts()
#keys = set(list(reference_data[prediction_column][np.isfinite(reference_data[prediction_column])].unique()) +
- # list(production_data[prediction_column][np.isfinite(production_data[prediction_column])].unique()))
+ # list(current_data[prediction_column][np.isfinite(current_data[prediction_column])].unique()))
ref_feature_vc = reference_data[prediction_column].value_counts()
- prod_feature_vc = production_data[prediction_column].value_counts()
+ current_feature_vc = current_data[prediction_column].value_counts()
keys = set(list(reference_data[prediction_column].unique()) +
- list(production_data[prediction_column].unique()))
+ list(current_data[prediction_column].unique()))
ref_feature_dict = dict.fromkeys(keys, 0)
for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
ref_feature_dict[key] = item
- prod_feature_dict = dict.fromkeys(keys, 0)
- for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
- prod_feature_dict[key] = item
+ current_feature_dict = dict.fromkeys(keys, 0)
+ for key, item in zip(current_feature_vc.index, current_feature_vc.values):
+ current_feature_dict[key] = item
f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
- f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
+ f_obs = [value[1] for value in sorted(current_feature_dict.items())]
pred_p_value = chisquare(f_exp, f_obs)[1]
@@ -102,7 +102,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
fig.add_trace(go.Histogram(x=reference_data[prediction_column],
marker_color=grey, opacity=0.6, nbinsx=10, name='Reference', histnorm='probability'))
- fig.add_trace(go.Histogram(x=production_data[prediction_column],
+ fig.add_trace(go.Histogram(x=current_data[prediction_column],
marker_color=red, opacity=0.6,nbinsx=10, name='Current', histnorm='probability'))
fig.update_layout(
diff --git a/evidently/widgets/cat_target_drift_widget.py b/evidently/widgets/cat_target_drift_widget.py
index f0632454ed..48ce1dcc00 100644
--- a/evidently/widgets/cat_target_drift_widget.py
+++ b/evidently/widgets/cat_target_drift_widget.py
@@ -29,7 +29,7 @@ def analyzers(self):
def get_info(self) -> BaseWidgetInfo:
return self.wi
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -62,32 +62,32 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
reference_data.dropna(axis=0, how='any', inplace=True)
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#calculate output drift
#ref_feature_vc = reference_data[target_column][np.isfinite(reference_data[target_column])].value_counts()
- #prod_feature_vc = production_data[target_column][np.isfinite(production_data[target_column])].value_counts()
+ #current_feature_vc = current_data[target_column][np.isfinite(current_data[target_column])].value_counts()
#keys = set(list(reference_data[target_column][np.isfinite(reference_data[target_column])].unique()) +
- # list(production_data[target_column][np.isfinite(production_data[target_column])].unique()))
+ # list(current_data[target_column][np.isfinite(current_data[target_column])].unique()))
ref_feature_vc = reference_data[target_column].value_counts()
- prod_feature_vc = production_data[target_column].value_counts()
+ current_feature_vc = current_data[target_column].value_counts()
keys = set(list(reference_data[target_column].unique()) +
- list(production_data[target_column].unique()))
+ list(current_data[target_column].unique()))
ref_feature_dict = dict.fromkeys(keys, 0)
for key, item in zip(ref_feature_vc.index, ref_feature_vc.values):
ref_feature_dict[key] = item
- prod_feature_dict = dict.fromkeys(keys, 0)
- for key, item in zip(prod_feature_vc.index, prod_feature_vc.values):
- prod_feature_dict[key] = item
+ current_feature_dict = dict.fromkeys(keys, 0)
+ for key, item in zip(current_feature_vc.index, current_feature_vc.values):
+ current_feature_dict[key] = item
f_exp = [value[1] for value in sorted(ref_feature_dict.items())]
- f_obs = [value[1] for value in sorted(prod_feature_dict.items())]
+ f_obs = [value[1] for value in sorted(current_feature_dict.items())]
target_p_value = chisquare(f_exp, f_obs)[1]
@@ -99,7 +99,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
fig.add_trace(go.Histogram(x=reference_data[target_column],
marker_color=grey, opacity=0.6, nbinsx=10, name='Reference', histnorm='probability'))
- fig.add_trace(go.Histogram(x=production_data[target_column],
+ fig.add_trace(go.Histogram(x=current_data[target_column],
marker_color=red, opacity=0.6,nbinsx=10, name='Current', histnorm='probability'))
fig.update_layout(
diff --git a/evidently/widgets/cat_target_pred_feature_table_widget.py b/evidently/widgets/cat_target_pred_feature_table_widget.py
index 0ea0aba804..4f9ce619e5 100644
--- a/evidently/widgets/cat_target_pred_feature_table_widget.py
+++ b/evidently/widgets/cat_target_pred_feature_table_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("neither target nor prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -86,8 +86,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#create target plot
reference_data['dataset'] = 'Reference'
- production_data['dataset'] = 'Current'
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ merged_data = pd.concat([reference_data, current_data])
target_fig = px.histogram(merged_data, x=feature_name, color=target_column, facet_col="dataset",
category_orders={"dataset": ["Reference", "Current"]})
@@ -166,8 +166,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#create target plot
#TO DO%: out pf the cycle
reference_data['dataset'] = 'Reference'
- production_data['dataset'] = 'Current'
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ merged_data = pd.concat([reference_data, current_data])
target_fig = px.histogram(merged_data, x=feature_name, color=target_column, facet_col="dataset",
category_orders={"dataset": ["Reference", "Current"]})
@@ -228,8 +228,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#create target plot
reference_data['dataset'] = 'Reference'
- production_data['dataset'] = 'Current'
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ merged_data = pd.concat([reference_data, current_data])
prediction_fig = px.histogram(merged_data, x=feature_name, color=prediction_column, facet_col="dataset",
category_orders={"dataset": ["Reference", "Current"]})
diff --git a/evidently/widgets/class_confusion_based_feature_distr_table_widget.py b/evidently/widgets/class_confusion_based_feature_distr_table_widget.py
index 066d3532ce..c66646e207 100644
--- a/evidently/widgets/class_confusion_based_feature_distr_table_widget.py
+++ b/evidently/widgets/class_confusion_based_feature_distr_table_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("neither target nor prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,7 +64,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
if prediction_column is not None and target_column is not None:
- if production_data is not None:
+ if current_data is not None:
additional_graphs_data = []
params_data = []
@@ -85,8 +85,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#create confusion based plots
reference_data['dataset'] = 'Reference'
- production_data['dataset'] = 'Current'
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ merged_data = pd.concat([reference_data, current_data])
fig = px.histogram(merged_data, x=feature_name, color=target_column, facet_col="dataset", histnorm = '',
category_orders={"dataset": ["Reference", "Current"]})
diff --git a/evidently/widgets/class_prod_class_support_widget.py b/evidently/widgets/class_prod_class_support_widget.py
index 72bbeaf50d..af889527bd 100644
--- a/evidently/widgets/class_prod_class_support_widget.py
+++ b/evidently/widgets/class_prod_class_support_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,12 +64,12 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot support bar
- metrics_matrix = metrics.classification_report(production_data[target_column], production_data[prediction_column],
+ metrics_matrix = metrics.classification_report(current_data[target_column], current_data[prediction_column],
output_dict=True)
metrics_frame = pd.DataFrame(metrics_matrix)
support = metrics_frame.iloc[-1:,:-3].values[0]
diff --git a/evidently/widgets/class_prod_conf_matrix_widget.py b/evidently/widgets/class_prod_conf_matrix_widget.py
index 85ec29914d..e016b1cb88 100644
--- a/evidently/widgets/class_prod_conf_matrix_widget.py
+++ b/evidently/widgets/class_prod_conf_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,17 +64,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot confusion matrix
- conf_matrix = metrics.confusion_matrix(production_data[target_column],
- production_data[prediction_column])
+ conf_matrix = metrics.confusion_matrix(current_data[target_column],
+ current_data[prediction_column])
z = conf_matrix.astype(int)
- labels = target_names if target_names else sorted(set(production_data[target_column]))
+ labels = target_names if target_names else sorted(set(current_data[target_column]))
# change each element of z to type string for annotations
z_text = [[str(y) for y in x] for x in z]
diff --git a/evidently/widgets/class_prod_metrics_matrix_widget.py b/evidently/widgets/class_prod_metrics_matrix_widget.py
index aa5dbec0ba..e809c0cc7f 100644
--- a/evidently/widgets/class_prod_metrics_matrix_widget.py
+++ b/evidently/widgets/class_prod_metrics_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,12 +64,12 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot support bar
- metrics_matrix = metrics.classification_report(production_data[target_column], production_data[prediction_column],
+ metrics_matrix = metrics.classification_report(current_data[target_column], current_data[prediction_column],
output_dict=True)
metrics_frame = pd.DataFrame(metrics_matrix)
diff --git a/evidently/widgets/class_prod_quality_metrics_widget.py b/evidently/widgets/class_prod_quality_metrics_widget.py
index 42e97890ed..ecc8f03c7e 100644
--- a/evidently/widgets/class_prod_quality_metrics_widget.py
+++ b/evidently/widgets/class_prod_quality_metrics_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -59,18 +59,18 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#calculate quality metrics
- accuracy_score = metrics.accuracy_score(production_data[target_column], production_data[prediction_column])
- avg_precision = metrics.precision_score(production_data[target_column], production_data[prediction_column],
+ accuracy_score = metrics.accuracy_score(current_data[target_column], current_data[prediction_column])
+ avg_precision = metrics.precision_score(current_data[target_column], current_data[prediction_column],
average='macro')
- avg_recall = metrics.recall_score(production_data[target_column], production_data[prediction_column],
+ avg_recall = metrics.recall_score(current_data[target_column], current_data[prediction_column],
average='macro')
- avg_f1 = metrics.f1_score(production_data[target_column], production_data[prediction_column],
+ avg_f1 = metrics.f1_score(current_data[target_column], current_data[prediction_column],
average='macro')
self.wi = BaseWidgetInfo(
diff --git a/evidently/widgets/class_ref_class_support_widget.py b/evidently/widgets/class_ref_class_support_widget.py
index 19504cc85e..2150368406 100644
--- a/evidently/widgets/class_ref_class_support_widget.py
+++ b/evidently/widgets/class_ref_class_support_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -94,7 +94,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": support_bar_json['data'],
"layout": support_bar_json['layout']
diff --git a/evidently/widgets/class_ref_conf_matrix_widget.py b/evidently/widgets/class_ref_conf_matrix_widget.py
index 999a452aac..0d02edfc91 100644
--- a/evidently/widgets/class_ref_conf_matrix_widget.py
+++ b/evidently/widgets/class_ref_conf_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -96,7 +96,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": conf_matrix_json['data'],
"layout": conf_matrix_json['layout']
diff --git a/evidently/widgets/class_ref_metrics_matrix_widget.py b/evidently/widgets/class_ref_metrics_matrix_widget.py
index b711f23dab..cf59d8f88c 100644
--- a/evidently/widgets/class_ref_metrics_matrix_widget.py
+++ b/evidently/widgets/class_ref_metrics_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -98,7 +98,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": metrics_matrix_json['data'],
"layout": metrics_matrix_json['layout']
diff --git a/evidently/widgets/class_ref_quality_metrics_widget.py b/evidently/widgets/class_ref_quality_metrics_widget.py
index 41b9de1f95..5bd0ccf7ae 100644
--- a/evidently/widgets/class_ref_quality_metrics_widget.py
+++ b/evidently/widgets/class_ref_quality_metrics_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/class_target_name_widget.py b/evidently/widgets/class_target_name_widget.py
index 90e3f70a2f..2f412ed0b6 100644
--- a/evidently/widgets/class_target_name_widget.py
+++ b/evidently/widgets/class_target_name_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/counter_widget.py b/evidently/widgets/counter_widget.py
index efecf10aa2..be5295c1ea 100644
--- a/evidently/widgets/counter_widget.py
+++ b/evidently/widgets/counter_widget.py
@@ -22,7 +22,7 @@ def analyzers(self):
def calculate(self,
reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame,
+ current_data: pandas.DataFrame,
column_mapping: Dict,
analyzes_results):
self.wi = BaseWidgetInfo(
diff --git a/evidently/widgets/data_drift_table_widget.py b/evidently/widgets/data_drift_table_widget.py
index 98147f80c1..98bb6922ef 100644
--- a/evidently/widgets/data_drift_table_widget.py
+++ b/evidently/widgets/data_drift_table_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
def calculate(self,
reference_data: pd.DataFrame,
- production_data: pd.DataFrame,
+ current_data: pd.DataFrame,
column_mapping,
analyzers_results):
results = analyzers_results[DataDriftAnalyzer]
@@ -44,7 +44,7 @@ def calculate(self,
date_column = results['utility_columns']['date']
for feature_name in num_feature_names:
- prod_small_hist = results['metrics'][feature_name]["prod_small_hist"]
+ current_small_hist = results['metrics'][feature_name]["current_small_hist"]
ref_small_hist = results['metrics'][feature_name]["ref_small_hist"]
feature_type = results['metrics'][feature_name]["feature_type"]
@@ -76,8 +76,8 @@ def calculate(self,
"y": list(ref_small_hist[0])
},
"f4": {
- "x": list(prod_small_hist[1]),
- "y": list(prod_small_hist[0])
+ "x": list(current_small_hist[1]),
+ "y": list(current_small_hist[0])
},
"f2": distr_sim_test,
"f5": round(p_value, 6)
@@ -85,7 +85,7 @@ def calculate(self,
)
for feature_name in cat_feature_names:
- prod_small_hist = results['metrics'][feature_name]["prod_small_hist"]
+ current_small_hist = results['metrics'][feature_name]["current_small_hist"]
ref_small_hist = results['metrics'][feature_name]["ref_small_hist"]
feature_type = results['metrics'][feature_name]["feature_type"]
@@ -118,8 +118,8 @@ def calculate(self,
"y": list(ref_small_hist[0])
},
"f4": {
- "x": list(prod_small_hist[1]),
- "y": list(prod_small_hist[0])
+ "x": list(current_small_hist[1]),
+ "y": list(current_small_hist[0])
},
"f2": distr_sim_test,
"f5": round(p_value, 6)
@@ -135,7 +135,7 @@ def calculate(self,
marker_color=grey, opacity=0.6, nbinsx=10, name='Reference',
histnorm='probability'))
- fig.add_trace(go.Histogram(x=production_data[feature_name],
+ fig.add_trace(go.Histogram(x=current_data[feature_name],
marker_color=red, opacity=0.6, nbinsx=10, name='Current',
histnorm='probability'))
@@ -161,8 +161,8 @@ def calculate(self,
fig = go.Figure()
fig.add_trace(go.Scatter(
- x=production_data[date_column] if date_column else production_data.index,
- y=production_data[feature_name],
+ x=current_data[date_column] if date_column else current_data.index,
+ y=current_data[feature_name],
mode='markers',
name='Current',
marker=dict(
diff --git a/evidently/widgets/num_prediction_corr_widget.py b/evidently/widgets/num_prediction_corr_widget.py
index 2726f90155..df3e52ac61 100644
--- a/evidently/widgets/num_prediction_corr_widget.py
+++ b/evidently/widgets/num_prediction_corr_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,7 +64,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#calculate corr
ref_pred_corr = reference_data[num_feature_names + [prediction_column]].corr()[prediction_column]
- prod_pred_corr = production_data[num_feature_names + [prediction_column]].corr()[prediction_column]
+ current_pred_corr = current_data[num_feature_names + [prediction_column]].corr()[prediction_column]
#plot output correlations
pred_corr = go.Figure()
@@ -72,7 +72,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
pred_corr.add_trace(go.Bar(y = ref_pred_corr, x = ref_pred_corr.index,
marker_color = grey, name = 'Reference'))
- pred_corr.add_trace(go.Bar(y = prod_pred_corr, x = ref_pred_corr.index,
+ pred_corr.add_trace(go.Bar(y = current_pred_corr, x = ref_pred_corr.index,
marker_color = red, name = 'Current'))
pred_corr.update_layout(xaxis_title = "Features", yaxis_title = "Correlation",
diff --git a/evidently/widgets/num_prediction_drift_widget.py b/evidently/widgets/num_prediction_drift_widget.py
index 08440b1a55..1347e081cc 100644
--- a/evidently/widgets/num_prediction_drift_widget.py
+++ b/evidently/widgets/num_prediction_drift_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -63,12 +63,12 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
if prediction_column is not None:
#calculate output drift
- pred_p_value = ks_2samp(reference_data[prediction_column], production_data[prediction_column])[1]
+ pred_p_value = ks_2samp(reference_data[prediction_column], current_data[prediction_column])[1]
pred_sim_test = "detected" if pred_p_value < 0.05 else "not detected"
#plot output distributions
pred_distr = ff.create_distplot(
- [reference_data[prediction_column], production_data[prediction_column]],
+ [reference_data[prediction_column], current_data[prediction_column]],
["Reference", "Current"],
colors=[grey, red],
show_rug=True)
diff --git a/evidently/widgets/num_prediction_values_widget.py b/evidently/widgets/num_prediction_values_widget.py
index 5e061c2752..27663c1c94 100644
--- a/evidently/widgets/num_prediction_values_widget.py
+++ b/evidently/widgets/num_prediction_values_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -81,8 +81,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
))
pred_values.add_trace(go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = production_data[prediction_column],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = current_data[prediction_column],
mode = 'markers',
name = 'Current',
marker = dict(
diff --git a/evidently/widgets/num_target_corr_widget.py b/evidently/widgets/num_target_corr_widget.py
index c583cb64ee..5deb3d5246 100644
--- a/evidently/widgets/num_target_corr_widget.py
+++ b/evidently/widgets/num_target_corr_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,7 +64,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#calculate corr
ref_target_corr = reference_data[num_feature_names + [target_column]].corr()[target_column]
- prod_target_corr = production_data[num_feature_names + [target_column]].corr()[target_column]
+ current_target_corr = current_data[num_feature_names + [target_column]].corr()[target_column]
#plot output correlations
target_corr = go.Figure()
@@ -72,7 +72,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_corr.add_trace(go.Bar(y = ref_target_corr, x = ref_target_corr.index,
marker_color = grey, name = 'Reference'))
- target_corr.add_trace(go.Bar(y = prod_target_corr, x = ref_target_corr.index,
+ target_corr.add_trace(go.Bar(y = current_target_corr, x = ref_target_corr.index,
marker_color = red, name = 'Current'))
target_corr.update_layout(xaxis_title = "Features", yaxis_title = "Correlation",
diff --git a/evidently/widgets/num_target_drift_widget.py b/evidently/widgets/num_target_drift_widget.py
index 1bc59b3db7..5717b011b4 100644
--- a/evidently/widgets/num_target_drift_widget.py
+++ b/evidently/widgets/num_target_drift_widget.py
@@ -29,7 +29,7 @@ def analyzers(self):
def get_info(self) -> BaseWidgetInfo:
return self.wi
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,12 +60,12 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
if target_column is not None:
#calculate output drift
- target_p_value = ks_2samp(reference_data[target_column], production_data[target_column])[1]
+ target_p_value = ks_2samp(reference_data[target_column], current_data[target_column])[1]
target_sim_test = "detected" if target_p_value < 0.05 else "not detected"
#plot output distributions
target_distr = ff.create_distplot(
- [reference_data[target_column], production_data[target_column]],
+ [reference_data[target_column], current_data[target_column]],
["Reference", "Current"],
colors=[grey, red],
show_rug=True)
diff --git a/evidently/widgets/num_target_pred_feature_table_widget.py b/evidently/widgets/num_target_pred_feature_table_widget.py
index b4a58a28f9..b071f1276c 100644
--- a/evidently/widgets/num_target_pred_feature_table_widget.py
+++ b/evidently/widgets/num_target_pred_feature_table_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("neither target nor prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -116,8 +116,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
if prediction_column is not None:
fig.add_trace(
go.Scatter(
- x = production_data[feature_name],
- y = production_data[prediction_column],
+ x = current_data[feature_name],
+ y = current_data[prediction_column],
mode = 'markers',
name = 'Prediction (curr)',
marker = dict(
@@ -131,8 +131,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
if target_column is not None:
fig.add_trace(
go.Scatter(
- x = production_data[feature_name],
- y = production_data[target_column],
+ x = current_data[feature_name],
+ y = current_data[target_column],
mode = 'markers',
name = 'Target (curr)',
marker = dict(
diff --git a/evidently/widgets/num_target_values_widget.py b/evidently/widgets/num_target_values_widget.py
index 5d990f4195..89cc763d3e 100644
--- a/evidently/widgets/num_target_values_widget.py
+++ b/evidently/widgets/num_target_values_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -81,8 +81,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
))
target_values.add_trace(go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = production_data[target_column],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = current_data[target_column],
mode = 'markers',
name = 'Current',
marker = dict(
diff --git a/evidently/widgets/prob_class_confusion_based_feature_distr_table_widget.py b/evidently/widgets/prob_class_confusion_based_feature_distr_table_widget.py
index c617f2fc52..87070d698e 100644
--- a/evidently/widgets/prob_class_confusion_based_feature_distr_table_widget.py
+++ b/evidently/widgets/prob_class_confusion_based_feature_distr_table_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("neither target nor prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -69,16 +69,16 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
binaraized_target = binaraizer.transform(reference_data[target_column])
- if production_data is not None:
+ if current_data is not None:
ref_array_prediction = reference_data[prediction_column].to_numpy()
ref_prediction_ids = np.argmax(ref_array_prediction, axis=-1)
ref_prediction_labels = [prediction_column[x] for x in ref_prediction_ids]
reference_data['prediction_labels'] = ref_prediction_labels
- prod_array_prediction = production_data[prediction_column].to_numpy()
- prod_prediction_ids = np.argmax(prod_array_prediction, axis=-1)
- prod_prediction_labels = [prediction_column[x] for x in prod_prediction_ids]
- production_data['prediction_labels'] = prod_prediction_labels
+ current_array_prediction = current_data[prediction_column].to_numpy()
+ current_prediction_ids = np.argmax(current_array_prediction, axis=-1)
+ current_prediction_labels = [prediction_column[x] for x in current_prediction_ids]
+ current_data['prediction_labels'] = current_prediction_labels
additional_graphs_data = []
params_data = []
@@ -99,8 +99,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#create confusion based plots
reference_data['dataset'] = 'Reference'
- production_data['dataset'] = 'Current'
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ merged_data = pd.concat([reference_data, current_data])
fig = px.histogram(merged_data, x=feature_name, color=target_column, facet_col="dataset", histnorm = '',
category_orders={"dataset": ["Reference", "Current"]})
@@ -156,15 +156,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
showticklabels=True
),
yaxis = dict(
- range=(0, 1),
+ range=(-0.1, 1.1),
showticklabels=True
)
)
- #PROD Prediction
+ #current Prediction
fig.add_trace(go.Scatter(
- x = production_data[production_data[target_column] == label][feature_name],
- y = production_data[production_data[target_column] == label][label],
+ x = current_data[current_data[target_column] == label][feature_name],
+ y = current_data[current_data[target_column] == label][label],
mode = 'markers',
name = str(label) + ' (curr)',
marker=dict(
@@ -176,8 +176,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
)
fig.add_trace(go.Scatter(
- x = production_data[production_data[target_column] != label][feature_name],
- y = production_data[production_data[target_column] != label][label],
+ x = current_data[current_data[target_column] != label][feature_name],
+ y = current_data[current_data[target_column] != label][label],
mode = 'markers',
name = 'other (curr)',
marker=dict(
@@ -195,7 +195,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
showticklabels=True
),
yaxis = dict(
- range=(0, 1),
+ range=(-0.1, 1.1),
showticklabels=True
)
)
@@ -316,7 +316,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
showticklabels=True
),
yaxis = dict(
- range=(0, 1),
+ range=(-0.1, 1.1),
showticklabels=True
)
)
diff --git a/evidently/widgets/prob_class_prod_class_support_widget.py b/evidently/widgets/prob_class_prod_class_support_widget.py
index ee694555e4..524c7e3533 100644
--- a/evidently/widgets/prob_class_prod_class_support_widget.py
+++ b/evidently/widgets/prob_class_prod_class_support_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,17 +64,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
#plot support bar
- metrics_matrix = metrics.classification_report(production_data[target_column], prediction_labels,
+ metrics_matrix = metrics.classification_report(current_data[target_column], prediction_labels,
output_dict=True)
metrics_frame = pd.DataFrame(metrics_matrix)
support = metrics_frame.iloc[-1:,:-3].values[0]
diff --git a/evidently/widgets/prob_class_prod_conf_matrix_widget.py b/evidently/widgets/prob_class_prod_conf_matrix_widget.py
index 18ab9e5a8a..0f84a346db 100644
--- a/evidently/widgets/prob_class_prod_conf_matrix_widget.py
+++ b/evidently/widgets/prob_class_prod_conf_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,22 +64,22 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
#plot confusion matrix
- conf_matrix = metrics.confusion_matrix(production_data[target_column],
+ conf_matrix = metrics.confusion_matrix(current_data[target_column],
prediction_labels)
z = conf_matrix.astype(int)
- labels = sorted(set(production_data[target_column]))
+ labels = sorted(set(current_data[target_column]))
# change each element of z to type string for annotations
z_text = [[str(y) for y in x] for x in z]
diff --git a/evidently/widgets/prob_class_prod_metrics_matrix_widget.py b/evidently/widgets/prob_class_prod_metrics_matrix_widget.py
index 36431b2765..c05010e7b0 100644
--- a/evidently/widgets/prob_class_prod_metrics_matrix_widget.py
+++ b/evidently/widgets/prob_class_prod_metrics_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,23 +64,23 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
- binaraized_target = binaraizer.transform(production_data[target_column])
+ binaraized_target = binaraizer.transform(current_data[target_column])
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
- labels = sorted(set(production_data[target_column]))
+ labels = sorted(set(current_data[target_column]))
#plot support bar
- metrics_matrix = metrics.classification_report(production_data[target_column], prediction_labels,
+ metrics_matrix = metrics.classification_report(current_data[target_column], prediction_labels,
output_dict=True)
metrics_frame = pd.DataFrame(metrics_matrix)
diff --git a/evidently/widgets/prob_class_prod_pr_curve_widget.py b/evidently/widgets/prob_class_prod_pr_curve_widget.py
index 688fa315b8..a678ab2d43 100644
--- a/evidently/widgets/prob_class_prod_pr_curve_widget.py
+++ b/evidently/widgets/prob_class_prod_pr_curve_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,9 +64,9 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#array_prediction = reference_data[prediction_column].to_numpy()
@@ -74,11 +74,11 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#prediction_labels = [prediction_column[x] for x in prediction_ids]
if len(prediction_column) <= 2:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = ['target']
- p, r, thrs = metrics.precision_recall_curve(binaraized_target, production_data[prediction_column[0]]) #problem!!!
+ p, r, thrs = metrics.precision_recall_curve(binaraized_target, current_data[prediction_column[0]]) #problem!!!
fig = go.Figure()
fig.add_trace(go.Scatter(
@@ -118,14 +118,14 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
else:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = prediction_column
#plot support bar
graphs = []
for label in prediction_column:
- p, r, thrs = metrics.precision_recall_curve(binaraized_target[label], production_data[label])
+ p, r, thrs = metrics.precision_recall_curve(binaraized_target[label], current_data[label])
fig = go.Figure()
fig.add_trace(go.Scatter(
diff --git a/evidently/widgets/prob_class_prod_pr_table_widget.py b/evidently/widgets/prob_class_prod_pr_table_widget.py
index 612ee4777c..3400f738ba 100644
--- a/evidently/widgets/prob_class_prod_pr_table_widget.py
+++ b/evidently/widgets/prob_class_prod_pr_table_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,9 +64,9 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#array_prediction = reference_data[prediction_column].to_numpy()
@@ -75,14 +75,14 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
if len(prediction_column) <= 2:
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = ['target']
params_data = []
step_size = 0.05
binded = list(zip(binaraized_target['target'].tolist(),
- production_data[prediction_column[0]].tolist()))
+ current_data[prediction_column[0]].tolist()))
binded.sort(key = lambda item: item[1], reverse = True)
data_size = len(binded)
@@ -157,7 +157,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
else:
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = prediction_column
#create tables
@@ -168,7 +168,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
step_size = 0.05
binded = list(zip(binaraized_target[label].tolist(),
- production_data[label].tolist()))
+ current_data[label].tolist()))
binded.sort(key = lambda item: item[1], reverse = True)
data_size = len(binded)
diff --git a/evidently/widgets/prob_class_prod_pred_distr_widget.py b/evidently/widgets/prob_class_prod_pred_distr_widget.py
index 48e34c873c..0884c93d4d 100644
--- a/evidently/widgets/prob_class_prod_pred_distr_widget.py
+++ b/evidently/widgets/prob_class_prod_pred_distr_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzers_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzers_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -65,11 +65,11 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
@@ -80,8 +80,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
for label in prediction_column:
pred_distr = ff.create_distplot(
[
- production_data[production_data[target_column] == label][label],
- production_data[production_data[target_column] != label][label]
+ current_data[current_data[target_column] == label][label],
+ current_data[current_data[target_column] != label][label]
],
[str(label), "other"],
colors=[red, grey],
diff --git a/evidently/widgets/prob_class_prod_prediction_cloud_widget.py b/evidently/widgets/prob_class_prod_prediction_cloud_widget.py
index 0b905b5ce4..9763921b24 100644
--- a/evidently/widgets/prob_class_prod_prediction_cloud_widget.py
+++ b/evidently/widgets/prob_class_prod_prediction_cloud_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,11 +64,11 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
@@ -80,8 +80,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
fig = go.Figure()
fig.add_trace(go.Scatter(
- x = np.random.random(production_data[production_data[target_column] == label].shape[0]),
- y = production_data[production_data[target_column] == label][label],
+ x = np.random.random(current_data[current_data[target_column] == label].shape[0]),
+ y = current_data[current_data[target_column] == label][label],
mode = 'markers',
name = str(label),
marker=dict(
@@ -91,8 +91,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
))
fig.add_trace(go.Scatter(
- x = np.random.random(production_data[production_data[target_column] != label].shape[0]),
- y = production_data[production_data[target_column] != label][label],
+ x = np.random.random(current_data[current_data[target_column] != label].shape[0]),
+ y = current_data[current_data[target_column] != label][label],
mode = 'markers',
name = 'other',
marker=dict(
diff --git a/evidently/widgets/prob_class_prod_quality_metrics_widget.py b/evidently/widgets/prob_class_prod_quality_metrics_widget.py
index 9d93b92d30..7df0098f1f 100644
--- a/evidently/widgets/prob_class_prod_quality_metrics_widget.py
+++ b/evidently/widgets/prob_class_prod_quality_metrics_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -59,16 +59,16 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
binaraizer = preprocessing.LabelBinarizer()
binaraizer.fit(reference_data[target_column])
- binaraized_target = binaraizer.transform(production_data[target_column])
+ binaraized_target = binaraizer.transform(current_data[target_column])
- array_prediction = production_data[prediction_column].to_numpy()
+ array_prediction = current_data[prediction_column].to_numpy()
prediction_ids = np.argmax(array_prediction, axis=-1)
prediction_labels = [prediction_column[x] for x in prediction_ids]
@@ -78,15 +78,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
roc_auc = metrics.roc_auc_score(binaraized_target, array_prediction, average='macro')
log_loss = metrics.log_loss(binaraized_target, array_prediction)
else:
- roc_auc = metrics.roc_auc_score(binaraized_target, production_data[prediction_column[0]]) #problem!!!
- log_loss = metrics.log_loss(binaraized_target, production_data[prediction_column[0]]) #problem!!!
+ roc_auc = metrics.roc_auc_score(binaraized_target, current_data[prediction_column[0]]) #problem!!!
+ log_loss = metrics.log_loss(binaraized_target, current_data[prediction_column[0]]) #problem!!!
- accuracy_score = metrics.accuracy_score(production_data[target_column], prediction_labels)
- avg_precision = metrics.precision_score(production_data[target_column], prediction_labels,
+ accuracy_score = metrics.accuracy_score(current_data[target_column], prediction_labels)
+ avg_precision = metrics.precision_score(current_data[target_column], prediction_labels,
average='macro')
- avg_recall = metrics.recall_score(production_data[target_column], prediction_labels,
+ avg_recall = metrics.recall_score(current_data[target_column], prediction_labels,
average='macro')
- avg_f1 = metrics.f1_score(production_data[target_column], prediction_labels,
+ avg_f1 = metrics.f1_score(current_data[target_column], prediction_labels,
average='macro')
self.wi = BaseWidgetInfo(
diff --git a/evidently/widgets/prob_class_prod_roc_curve_widget.py b/evidently/widgets/prob_class_prod_roc_curve_widget.py
index 2907e8e4fa..6bba479592 100644
--- a/evidently/widgets/prob_class_prod_roc_curve_widget.py
+++ b/evidently/widgets/prob_class_prod_roc_curve_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -64,17 +64,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#target_names = None
- if production_data is not None and target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None and target_column is not None and prediction_column is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
if len(prediction_column) <= 2:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = ['target']
- fpr, tpr, thrs = metrics.roc_curve(binaraized_target, production_data[prediction_column[0]]) #problem!!!
+ fpr, tpr, thrs = metrics.roc_curve(binaraized_target, current_data[prediction_column[0]]) #problem!!!
fig = go.Figure()
fig.add_trace(go.Scatter(
@@ -114,14 +114,14 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
else:
binaraizer = preprocessing.LabelBinarizer()
- binaraizer.fit(production_data[target_column])
- binaraized_target = pd.DataFrame(binaraizer.transform(production_data[target_column]))
+ binaraizer.fit(current_data[target_column])
+ binaraized_target = pd.DataFrame(binaraizer.transform(current_data[target_column]))
binaraized_target.columns = prediction_column
#plot support bar
graphs = []
for label in prediction_column:
- fpr, tpr, thrs = metrics.roc_curve(binaraized_target[label], production_data[label])
+ fpr, tpr, thrs = metrics.roc_curve(binaraized_target[label], current_data[label])
fig = go.Figure()
fig.add_trace(go.Scatter(
diff --git a/evidently/widgets/prob_class_ref_class_support_widget.py b/evidently/widgets/prob_class_ref_class_support_widget.py
index d90a9b3a32..c4f41a1441 100644
--- a/evidently/widgets/prob_class_ref_class_support_widget.py
+++ b/evidently/widgets/prob_class_ref_class_support_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -103,7 +103,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": support_bar_json['data'],
"layout": support_bar_json['layout']
diff --git a/evidently/widgets/prob_class_ref_conf_matrix_widget.py b/evidently/widgets/prob_class_ref_conf_matrix_widget.py
index 228645a66e..a44e745a7f 100644
--- a/evidently/widgets/prob_class_ref_conf_matrix_widget.py
+++ b/evidently/widgets/prob_class_ref_conf_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -101,7 +101,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": conf_matrix_json['data'],
"layout": conf_matrix_json['layout']
diff --git a/evidently/widgets/prob_class_ref_metrics_matrix_widget.py b/evidently/widgets/prob_class_ref_metrics_matrix_widget.py
index 6f424f5817..802b0fcd19 100644
--- a/evidently/widgets/prob_class_ref_metrics_matrix_widget.py
+++ b/evidently/widgets/prob_class_ref_metrics_matrix_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -112,7 +112,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": metrics_matrix_json['data'],
"layout": metrics_matrix_json['layout']
diff --git a/evidently/widgets/prob_class_ref_pr_curve_widget.py b/evidently/widgets/prob_class_ref_pr_curve_widget.py
index fcc7f002b0..bcfa2cee7a 100644
--- a/evidently/widgets/prob_class_ref_pr_curve_widget.py
+++ b/evidently/widgets/prob_class_ref_pr_curve_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -108,7 +108,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": fig_json['data'],
"layout": fig_json['layout']
@@ -163,7 +163,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"graphs": graphs
},
diff --git a/evidently/widgets/prob_class_ref_pr_table_widget.py b/evidently/widgets/prob_class_ref_pr_table_widget.py
index cc81b74bb3..53c70f800b 100644
--- a/evidently/widgets/prob_class_ref_pr_table_widget.py
+++ b/evidently/widgets/prob_class_ref_pr_table_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -116,7 +116,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"rowsPerPage" : 21,
"columns": [
@@ -206,7 +206,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=2, #if production_data is not None else 2,
+ size=2, #if current_data is not None else 2,
params={
"rowsPerPage": 21,
"columns": [
@@ -249,7 +249,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
self.wi = BaseWidgetInfo(
type="tabs",
title=self.title,
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
details="",
tabs=tabs
)
diff --git a/evidently/widgets/prob_class_ref_pred_distr_widget.py b/evidently/widgets/prob_class_ref_pred_distr_widget.py
index a768c5f4f3..6b4a3fe455 100644
--- a/evidently/widgets/prob_class_ref_pred_distr_widget.py
+++ b/evidently/widgets/prob_class_ref_pred_distr_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzers_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzers_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -121,7 +121,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"graphs": graphs
},
diff --git a/evidently/widgets/prob_class_ref_prediction_cloud_widget.py b/evidently/widgets/prob_class_ref_prediction_cloud_widget.py
index 11ed5414ba..5f8bfe609d 100644
--- a/evidently/widgets/prob_class_ref_prediction_cloud_widget.py
+++ b/evidently/widgets/prob_class_ref_prediction_cloud_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -128,7 +128,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"graphs": graphs
},
diff --git a/evidently/widgets/prob_class_ref_quality_metrics_widget.py b/evidently/widgets/prob_class_ref_quality_metrics_widget.py
index 72a30fa1b0..2e91cbd455 100644
--- a/evidently/widgets/prob_class_ref_quality_metrics_widget.py
+++ b/evidently/widgets/prob_class_ref_quality_metrics_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/prob_class_ref_roc_curve_widget.py b/evidently/widgets/prob_class_ref_roc_curve_widget.py
index 8a3fc3a554..5b2eb965cf 100644
--- a/evidently/widgets/prob_class_ref_roc_curve_widget.py
+++ b/evidently/widgets/prob_class_ref_roc_curve_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction or target data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -108,7 +108,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": fig_json['data'],
"layout": fig_json['layout']
@@ -164,7 +164,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"graphs": graphs
},
diff --git a/evidently/widgets/prob_class_target_name_widget.py b/evidently/widgets/prob_class_target_name_widget.py
index 55ce2a54b8..43f73cdb76 100644
--- a/evidently/widgets/prob_class_target_name_widget.py
+++ b/evidently/widgets/prob_class_target_name_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/raw_widget.py b/evidently/widgets/raw_widget.py
index e6a715b1f1..b4ed49eeca 100644
--- a/evidently/widgets/raw_widget.py
+++ b/evidently/widgets/raw_widget.py
@@ -11,7 +11,7 @@
class RawWidget(Widget):
@abc.abstractmethod
- def calculate(self, reference_data: pandas.DataFrame, production_data: pandas.DataFrame, column_mapping,
+ def calculate(self, reference_data: pandas.DataFrame, current_data: pandas.DataFrame, column_mapping,
analyzes_results):
raise NotImplemented()
diff --git a/evidently/widgets/reg_prod_abs_perc_error_in_time_widget.py b/evidently/widgets/reg_prod_abs_perc_error_in_time_widget.py
index a30c3dc5f0..4d895808df 100644
--- a/evidently/widgets/reg_prod_abs_perc_error_in_time_widget.py
+++ b/evidently/widgets/reg_prod_abs_perc_error_in_time_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,19 +60,19 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
abs_perc_error_time = go.Figure()
abs_perc_error = list(map(lambda x : 100*abs(x[0] - x[1])/x[0],
- zip(production_data[target_column], production_data[prediction_column])))
+ zip(current_data[target_column], current_data[prediction_column])))
error_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
+ x = current_data[date_column] if date_column else current_data.index,
y = abs_perc_error,
mode = 'lines',
name = 'Absolute Percentage Error',
@@ -83,8 +83,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
)
zero_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = [0]*production_data.shape[0],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = [0]*current_data.shape[0],
mode = 'lines',
opacity=0.5,
marker=dict(
diff --git a/evidently/widgets/reg_prod_colored_pred_vs_actual_widget.py b/evidently/widgets/reg_prod_colored_pred_vs_actual_widget.py
index 2198bab258..62a403ea2e 100644
--- a/evidently/widgets/reg_prod_colored_pred_vs_actual_widget.py
+++ b/evidently/widgets/reg_prod_colored_pred_vs_actual_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,26 +60,26 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- prod_error = production_data[prediction_column] - production_data[target_column]
+ current_error = current_data[prediction_column] - current_data[target_column]
- prod_quntile_5 = np.quantile(prod_error, .05)
- prod_quntile_95 = np.quantile(prod_error, .95)
+ current_quntile_5 = np.quantile(current_error, .05)
+ current_quntile_95 = np.quantile(current_error, .95)
- production_data['dataset'] = 'Current'
- production_data['Error bias'] = list(map(lambda x : 'Underestimation' if x <= prod_quntile_5 else 'Majority'
- if x < prod_quntile_95 else 'Overestimation', prod_error))
+ current_data['dataset'] = 'Current'
+ current_data['Error bias'] = list(map(lambda x : 'Underestimation' if x <= current_quntile_5 else 'Majority'
+ if x < current_quntile_95 else 'Overestimation', current_error))
#plot output correlations
pred_actual = go.Figure()
pred_actual.add_trace(go.Scatter(
- x = production_data[production_data['Error bias'] == 'Underestimation'][target_column],
- y = production_data[production_data['Error bias'] == 'Underestimation'][prediction_column],
+ x = current_data[current_data['Error bias'] == 'Underestimation'][target_column],
+ y = current_data[current_data['Error bias'] == 'Underestimation'][prediction_column],
mode = 'markers',
name = 'Underestimation',
marker = dict(
@@ -89,8 +89,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
))
pred_actual.add_trace(go.Scatter(
- x = production_data[production_data['Error bias'] == 'Overestimation'][target_column],
- y = production_data[production_data['Error bias'] == 'Overestimation'][prediction_column],
+ x = current_data[current_data['Error bias'] == 'Overestimation'][target_column],
+ y = current_data[current_data['Error bias'] == 'Overestimation'][prediction_column],
mode = 'markers',
name = 'Overestimation',
marker = dict(
@@ -100,8 +100,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
))
pred_actual.add_trace(go.Scatter(
- x = production_data[production_data['Error bias'] == 'Majority'][target_column],
- y = production_data[production_data['Error bias'] == 'Majority'][prediction_column],
+ x = current_data[current_data['Error bias'] == 'Majority'][target_column],
+ y = current_data[current_data['Error bias'] == 'Majority'][prediction_column],
mode = 'markers',
name = 'Majority',
marker = dict(
diff --git a/evidently/widgets/reg_prod_error_distr_widget.py b/evidently/widgets/reg_prod_error_distr_widget.py
index 6f2972ae65..3a1dfaec6a 100644
--- a/evidently/widgets/reg_prod_error_distr_widget.py
+++ b/evidently/widgets/reg_prod_error_distr_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,15 +60,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
error_distr = go.Figure()
- error = production_data[prediction_column] - production_data[target_column]
+ error = current_data[prediction_column] - current_data[target_column]
error_distr.add_trace(go.Histogram(x=error,
marker_color=red, name = 'error distribution', histnorm = 'percent'))
diff --git a/evidently/widgets/reg_prod_error_in_time_widget.py b/evidently/widgets/reg_prod_error_in_time_widget.py
index 9b165b3436..c782d50b18 100644
--- a/evidently/widgets/reg_prod_error_in_time_widget.py
+++ b/evidently/widgets/reg_prod_error_in_time_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,17 +60,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
pred_actual_time = go.Figure()
error_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = production_data[prediction_column] - production_data[target_column],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = current_data[prediction_column] - current_data[target_column],
mode = 'lines',
name = 'Predicted - Actual',
marker=dict(
@@ -80,8 +80,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
)
zero_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = [0]*production_data.shape[0],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = [0]*current_data.shape[0],
mode = 'lines',
opacity=0.5,
marker=dict(
diff --git a/evidently/widgets/reg_prod_error_normality_widget.py b/evidently/widgets/reg_prod_error_normality_widget.py
index 8f69652bec..f9dd572f56 100644
--- a/evidently/widgets/reg_prod_error_normality_widget.py
+++ b/evidently/widgets/reg_prod_error_normality_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -59,15 +59,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
error_norm = go.Figure()
- error = production_data[prediction_column] - production_data[target_column]
+ error = current_data[prediction_column] - current_data[target_column]
qq_lines = probplot(error, dist="norm", plot=None)
theoretical_q_x = np.linspace(qq_lines[0][0][0], qq_lines[0][0][-1], 100)
diff --git a/evidently/widgets/reg_prod_pred_and_actual_in_time_widget.py b/evidently/widgets/reg_prod_pred_and_actual_in_time_widget.py
index 0b2d9f1e79..4ef2e5c2ef 100644
--- a/evidently/widgets/reg_prod_pred_and_actual_in_time_widget.py
+++ b/evidently/widgets/reg_prod_pred_and_actual_in_time_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -59,17 +59,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
pred_actual_time = go.Figure()
target_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = production_data[target_column],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = current_data[target_column],
mode = 'lines',
name = 'Actual',
marker=dict(
@@ -79,8 +79,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
)
pred_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = production_data[prediction_column],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = current_data[prediction_column],
mode = 'lines',
name = 'Predicted',
marker=dict(
@@ -90,8 +90,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
)
zero_trace = go.Scatter(
- x = production_data[date_column] if date_column else production_data.index,
- y = [0]*production_data.shape[0],
+ x = current_data[date_column] if date_column else current_data.index,
+ y = [0]*current_data.shape[0],
mode = 'lines',
opacity=0.5,
marker=dict(
diff --git a/evidently/widgets/reg_prod_pred_vs_actual_widget.py b/evidently/widgets/reg_prod_pred_vs_actual_widget.py
index 329611ca6b..d1150773a2 100644
--- a/evidently/widgets/reg_prod_pred_vs_actual_widget.py
+++ b/evidently/widgets/reg_prod_pred_vs_actual_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -60,17 +60,17 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#plot output correlations
pred_actual = go.Figure()
pred_actual.add_trace(go.Scatter(
- x = production_data[target_column],
- y = production_data[prediction_column],
+ x = current_data[target_column],
+ y = current_data[prediction_column],
mode = 'markers',
name = 'Current',
marker = dict(
diff --git a/evidently/widgets/reg_prod_quality_metrics_widget.py b/evidently/widgets/reg_prod_quality_metrics_widget.py
index 20340c0942..87b1068f1a 100644
--- a/evidently/widgets/reg_prod_quality_metrics_widget.py
+++ b/evidently/widgets/reg_prod_quality_metrics_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
#raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -62,20 +62,20 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
num_feature_names = list(set(reference_data.select_dtypes([np.number]).columns) - set(utility_columns))
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
+ if current_data is not None:
if target_column is not None and prediction_column is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
#calculate quality metrics
- me = np.mean(production_data[prediction_column] - production_data[target_column])
- sde = np.std(production_data[prediction_column] - production_data[target_column], ddof = 1)
+ me = np.mean(current_data[prediction_column] - current_data[target_column])
+ sde = np.std(current_data[prediction_column] - current_data[target_column], ddof = 1)
- abs_err = np.abs(production_data[prediction_column] - production_data[target_column])
+ abs_err = np.abs(current_data[prediction_column] - current_data[target_column])
mae = np.mean(abs_err)
sdae = np.std(abs_err, ddof = 1)
- abs_perc_err = 100.*np.abs(production_data[prediction_column] - production_data[target_column])/production_data[target_column]
+ abs_perc_err = 100.*np.abs(current_data[prediction_column] - current_data[target_column])/current_data[target_column]
mape = np.mean(abs_perc_err)
sdape = np.std(abs_perc_err, ddof = 1)
diff --git a/evidently/widgets/reg_prod_underperform_metrics_widget.py b/evidently/widgets/reg_prod_underperform_metrics_widget.py
index cc5f281571..a5eb5036bb 100644
--- a/evidently/widgets/reg_prod_underperform_metrics_widget.py
+++ b/evidently/widgets/reg_prod_underperform_metrics_widget.py
@@ -29,7 +29,7 @@ def analyzers(self):
def get_info(self) -> BaseWidgetInfo:
return self.wi
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -59,24 +59,24 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
- prod_error = production_data[prediction_column] - production_data[target_column]
+ current_error = current_data[prediction_column] - current_data[target_column]
- prod_quantile_5 = np.quantile(prod_error, .05)
- prod_quantile_95 = np.quantile(prod_error, .95)
+ current_quantile_5 = np.quantile(current_error, .05)
+ current_quantile_95 = np.quantile(current_error, .95)
- prod_mae = np.mean(prod_error)
- prod_mae_under = np.mean(prod_error[prod_error <= prod_quantile_5])
- prod_mae_exp = np.mean(prod_error[(prod_error > prod_quantile_5) & (prod_error < prod_quantile_95)])
- prod_mae_over = np.mean(prod_error[prod_error >= prod_quantile_95])
+ current_mae = np.mean(current_error)
+ current_mae_under = np.mean(current_error[current_error <= current_quantile_5])
+ current_mae_exp = np.mean(current_error[(current_error > current_quantile_5) & (current_error < current_quantile_95)])
+ current_mae_over = np.mean(current_error[current_error >= current_quantile_95])
- prod_sd = np.std(prod_error, ddof = 1)
- prod_sd_under = np.std(prod_error[prod_error <= prod_quantile_5], ddof = 1)
- prod_sd_exp = np.std(prod_error[(prod_error > prod_quantile_5) & (prod_error < prod_quantile_95)], ddof = 1)
- prod_sd_over = np.std(prod_error[prod_error >= prod_quantile_95], ddof = 1)
+ current_sd = np.std(current_error, ddof = 1)
+ current_sd_under = np.std(current_error[current_error <= current_quantile_5], ddof = 1)
+ current_sd_exp = np.std(current_error[(current_error > current_quantile_5) & (current_error < current_quantile_95)], ddof = 1)
+ current_sd_over = np.std(current_error[current_error >= current_quantile_95], ddof = 1)
self.wi = BaseWidgetInfo(
title=self.title,
@@ -90,19 +90,19 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
params={
"counters": [
{
- "value": str(round(prod_mae_exp, 2)) + " (" + str(round(prod_sd_exp, 2)) + ")",
+ "value": str(round(current_mae_exp, 2)) + " (" + str(round(current_sd_exp, 2)) + ")",
"label": "Majority(90%)"
},
#{
- # "value": str(round(prod_mae_exp, 2)) + " (" + str(round(prod_sd_exp,2)) + ")",
+ # "value": str(round(current_mae_exp, 2)) + " (" + str(round(current_sd_exp,2)) + ")",
# "label": "Expected"
#},
{
- "value": str(round(prod_mae_under, 2)) + " (" + str(round(prod_sd_under, 2)) + ")",
+ "value": str(round(current_mae_under, 2)) + " (" + str(round(current_sd_under, 2)) + ")",
"label": "Underestimation(5%)"
},
{
- "value": str(round(prod_mae_over, 2)) + " (" + str(round(prod_sd_over, 2)) + ")",
+ "value": str(round(current_mae_over, 2)) + " (" + str(round(current_sd_over, 2)) + ")",
"label": "Overestimation(5%)"
}
]
diff --git a/evidently/widgets/reg_ref_abs_perc_error_in_time_widget.py b/evidently/widgets/reg_ref_abs_perc_error_in_time_widget.py
index 4c26a37f50..f919728a82 100644
--- a/evidently/widgets/reg_ref_abs_perc_error_in_time_widget.py
+++ b/evidently/widgets/reg_ref_abs_perc_error_in_time_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_colored_pred_vs_actual_widget.py b/evidently/widgets/reg_ref_colored_pred_vs_actual_widget.py
index cc15869690..24dec56572 100644
--- a/evidently/widgets/reg_ref_colored_pred_vs_actual_widget.py
+++ b/evidently/widgets/reg_ref_colored_pred_vs_actual_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -130,7 +130,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
alerts=[],
alertsPosition="row",
insights=[],
- size=1 if production_data is not None else 2,
+ size=1 if current_data is not None else 2,
params={
"data": pred_actual_json['data'],
"layout": pred_actual_json['layout']
diff --git a/evidently/widgets/reg_ref_error_distr_widget.py b/evidently/widgets/reg_ref_error_distr_widget.py
index c04ce5469e..f75d54a415 100644
--- a/evidently/widgets/reg_ref_error_distr_widget.py
+++ b/evidently/widgets/reg_ref_error_distr_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_error_in_time_widget.py b/evidently/widgets/reg_ref_error_in_time_widget.py
index 511d5ccfbc..5fde1660cb 100644
--- a/evidently/widgets/reg_ref_error_in_time_widget.py
+++ b/evidently/widgets/reg_ref_error_in_time_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_error_normality_widget.py b/evidently/widgets/reg_ref_error_normality_widget.py
index b76f319162..d3da3ada90 100644
--- a/evidently/widgets/reg_ref_error_normality_widget.py
+++ b/evidently/widgets/reg_ref_error_normality_widget.py
@@ -30,7 +30,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No prediction data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_pred_and_actual_in_time_widget.py b/evidently/widgets/reg_ref_pred_and_actual_in_time_widget.py
index cf60dc6289..979ab45d6e 100644
--- a/evidently/widgets/reg_ref_pred_and_actual_in_time_widget.py
+++ b/evidently/widgets/reg_ref_pred_and_actual_in_time_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_pred_vs_actual_widget.py b/evidently/widgets/reg_ref_pred_vs_actual_widget.py
index 26606e9736..be1fa0c71b 100644
--- a/evidently/widgets/reg_ref_pred_vs_actual_widget.py
+++ b/evidently/widgets/reg_ref_pred_vs_actual_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_quality_metrics_widget.py b/evidently/widgets/reg_ref_quality_metrics_widget.py
index 139f49428d..e8e9de7fdd 100644
--- a/evidently/widgets/reg_ref_quality_metrics_widget.py
+++ b/evidently/widgets/reg_ref_quality_metrics_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_ref_underperform_metrics_widget.py b/evidently/widgets/reg_ref_underperform_metrics_widget.py
index f0bcbfc221..4e0a2ebee2 100644
--- a/evidently/widgets/reg_ref_underperform_metrics_widget.py
+++ b/evidently/widgets/reg_ref_underperform_metrics_widget.py
@@ -31,7 +31,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("no widget info provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_target_name_widget.py b/evidently/widgets/reg_target_name_widget.py
index e781bfce0a..cd1708b4ef 100644
--- a/evidently/widgets/reg_target_name_widget.py
+++ b/evidently/widgets/reg_target_name_widget.py
@@ -33,7 +33,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("No reference data with target and prediction provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
diff --git a/evidently/widgets/reg_underperform_segments_table_widget.py b/evidently/widgets/reg_underperform_segments_table_widget.py
index 6eec0ccc97..b0fd58a143 100644
--- a/evidently/widgets/reg_underperform_segments_table_widget.py
+++ b/evidently/widgets/reg_underperform_segments_table_widget.py
@@ -32,7 +32,7 @@ def get_info(self) -> BaseWidgetInfo:
return self.wi
raise ValueError("no widget info provided")
- def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame, column_mapping, analyzes_results):
+ def calculate(self, reference_data: pd.DataFrame, current_data: pd.DataFrame, column_mapping, analyzes_results):
if column_mapping:
date_column = column_mapping.get('datetime')
id_column = column_mapping.get('id')
@@ -62,34 +62,34 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
cat_feature_names = list(set(reference_data.select_dtypes([np.object]).columns) - set(utility_columns))
- if production_data is not None:
- production_data.replace([np.inf, -np.inf], np.nan, inplace=True)
- production_data.dropna(axis=0, how='any', inplace=True)
+ if current_data is not None:
+ current_data.replace([np.inf, -np.inf], np.nan, inplace=True)
+ current_data.dropna(axis=0, how='any', inplace=True)
reference_data.replace([np.inf, -np.inf], np.nan, inplace=True)
reference_data.dropna(axis=0, how='any', inplace=True)
ref_error = reference_data[prediction_column] - reference_data[target_column]
- prod_error = production_data[prediction_column] - production_data[target_column]
+ current_error = current_data[prediction_column] - current_data[target_column]
ref_quntile_5 = np.quantile(ref_error, .05)
ref_quntile_95 = np.quantile(ref_error, .95)
- prod_quntile_5 = np.quantile(prod_error, .05)
- prod_quntile_95 = np.quantile(prod_error, .95)
+ current_quntile_5 = np.quantile(current_error, .05)
+ current_quntile_95 = np.quantile(current_error, .95)
#create subplots
reference_data['dataset'] = 'Reference'
reference_data['Error bias'] = list(map(lambda x : 'Underestimation' if x <= ref_quntile_5 else 'Majority'
if x < ref_quntile_95 else 'Overestimation', ref_error))
- production_data['dataset'] = 'Current'
- production_data['Error bias'] = list(map(lambda x : 'Underestimation' if x <= prod_quntile_5 else 'Majority'
- if x < prod_quntile_95 else 'Overestimation', prod_error))
- merged_data = pd.concat([reference_data, production_data])
+ current_data['dataset'] = 'Current'
+ current_data['Error bias'] = list(map(lambda x : 'Underestimation' if x <= current_quntile_5 else 'Majority'
+ if x < current_quntile_95 else 'Overestimation', current_error))
+ merged_data = pd.concat([reference_data, current_data])
reference_data.drop(['dataset', 'Error bias'], axis=1, inplace=True)
- production_data.drop(['dataset', 'Error bias'], axis=1, inplace=True)
+ current_data.drop(['dataset', 'Error bias'], axis=1, inplace=True)
params_data = []
additional_graphs_data = []
@@ -103,11 +103,11 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
ref_over_value = np.mean(reference_data[ref_error >= ref_quntile_95][feature_name])
ref_range_value = 0 if ref_over_value == ref_under_value else 100*abs(ref_over_value - ref_under_value)/(np.max(reference_data[feature_name]) - np.min(reference_data[feature_name]))
- prod_overal_value = np.mean(production_data[feature_name])
- prod_under_value = np.mean(production_data[prod_error <= prod_quntile_5][feature_name])
- prod_expected_value = np.mean(production_data[(prod_error > prod_quntile_5) & (prod_error < prod_quntile_95)][feature_name])
- prod_over_value = np.mean(production_data[prod_error >= prod_quntile_95][feature_name])
- prod_range_value = 0 if prod_over_value == prod_under_value else 100*abs(prod_over_value - prod_under_value)/(np.max(production_data[feature_name]) - np.min(production_data[feature_name]))
+ current_overal_value = np.mean(current_data[feature_name])
+ current_under_value = np.mean(current_data[current_error <= current_quntile_5][feature_name])
+ current_expected_value = np.mean(current_data[(current_error > current_quntile_5) & (current_error < current_quntile_95)][feature_name])
+ current_over_value = np.mean(current_data[current_error >= current_quntile_95][feature_name])
+ current_range_value = 0 if current_over_value == current_under_value else 100*abs(current_over_value - current_under_value)/(np.max(current_data[feature_name]) - np.min(current_data[feature_name]))
feature_hist = px.histogram(merged_data, x=feature_name, color='Error bias', facet_col="dataset",
@@ -124,8 +124,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
mode = 'markers',
marker=dict(
size=6,
- cmax=max(max(reference_data[feature_name]), max(production_data[feature_name])),
- cmin=min(min(reference_data[feature_name]), min(production_data[feature_name])),
+ cmax=max(max(reference_data[feature_name]), max(current_data[feature_name])),
+ cmin=min(min(reference_data[feature_name]), min(current_data[feature_name])),
color=reference_data[feature_name],
#colorbar=dict(
# title="Colorbar"
@@ -138,15 +138,15 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
segment_fig.add_trace(
go.Scatter(
- x = production_data[target_column],
- y = production_data[prediction_column],
+ x = current_data[target_column],
+ y = current_data[prediction_column],
mode = 'markers',
#name = feature_name + ' (curr)',
marker=dict(
size=6,
- cmax=max(max(reference_data[feature_name]), max(production_data[feature_name])),
- cmin=min(min(reference_data[feature_name]), min(production_data[feature_name])),
- color=production_data[feature_name],
+ cmax=max(max(reference_data[feature_name]), max(current_data[feature_name])),
+ cmin=min(min(reference_data[feature_name]), min(current_data[feature_name])),
+ color=current_data[feature_name],
colorbar=dict(
title=feature_name
),
@@ -190,10 +190,10 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
"f4": round(ref_under_value, 2),
"f5": round(ref_over_value, 2),
"f6": round(ref_range_value, 2),
- "f7": round(prod_expected_value, 2),
- "f8": round(prod_under_value, 2),
- "f9": round(prod_over_value, 2),
- "f10": round(prod_range_value, 2)
+ "f7": round(current_expected_value, 2),
+ "f8": round(current_under_value, 2),
+ "f9": round(current_over_value, 2),
+ "f10": round(current_range_value, 2)
}
)
@@ -227,12 +227,12 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
ref_range_value = 1 if (ref_overal_value != ref_under_value) or (ref_over_value != ref_overal_value) \
or (ref_under_value != ref_overal_value) else 0
- prod_overal_value = production_data[feature_name].value_counts().idxmax()
- prod_under_value = production_data[prod_error <= prod_quntile_5][feature_name].value_counts().idxmax()
- #prod_expected_value = production_data[(prod_error > prod_quntile_5) & (prod_error < prod_quntile_95)][feature_name].value_counts().idxmax()
- prod_over_value = production_data[prod_error >= prod_quntile_95][feature_name].value_counts().idxmax()
- prod_range_value = 1 if (prod_overal_value != prod_under_value) or (prod_over_value != prod_overal_value) \
- or (prod_under_value != prod_overal_value) else 0
+ current_overal_value = current_data[feature_name].value_counts().idxmax()
+ current_under_value = current_data[current_error <= current_quntile_5][feature_name].value_counts().idxmax()
+ #current_expected_value = current_data[(current_error > current_quntile_5) & (current_error < current_quntile_95)][feature_name].value_counts().idxmax()
+ current_over_value = current_data[current_error >= current_quntile_95][feature_name].value_counts().idxmax()
+ current_range_value = 1 if (current_overal_value != current_under_value) or (current_over_value != current_overal_value) \
+ or (current_under_value != current_overal_value) else 0
feature_hist = px.histogram(merged_data, x=feature_name, color='Error bias', facet_col="dataset",
histnorm = 'percent', barmode='overlay', category_orders={"dataset": ["Reference", "Current"], "Error bias": ["Underestimation", "Overestimation", "Majority"]})
@@ -250,8 +250,8 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
#marker_color = reference_data[feature_name],
marker=dict(
size=6,
- cmax=max(max(reference_data[feature_name]), max(production_data[feature_name])),
- cmin=min(min(reference_data[feature_name]), min(production_data[feature_name])),
+ cmax=max(max(reference_data[feature_name]), max(current_data[feature_name])),
+ cmin=min(min(reference_data[feature_name]), min(current_data[feature_name])),
color=reference_data[feature_name],
#colorbar=dict(
# title="Colorbar"
@@ -264,16 +264,16 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
segment_fig.add_trace(
go.Scatter(
- x = production_data[target_column],
- y = production_data[prediction_column],
+ x = current_data[target_column],
+ y = current_data[prediction_column],
mode = 'markers',
#name = feature_name + ' (curr)',
- #marker_color = production_data[feature_name],
+ #marker_color = current_data[feature_name],
marker=dict(
size=6,
- cmax=max(max(reference_data[feature_name]), max(production_data[feature_name])),
- cmin=min(min(reference_data[feature_name]), min(production_data[feature_name])),
- color=production_data[feature_name],
+ cmax=max(max(reference_data[feature_name]), max(current_data[feature_name])),
+ cmin=min(min(reference_data[feature_name]), min(current_data[feature_name])),
+ color=current_data[feature_name],
colorbar=dict(
title=feature_name
),
@@ -315,10 +315,10 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
"f4": str(ref_under_value),
"f5": str(ref_over_value),
"f6": str(ref_range_value),
- "f7": str(prod_overal_value),
- "f8": str(prod_under_value),
- "f9": str(prod_over_value),
- "f10": int(prod_range_value)
+ "f7": str(current_overal_value),
+ "f8": str(current_under_value),
+ "f9": str(current_over_value),
+ "f10": int(current_range_value)
}
)
@@ -431,7 +431,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
category_orders={"Error bias": ["Underestimation", "Overestimation", "Majority"]})
#hist_fig = px.histogram(reference_data, x=feature_name, color=target_column, facet_col="dataset",
- # category_orders={"dataset": ["Reference", "Production"]})
+ # category_orders={"dataset": ["Reference", "Current"]})
hist_figure = json.loads(hist.to_json())
@@ -498,7 +498,7 @@ def calculate(self, reference_data: pd.DataFrame, production_data: pd.DataFrame,
barmode='overlay', category_orders={"Error bias": ["Underestimation", "Overestimation", "Majority"]})
#hist_fig = px.histogram(reference_data, x=feature_name, color=target_column, facet_col="dataset",
- # category_orders={"dataset": ["Reference", "Production"]})
+ # category_orders={"dataset": ["Reference", "Current"]})
hist_figure = json.loads(hist.to_json())
diff --git a/evidently/widgets/widget.py b/evidently/widgets/widget.py
index 093b2c0ff6..34c5b35cf5 100644
--- a/evidently/widgets/widget.py
+++ b/evidently/widgets/widget.py
@@ -14,7 +14,7 @@ def __init__(self):
@abc.abstractmethod
def calculate(self, reference_data: pandas.DataFrame,
- production_data: pandas.DataFrame, column_mapping, analyzers_results):
+ current_data: pandas.DataFrame, column_mapping, analyzers_results):
raise NotImplemented()
@abc.abstractmethod
diff --git a/setup.py b/setup.py
index 58513888b5..6747b7348d 100644
--- a/setup.py
+++ b/setup.py
@@ -78,6 +78,7 @@
"statsmodels",
"plotly",
"scipy",
+ "pyyaml",
"scikit-learn>=0.22.1"
],
entry_points={