From 2c7f36e7d000c4ecef940303a2998316f4b8a4d5 Mon Sep 17 00:00:00 2001 From: Olga Filippova <36808731+0lgaF@users.noreply.github.com> Date: Fri, 13 Oct 2023 13:23:32 +0300 Subject: [PATCH] Fix_typo (#812) Co-authored-by: 0lgaF --- .../how_to_calculate_recsys_metrics.ipynb | 86 +++++++++---------- .../calculations/recommender_systems.py | 16 ++-- src/evidently/pipeline/column_mapping.py | 2 +- src/evidently/utils/data_preprocessing.py | 4 +- tests/metrics/recsys/test_f_beta_top_k.py | 2 +- tests/metrics/recsys/test_map_k.py | 4 +- tests/metrics/recsys/test_mar_k.py | 2 +- tests/metrics/recsys/test_ndcg_k.py | 2 +- tests/metrics/recsys/test_precision_top_k.py | 4 +- tests/metrics/recsys/test_recall_top_k.py | 4 +- 10 files changed, 63 insertions(+), 63 deletions(-) diff --git a/examples/how_to_questions/how_to_calculate_recsys_metrics.ipynb b/examples/how_to_questions/how_to_calculate_recsys_metrics.ipynb index 3a295e8cfb..7114dc5c9a 100644 --- a/examples/how_to_questions/how_to_calculate_recsys_metrics.ipynb +++ b/examples/how_to_questions/how_to_calculate_recsys_metrics.ipynb @@ -1,6 +1,6 @@ { "nbformat": 4, - "nbformat_minor": 0, + "nbformat_minor": 2, "metadata": { "colab": { "provenance": [] @@ -17,19 +17,20 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "id": "8aJYBZFNMyXc" - }, - "outputs": [], "source": [ "try:\n", " import evidently\n", "except:\n", " !pip install git+https://github.com/evidentlyai/evidently.git" - ] + ], + "outputs": [], + "metadata": { + "id": "8aJYBZFNMyXc" + } }, { "cell_type": "code", + "execution_count": null, "source": [ "import pandas as pd\n", "import numpy as np\n", @@ -40,22 +41,21 @@ "\n", "import requests" ], + "outputs": [], "metadata": { "id": "UfuNPLwjO99K" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "!pip install implicit" ], + "outputs": [], "metadata": { "id": "8A_dH0K0082d" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "markdown", @@ -68,6 +68,7 @@ }, { "cell_type": "code", + "execution_count": null, "source": [ "content = requests.get(\"http://files.grouplens.org/datasets/movielens/ml-100k.zip\").content\n", "\n", @@ -77,14 +78,14 @@ " movies = arc.read(\"ml-100k/u.item\").decode(encoding='latin-1').split(\"\\n\")\n", " users = arc.read(\"ml-100k/u.user\").decode(encoding='latin-1').split(\"\\n\")" ], + "outputs": [], "metadata": { "id": "f1wLolXpM02U" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "columns = ['user_id', 'movie_id', 'rating', 'timestamp']\n", "\n", @@ -107,11 +108,10 @@ "movies.drop(columns=['-', 'url'], inplace=True)\n", "movies[genres] = movies[genres].astype(int)" ], + "outputs": [], "metadata": { "id": "-V1w4P5LeV4X" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "markdown", @@ -124,6 +124,7 @@ }, { "cell_type": "code", + "execution_count": null, "source": [ "def transform_predictions(k, user_ids, item_ids):\n", " return pd.DataFrame(\n", @@ -138,11 +139,10 @@ " preds['rank'] = preds.groupby('user_id')['rank'].transform(lambda x: x.fillna(x.max() + 1))\n", " return preds" ], + "outputs": [], "metadata": { "id": "MqP6bLDv92hY" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "markdown", @@ -155,6 +155,7 @@ }, { "cell_type": "code", + "execution_count": null, "source": [ "from implicit.cpu.als import AlternatingLeastSquares\n", "from scipy.sparse import csr_matrix\n", @@ -163,38 +164,38 @@ "als_model = AlternatingLeastSquares(factors=20, iterations=5, random_state=0)\n", "als_model.fit(csr_matrix(pivot_table))" ], + "outputs": [], "metadata": { "id": "-FQDsHEA3OKw" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "test_users = test.user_id.unique()\n", "len(test_users)" ], + "outputs": [], "metadata": { "id": "LLVvpQOvFIzl" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "ref_true = test[test.user_id.isin(test_users[:471])]\n", "curr_true = test[test.user_id.isin(test_users[471:])]" ], + "outputs": [], "metadata": { "id": "Sa8bO0CHFAJV" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "ids, scores = als_model.recommend(ref_true.user_id.unique() - 1, csr_matrix(pivot_table.loc[ref_true.user_id.unique()]), N=30, filter_already_liked_items=True)\n", "ref = get_full_df(30, ref_true.user_id.unique(), ids, ref_true)\n", @@ -202,27 +203,27 @@ "ids, scores = als_model.recommend(curr_true.user_id.unique() - 1, csr_matrix(pivot_table.loc[curr_true.user_id.unique()]), N=30, filter_already_liked_items=True)\n", "curr = get_full_df(30, curr_true.user_id.unique(), ids, curr_true)" ], + "outputs": [], "metadata": { "id": "LipvjnXIH3Y5" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "from evidently.metrics import PrecisionTopKMetric, RecallTopKMetric, FBetaTopKMetric, MAPKMetric, NDCGKMetric\n", "from evidently.pipeline.column_mapping import ColumnMapping\n", "from evidently.report import Report" ], + "outputs": [], "metadata": { "id": "vxU6s88ism0_" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [ "report = Report(metrics=[\n", " PrecisionTopKMetric(k=5),\n", @@ -231,24 +232,23 @@ " MAPKMetric(k=5),\n", " NDCGKMetric(k=5),\n", "])\n", - "column_mapping=ColumnMapping(recomendations_type='rank', target='rating', prediction='rank')\n", + "column_mapping=ColumnMapping(recommendations_type='rank', target='rating', prediction='rank')\n", "report.run(reference_data=ref.fillna(0), current_data=curr.fillna(0), column_mapping=column_mapping)\n", "report" ], + "outputs": [], "metadata": { "id": "7KIQreI6tKEA" - }, - "execution_count": null, - "outputs": [] + } }, { "cell_type": "code", + "execution_count": null, "source": [], + "outputs": [], "metadata": { "id": "jSiTOBzhOl1s" - }, - "execution_count": null, - "outputs": [] + } } ] } \ No newline at end of file diff --git a/src/evidently/calculations/recommender_systems.py b/src/evidently/calculations/recommender_systems.py index 5dc2b32299..f691895643 100644 --- a/src/evidently/calculations/recommender_systems.py +++ b/src/evidently/calculations/recommender_systems.py @@ -9,7 +9,7 @@ def collect_dataset( users: pd.Series, target: pd.Series, preds: pd.Series, - recomendations_type: str, + recommendations_type: str, min_rel_score: Optional[int], no_feedback_users: bool, bin_data: bool, @@ -18,7 +18,7 @@ def collect_dataset( df.columns = ["users", "target", "preds"] if min_rel_score: df["target"] = (df["target"] >= min_rel_score).astype(int) - if recomendations_type == "score": + if recommendations_type == "score": df["preds"] = df.groupby("users")["preds"].transform("rank", ascending=False) if bin_data: df["target"] = (df["target"] > 0).astype(int) @@ -37,10 +37,10 @@ def get_curr_and_ref_df( if target_column is None or prediction is None: raise ValueError("Target and prediction were not found in data.") _, target_current, target_reference = data.get_data(target_column.column_name) - recomendations_type = data.column_mapping.recomendations_type - if recomendations_type is None: - recomendations_type = "scores" - if recomendations_type == "rank" and prediction.predicted_values is not None: + recommendations_type = data.column_mapping.recommendations_type + if recommendations_type is None: + recommendations_type = "scores" + if recommendations_type == "rank" and prediction.predicted_values is not None: pred_name = prediction.predicted_values.column_name elif prediction.prediction_probas is not None: pred_name = prediction.prediction_probas[0].column_name @@ -53,7 +53,7 @@ def get_curr_and_ref_df( user_current, target_current, prediction_current, - recomendations_type, + recommendations_type, min_rel_score, no_feedback_users, bin_data, @@ -65,7 +65,7 @@ def get_curr_and_ref_df( user_reference, target_reference, prediction_reference, - recomendations_type, + recommendations_type, min_rel_score, no_feedback_users, bin_data, diff --git a/src/evidently/pipeline/column_mapping.py b/src/evidently/pipeline/column_mapping.py index 700f0694ee..0ae77651d8 100644 --- a/src/evidently/pipeline/column_mapping.py +++ b/src/evidently/pipeline/column_mapping.py @@ -32,7 +32,7 @@ class ColumnMapping: embeddings: Optional[Embeddings] = None user_id: Optional[str] = "user_id" item_id: Optional[str] = "item_id" - recomendations_type: Optional[str] = "score" + recommendations_type: Optional[str] = "score" def is_classification_task(self): return self.task == TaskType.CLASSIFICATION_TASK diff --git a/src/evidently/utils/data_preprocessing.py b/src/evidently/utils/data_preprocessing.py index 27cc52afc6..e0ff8e03c3 100644 --- a/src/evidently/utils/data_preprocessing.py +++ b/src/evidently/utils/data_preprocessing.py @@ -195,9 +195,9 @@ def _prediction_column( raise ValueError("Prediction type is categorical but task is regression") if prediction_type == ColumnType.Numerical: return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type)) - if mapping is not None and mapping.recomendations_type == "rank": + if mapping is not None and mapping.recommendations_type == "rank": return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type)) - if task == TaskType.RECOMMENDER_SYSTEMS and mapping is not None and mapping.recomendations_type == "score": + if task == TaskType.RECOMMENDER_SYSTEMS and mapping is not None and mapping.recommendations_type == "score": return PredictionColumns(prediction_probas=[ColumnDefinition(prediction, prediction_type)]) if task is None: if prediction_type == ColumnType.Numerical and target_type == ColumnType.Categorical: diff --git a/tests/metrics/recsys/test_f_beta_top_k.py b/tests/metrics/recsys/test_f_beta_top_k.py index 48c7a1065d..5e78213905 100644 --- a/tests/metrics/recsys/test_f_beta_top_k.py +++ b/tests/metrics/recsys/test_f_beta_top_k.py @@ -17,7 +17,7 @@ def test_fbeta_values(): metric = FBetaTopKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() diff --git a/tests/metrics/recsys/test_map_k.py b/tests/metrics/recsys/test_map_k.py index 1507fefb73..56aacbe7e3 100644 --- a/tests/metrics/recsys/test_map_k.py +++ b/tests/metrics/recsys/test_map_k.py @@ -17,7 +17,7 @@ def test_map_value(): metric = MAPKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() @@ -38,7 +38,7 @@ def test_map_value_judged_only(): metric = MAPKMetric(k=3, no_feedback_users=True) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() diff --git a/tests/metrics/recsys/test_mar_k.py b/tests/metrics/recsys/test_mar_k.py index 45e21f47d9..9b42c6599a 100644 --- a/tests/metrics/recsys/test_mar_k.py +++ b/tests/metrics/recsys/test_mar_k.py @@ -16,7 +16,7 @@ def test_mar_values(): metric = MARKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() diff --git a/tests/metrics/recsys/test_ndcg_k.py b/tests/metrics/recsys/test_ndcg_k.py index fe500cd482..8384a053a6 100644 --- a/tests/metrics/recsys/test_ndcg_k.py +++ b/tests/metrics/recsys/test_ndcg_k.py @@ -16,7 +16,7 @@ def test_ndcg_values(): metric = NDCGKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() diff --git a/tests/metrics/recsys/test_precision_top_k.py b/tests/metrics/recsys/test_precision_top_k.py index 335ce80f40..c33aa9bdec 100644 --- a/tests/metrics/recsys/test_precision_top_k.py +++ b/tests/metrics/recsys/test_precision_top_k.py @@ -17,7 +17,7 @@ def test_precision_value(): metric = PrecisionTopKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() @@ -38,7 +38,7 @@ def test_precision_value_judged_only(): metric = PrecisionTopKMetric(k=3, no_feedback_users=True) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() diff --git a/tests/metrics/recsys/test_recall_top_k.py b/tests/metrics/recsys/test_recall_top_k.py index 4faa9547b5..7b63fad791 100644 --- a/tests/metrics/recsys/test_recall_top_k.py +++ b/tests/metrics/recsys/test_recall_top_k.py @@ -17,7 +17,7 @@ def test_recall_values(): metric = RecallTopKMetric(k=2) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result() @@ -59,7 +59,7 @@ def test_recsll_include_no_feedback(): metric = RecallTopKMetric(k=2, no_feedback_users=True) report = Report(metrics=[metric]) - column_mapping = ColumnMapping(recomendations_type="rank") + column_mapping = ColumnMapping(recommendations_type="rank") report.run(reference_data=None, current_data=current, column_mapping=column_mapping) results = metric.get_result()