Commit

Fix_typo (#812)
Co-authored-by: 0lgaF <olgafilippova@MacBook-Air-Olga.local>
0lgaF authored Oct 13, 2023
1 parent 26e8bd9 · commit 2c7f36e
Showing 10 changed files with 63 additions and 63 deletions.
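
In short: the commit renames the misspelled ColumnMapping argument recomendations_type to recommendations_type across the column mapping dataclass, the recommender-system calculations, the data preprocessing helpers, and six recsys metric tests. The example notebook gets the same rename in its report cell, along with reordered cell metadata and an nbformat_minor bump from 0 to 2.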
86 changes: 43 additions & 43 deletions examples/how_to_questions/how_to_calculate_recsys_metrics.ipynb
@@ -1,6 +1,6 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"nbformat_minor": 2,
"metadata": {
"colab": {
"provenance": []
@@ -17,19 +17,20 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "8aJYBZFNMyXc"
},
"outputs": [],
"source": [
"try:\n",
" import evidently\n",
"except:\n",
" !pip install git+https://github.com/evidentlyai/evidently.git"
+]
-],
-"outputs": [],
-"metadata": {
-"id": "8aJYBZFNMyXc"
-}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"import pandas as pd\n",
"import numpy as np\n",
@@ -40,22 +41,21 @@
"\n",
"import requests"
],
"outputs": [],
"metadata": {
"id": "UfuNPLwjO99K"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"!pip install implicit"
],
"outputs": [],
"metadata": {
"id": "8A_dH0K0082d"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "markdown",
@@ -68,6 +68,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"content = requests.get(\"http://files.grouplens.org/datasets/movielens/ml-100k.zip\").content\n",
"\n",
@@ -77,14 +78,14 @@
" movies = arc.read(\"ml-100k/u.item\").decode(encoding='latin-1').split(\"\\n\")\n",
" users = arc.read(\"ml-100k/u.user\").decode(encoding='latin-1').split(\"\\n\")"
],
"outputs": [],
"metadata": {
"id": "f1wLolXpM02U"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"columns = ['user_id', 'movie_id', 'rating', 'timestamp']\n",
"\n",
@@ -107,11 +108,10 @@
"movies.drop(columns=['-', 'url'], inplace=True)\n",
"movies[genres] = movies[genres].astype(int)"
],
"outputs": [],
"metadata": {
"id": "-V1w4P5LeV4X"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "markdown",
@@ -124,6 +124,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"def transform_predictions(k, user_ids, item_ids):\n",
" return pd.DataFrame(\n",
@@ -138,11 +139,10 @@
" preds['rank'] = preds.groupby('user_id')['rank'].transform(lambda x: x.fillna(x.max() + 1))\n",
" return preds"
],
"outputs": [],
"metadata": {
"id": "MqP6bLDv92hY"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "markdown",
@@ -155,6 +155,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"from implicit.cpu.als import AlternatingLeastSquares\n",
"from scipy.sparse import csr_matrix\n",
@@ -163,66 +164,66 @@
"als_model = AlternatingLeastSquares(factors=20, iterations=5, random_state=0)\n",
"als_model.fit(csr_matrix(pivot_table))"
],
"outputs": [],
"metadata": {
"id": "-FQDsHEA3OKw"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"test_users = test.user_id.unique()\n",
"len(test_users)"
],
"outputs": [],
"metadata": {
"id": "LLVvpQOvFIzl"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"ref_true = test[test.user_id.isin(test_users[:471])]\n",
"curr_true = test[test.user_id.isin(test_users[471:])]"
],
"outputs": [],
"metadata": {
"id": "Sa8bO0CHFAJV"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"ids, scores = als_model.recommend(ref_true.user_id.unique() - 1, csr_matrix(pivot_table.loc[ref_true.user_id.unique()]), N=30, filter_already_liked_items=True)\n",
"ref = get_full_df(30, ref_true.user_id.unique(), ids, ref_true)\n",
"\n",
"ids, scores = als_model.recommend(curr_true.user_id.unique() - 1, csr_matrix(pivot_table.loc[curr_true.user_id.unique()]), N=30, filter_already_liked_items=True)\n",
"curr = get_full_df(30, curr_true.user_id.unique(), ids, curr_true)"
],
"outputs": [],
"metadata": {
"id": "LipvjnXIH3Y5"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"from evidently.metrics import PrecisionTopKMetric, RecallTopKMetric, FBetaTopKMetric, MAPKMetric, NDCGKMetric\n",
"from evidently.pipeline.column_mapping import ColumnMapping\n",
"from evidently.report import Report"
],
"outputs": [],
"metadata": {
"id": "vxU6s88ism0_"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [
"report = Report(metrics=[\n",
" PrecisionTopKMetric(k=5),\n",
@@ -231,24 +232,23 @@
" MAPKMetric(k=5),\n",
" NDCGKMetric(k=5),\n",
"])\n",
"column_mapping=ColumnMapping(recomendations_type='rank', target='rating', prediction='rank')\n",
"column_mapping=ColumnMapping(recommendations_type='rank', target='rating', prediction='rank')\n",
"report.run(reference_data=ref.fillna(0), current_data=curr.fillna(0), column_mapping=column_mapping)\n",
"report"
],
"outputs": [],
"metadata": {
"id": "7KIQreI6tKEA"
-},
-"execution_count": null,
-"outputs": []
+}
},
{
"cell_type": "code",
"execution_count": null,
"source": [],
"outputs": [],
"metadata": {
"id": "jSiTOBzhOl1s"
-},
-"execution_count": null,
-"outputs": []
+}
}
]
}
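
The report cell above is the notebook's one call site of the renamed argument. Below is a minimal, self-contained sketch of the same pattern on synthetic data; the toy frame and its values are illustrative, not taken from the notebook.

```python
import pandas as pd

from evidently.metrics import NDCGKMetric, PrecisionTopKMetric, RecallTopKMetric
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report

# Toy data in rank format: one row per recommended item per user, with the
# model's rank for that item and the observed relevance (here, a rating).
current = pd.DataFrame({
    "user_id": [1, 1, 1, 2, 2, 2],
    "item_id": [10, 11, 12, 10, 13, 14],
    "rank": [1, 2, 3, 1, 2, 3],
    "rating": [1, 0, 1, 0, 1, 0],
})

# recommendations_type="rank" tells Evidently the prediction column already
# holds ranks, so no score-to-rank conversion is applied.
column_mapping = ColumnMapping(recommendations_type="rank", target="rating", prediction="rank")

report = Report(metrics=[PrecisionTopKMetric(k=2), RecallTopKMetric(k=2), NDCGKMetric(k=2)])
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)
report.show()
```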
16 changes: 8 additions & 8 deletions src/evidently/calculations/recommender_systems.py
@@ -9,7 +9,7 @@ def collect_dataset(
users: pd.Series,
target: pd.Series,
preds: pd.Series,
-recomendations_type: str,
+recommendations_type: str,
min_rel_score: Optional[int],
no_feedback_users: bool,
bin_data: bool,
@@ -18,7 +18,7 @@ def collect_dataset(
df.columns = ["users", "target", "preds"]
if min_rel_score:
df["target"] = (df["target"] >= min_rel_score).astype(int)
-if recomendations_type == "score":
+if recommendations_type == "score":
df["preds"] = df.groupby("users")["preds"].transform("rank", ascending=False)
if bin_data:
df["target"] = (df["target"] > 0).astype(int)
@@ -37,10 +37,10 @@ def get_curr_and_ref_df(
if target_column is None or prediction is None:
raise ValueError("Target and prediction were not found in data.")
_, target_current, target_reference = data.get_data(target_column.column_name)
-recomendations_type = data.column_mapping.recomendations_type
-if recomendations_type is None:
-recomendations_type = "scores"
-if recomendations_type == "rank" and prediction.predicted_values is not None:
+recommendations_type = data.column_mapping.recommendations_type
+if recommendations_type is None:
+recommendations_type = "scores"
+if recommendations_type == "rank" and prediction.predicted_values is not None:
pred_name = prediction.predicted_values.column_name
elif prediction.prediction_probas is not None:
pred_name = prediction.prediction_probas[0].column_name
@@ -53,7 +53,7 @@
user_current,
target_current,
prediction_current,
-recomendations_type,
+recommendations_type,
min_rel_score,
no_feedback_users,
bin_data,
@@ -65,7 +65,7 @@
user_reference,
target_reference,
prediction_reference,
-recomendations_type,
+recommendations_type,
min_rel_score,
no_feedback_users,
bin_data,
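
The one piece of executable logic here that depends on the renamed value is the score branch of collect_dataset, which converts each user's scores into descending ranks. A standalone pandas sketch of that transform, on toy values:

```python
import pandas as pd

# Toy frame standing in for the (users, target, preds) data collect_dataset assembles.
df = pd.DataFrame({
    "users": [1, 1, 1, 2, 2],
    "preds": [0.9, 0.4, 0.7, 0.2, 0.8],  # raw model scores
})

# Same idea as the recommendations_type == "score" branch: within each user,
# the highest score becomes rank 1.0, the next highest 2.0, and so on.
df["preds"] = df.groupby("users")["preds"].transform("rank", ascending=False)

print(df["preds"].tolist())  # [1.0, 3.0, 2.0, 2.0, 1.0]
```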
2 changes: 1 addition & 1 deletion src/evidently/pipeline/column_mapping.py
@@ -32,7 +32,7 @@ class ColumnMapping:
embeddings: Optional[Embeddings] = None
user_id: Optional[str] = "user_id"
item_id: Optional[str] = "item_id"
-recomendations_type: Optional[str] = "score"
+recommendations_type: Optional[str] = "score"

def is_classification_task(self):
return self.task == TaskType.CLASSIFICATION_TASK
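
After the rename, "score" stays the field's default and "rank" remains the opt-in value for rank-typed predictions. A quick illustration:

```python
from evidently.pipeline.column_mapping import ColumnMapping

# Default: the prediction column holds raw scores, converted to ranks internally.
assert ColumnMapping().recommendations_type == "score"

# Opt in when the prediction column already holds 1-based ranks.
assert ColumnMapping(recommendations_type="rank").recommendations_type == "rank"
```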
4 changes: 2 additions & 2 deletions src/evidently/utils/data_preprocessing.py
@@ -195,9 +195,9 @@ def _prediction_column(
raise ValueError("Prediction type is categorical but task is regression")
if prediction_type == ColumnType.Numerical:
return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type))
-if mapping is not None and mapping.recomendations_type == "rank":
+if mapping is not None and mapping.recommendations_type == "rank":
return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type))
-if task == TaskType.RECOMMENDER_SYSTEMS and mapping is not None and mapping.recomendations_type == "score":
+if task == TaskType.RECOMMENDER_SYSTEMS and mapping is not None and mapping.recommendations_type == "score":
return PredictionColumns(prediction_probas=[ColumnDefinition(prediction, prediction_type)])
if task is None:
if prediction_type == ColumnType.Numerical and target_type == ColumnType.Categorical:
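
In _prediction_column, the renamed field decides whether the single prediction column is wrapped as predicted values (rank data) or as a proba-like score column. A simplified paraphrase of that branching, assuming ColumnDefinition and PredictionColumns are importable from this module as the diff suggests; the surrounding task and type checks are elided:

```python
from evidently.utils.data_preprocessing import ColumnDefinition, PredictionColumns


def route_recsys_prediction(mapping, prediction, prediction_type):
    # Rank-typed predictions become predicted values.
    if mapping is not None and mapping.recommendations_type == "rank":
        return PredictionColumns(predicted_values=ColumnDefinition(prediction, prediction_type))
    # Score-typed predictions become a single proba-like column.
    return PredictionColumns(prediction_probas=[ColumnDefinition(prediction, prediction_type)])
```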
2 changes: 1 addition & 1 deletion tests/metrics/recsys/test_f_beta_top_k.py
@@ -17,7 +17,7 @@ def test_fbeta_values():

metric = FBetaTopKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
4 changes: 2 additions & 2 deletions tests/metrics/recsys/test_map_k.py
@@ -17,7 +17,7 @@ def test_map_value():

metric = MAPKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
@@ -38,7 +38,7 @@ def test_map_value_judged_only():

metric = MAPKMetric(k=3, no_feedback_users=True)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
2 changes: 1 addition & 1 deletion tests/metrics/recsys/test_mar_k.py
@@ -16,7 +16,7 @@ def test_mar_values():

metric = MARKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
2 changes: 1 addition & 1 deletion tests/metrics/recsys/test_ndcg_k.py
@@ -16,7 +16,7 @@ def test_ndcg_values():

metric = NDCGKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
4 changes: 2 additions & 2 deletions tests/metrics/recsys/test_precision_top_k.py
@@ -17,7 +17,7 @@ def test_precision_value():

metric = PrecisionTopKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
@@ -38,7 +38,7 @@ def test_precision_value_judged_only():

metric = PrecisionTopKMetric(k=3, no_feedback_users=True)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
4 changes: 2 additions & 2 deletions tests/metrics/recsys/test_recall_top_k.py
@@ -17,7 +17,7 @@ def test_recall_values():

metric = RecallTopKMetric(k=2)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
@@ -59,7 +59,7 @@ def test_recsll_include_no_feedback():

metric = RecallTopKMetric(k=2, no_feedback_users=True)
report = Report(metrics=[metric])
-column_mapping = ColumnMapping(recomendations_type="rank")
+column_mapping = ColumnMapping(recommendations_type="rank")
report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

results = metric.get_result()
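
All six test modules repeat the same setup around the renamed argument; only the metric class and the asserted values differ. A condensed version of the shared pattern, with a toy frame and a plain smoke assertion standing in for the exact expected values in the real tests:

```python
import pandas as pd

from evidently.metrics import PrecisionTopKMetric
from evidently.pipeline.column_mapping import ColumnMapping
from evidently.report import Report


def test_precision_top_k_smoke():
    # Rank-format current data: one row per (user, item) pair, default column names.
    current = pd.DataFrame({
        "user_id": [1, 1, 1, 2, 2, 2],
        "item_id": ["a", "b", "c", "a", "b", "c"],
        "prediction": [1, 2, 3, 1, 2, 3],
        "target": [1, 0, 0, 0, 1, 1],
    })

    metric = PrecisionTopKMetric(k=2)
    report = Report(metrics=[metric])
    column_mapping = ColumnMapping(recommendations_type="rank")
    report.run(reference_data=None, current_data=current, column_mapping=column_mapping)

    # The real tests assert exact metric values; a smoke check suffices here.
    assert metric.get_result() is not None
```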
