-
Notifications
You must be signed in to change notification settings - Fork 0
/
movie-recommender.py
64 lines (53 loc) · 2.66 KB
/
movie-recommender.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM
from lightfm.evaluation import auc_score, precision_at_k, recall_at_k
def sample_recommendation(model, train_data, item_labels, user_ids, n_known=3, n_results=2):
    """Print each user's known positives and their top-N new recommendations.

    Parameters
    ----------
    model : fitted model exposing ``predict(user_id, item_ids) -> scores``
        (e.g. a trained LightFM instance).
    train_data : scipy sparse matrix, shape (n_users, n_items)
        Training interactions; a user's nonzero columns are their known positives.
    item_labels : numpy array of str, indexed by item id
        Human-readable item names.
    user_ids : iterable of int
        User ids to report on.
    n_known : int, optional
        Max number of known positives to print per user (default 3).
    n_results : int, optional
        Max number of recommendations to print per user (default 2).
    """
    n_items = train_data.shape[1]
    # Hoist loop-invariant work: convert to CSR once (was re-converted per user)
    # and build the candidate id array once.
    interactions = train_data.tocsr()
    all_item_ids = np.arange(n_items)
    for user_id in user_ids:
        # Items this user already rated positively in training.
        known_positives = item_labels[interactions[user_id].indices]
        # O(1) membership test instead of scanning the label array per candidate.
        known_set = set(known_positives)
        # Rank every item by predicted score, best first.
        scores = model.predict(user_id, all_item_ids)
        top_items = item_labels[np.argsort(-scores)]
        # Recommend only items the user has not already interacted with.
        top_items_filtered = [item for item in top_items if item not in known_set]
        print("User %s" % user_id)
        print(" Known positives:")
        for x in known_positives[:n_known]:
            print(" %s" % x)
        print(" Recommended:")
        for x in top_items_filtered[:n_results]:
            print(" %s" % x)
# fetch items with positive ratings only (min_rating=4.0 keeps ratings >= 4,
# i.e. we model positive feedback only)
data = fetch_movielens(min_rating=4.0)

# train/test interaction matrices as provided by the dataset helper
train_set = data['train']
test_set = data['test']

# human-readable labels for all items, indexed by item id
item_labels = data['item_labels']

# item metadata features that make the model a hybrid recommender
item_features = data['item_features']

# show the shape/density of the interaction data
print(repr(train_set))
print(repr(test_set))

# define model (hybrid method); WARP loss directly optimises top-of-list
# ranking, which suits implicit positive-only feedback
model = LightFM(loss='warp',
                random_state=2016,
                no_components=150)

# train model
model.fit(train_set, item_features=item_features, epochs=100, num_threads=2)

# get recommendations for a sample of users
users_ids_list = [3, 25, 451, 737, 901]
known_items_to_show = 5
recommendations_to_show = 5
sample_recommendation(model=model, train_data=train_set, item_labels=item_labels,
                      user_ids=users_ids_list, n_known=known_items_to_show,
                      n_results=recommendations_to_show)

# evaluate on the held-out test set; train_interactions excludes items already
# seen in training from the ranking, and preserve_rows keeps one row per user id
# so we can index the metric arrays by users_ids_list below
patk = precision_at_k(model, test_set, train_interactions=train_set, k=recommendations_to_show,
                      user_features=None, item_features=item_features, preserve_rows=True,
                      num_threads=1, check_intersections=True)
ratk = recall_at_k(model, test_set, train_interactions=train_set, k=recommendations_to_show,
                   user_features=None, item_features=item_features, preserve_rows=True,
                   num_threads=1, check_intersections=True)
# FIX: pass train_interactions here too, consistent with the precision/recall
# calls above -- otherwise items the user already rated in training inflate
# the reported test AUC
mean_auc_score = auc_score(model, test_set, train_interactions=train_set,
                           item_features=item_features, user_features=None,
                           num_threads=1).mean()

print('\nFor users: ', users_ids_list)
print('\nPrecision at k (proportion of recommended items in the top-k set that are relevant) with k = ', recommendations_to_show)
print(patk[users_ids_list])
print('\nRecall at k (proportion of relevant items found in the top-k recommendations) with k = ', recommendations_to_show)
print(ratk[users_ids_list])
print('\nOverall Mean AUC score')
print(mean_auc_score)