-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecommenders.py
180 lines (131 loc) · 7.34 KB
/
recommenders.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import numpy as np
import pandas
class popularity_recommender_py():
    """Popularity based recommender.

    Every item is scored by the number of user entries recorded for it in
    the training data; the ten best-scoring items are recommended to every
    user alike.
    """

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.popularity_recommendations = None

    def create(self, train_data, user_id, item_id):
        """Build the popularity table from ``train_data``.

        Args:
            train_data: pandas DataFrame holding one row per user/item event.
            user_id: name of the column identifying the user.
            item_id: name of the column identifying the item.
        """
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

        # Count the user entries of each unique item; that count is the
        # recommendation score. The rename must target the configured user
        # column (not a literal 'user_id'), otherwise no 'score' column is
        # produced when the caller's column has a different name.
        train_data_grouped = (
            train_data.groupby([self.item_id])
            .agg({self.user_id: 'count'})
            .reset_index()
            .rename(columns={self.user_id: 'score'})
        )

        # Highest score first; ties are ordered by item id ascending.
        train_data_sort = train_data_grouped.sort_values(
            ['score', self.item_id], ascending=[False, True]
        )

        # method='first' gives tied scores distinct ranks in sorted order.
        train_data_sort['Rank'] = train_data_sort['score'].rank(
            ascending=False, method='first'
        )

        # Keep an independent copy of the top 10 so that later column
        # assignments never write into a slice of the sorted frame.
        self.popularity_recommendations = train_data_sort.head(10).copy()

    def recommend(self, user_id):
        """Return the top-10 popularity table labelled with ``user_id``.

        Args:
            user_id: value written into the leading ``'user_id'`` column.

        Returns:
            A new DataFrame whose first column is ``'user_id'`` followed by
            the item column, ``'score'`` and ``'Rank'``. The table stored on
            the model is left unmodified.
        """
        # Work on a copy: the stored table is shared by every call.
        user_recommendations = self.popularity_recommendations.copy()
        user_recommendations['user_id'] = user_id

        # Bring the user_id column to the front.
        cols = ['user_id'] + [
            col for col in user_recommendations.columns if col != 'user_id'
        ]
        return user_recommendations[cols]
#Class for Item similarity based Recommender System model
class item_similarity_recommender_py():
    """Item similarity based recommender.

    Two items are considered similar in proportion to the Jaccard index of
    the sets of users recorded for them in the training data. Candidate
    items are scored by their average similarity to a set of reference
    items (a user's items, or an explicit item list).
    """

    # Maximum number of rows returned by generate_top_recommendations.
    _TOP_N = 10

    def __init__(self):
        self.train_data = None
        self.user_id = None
        self.item_id = None
        self.cooccurence_matrix = None
        self.songs_dict = None
        self.rev_songs_dict = None
        self.item_similarity_recommendations = None

    def get_user_items(self, user):
        """Return the list of unique items recorded for ``user``."""
        user_data = self.train_data[self.train_data[self.user_id] == user]
        return list(user_data[self.item_id].unique())

    def get_item_users(self, item):
        """Return the set of unique users recorded for ``item``."""
        item_data = self.train_data[self.train_data[self.item_id] == item]
        return set(item_data[self.user_id].unique())

    def get_all_items_train_data(self):
        """Return the list of unique items in the training data."""
        return list(self.train_data[self.item_id].unique())

    def _users_by_item(self):
        """Map every item in the training data to its set of unique users."""
        # One grouped pass replaces a full-frame filter per item.
        grouped = self.train_data.groupby(self.item_id, sort=False)[self.user_id]
        return {item: set(users.unique()) for item, users in grouped}

    def construct_cooccurence_matrix(self, user_songs, all_songs):
        """Build the similarity matrix between ``user_songs`` and ``all_songs``.

        Args:
            user_songs: reference items (one matrix row each).
            all_songs: candidate items (one matrix column each).

        Returns:
            ``np.matrix`` of shape ``(len(user_songs), len(all_songs))`` where
            entry ``[j, i]`` is ``|users_i & users_j| / |users_i | users_j|``,
            or 0 when the two items share no user.
        """
        users_by_item = self._users_by_item()
        no_users = set()

        # Users of each reference item; items absent from the training data
        # simply have no users.
        user_songs_users = [users_by_item.get(song, no_users) for song in user_songs]

        scores = np.zeros(shape=(len(user_songs), len(all_songs)), dtype=float)

        for i, song in enumerate(all_songs):
            users_i = users_by_item.get(song, no_users)
            if not users_i:
                # Nothing can intersect an empty set; the column stays 0.
                continue
            for j, users_j in enumerate(user_songs_users):
                shared = len(users_i & users_j)
                if shared:
                    # |A ∪ B| = |A| + |B| - |A ∩ B|
                    union_size = len(users_i) + len(users_j) - shared
                    scores[j, i] = float(shared) / float(union_size)

        # np.matrix is kept as the return type for existing callers.
        return np.asmatrix(scores)

    def generate_top_recommendations(self, user, cooccurence_matrix, all_songs, user_songs):
        """Turn a similarity matrix into a ranked recommendation table.

        Args:
            user: value written into the ``'user_id'`` column.
            cooccurence_matrix: 2-D similarity scores, rows following
                ``user_songs`` and columns following ``all_songs``.
            all_songs: candidate items, in column order.
            user_songs: reference items; these are never recommended.

        Returns:
            DataFrame with columns ``['user_id', 'song', 'score', 'rank']``
            holding at most 10 rows ordered by descending score, or the
            integer ``-1`` when there is nothing to recommend.
        """
        print("Non zero values in cooccurence_matrix :%d" % np.count_nonzero(cooccurence_matrix))

        columns = ['user_id', 'song', 'score', 'rank']
        scores = np.atleast_2d(np.asarray(cooccurence_matrix, dtype=float))
        rows = []

        # With no reference items there is nothing to average over; skip the
        # computation instead of dividing by zero.
        if scores.shape[0] > 0:
            # Average similarity of each candidate over all reference items.
            user_sim_scores = (scores.sum(axis=0) / float(scores.shape[0])).tolist()

            # Sort (score, index) pairs so the candidate index travels with
            # its score; ties fall back to the higher index first.
            sort_index = sorted(
                ((score, idx) for idx, score in enumerate(user_sim_scores)),
                reverse=True,
            )

            already_known = set(user_songs)
            for score, idx in sort_index:
                song = all_songs[idx]
                if np.isnan(score) or song in already_known:
                    continue
                rows.append([user, song, score, len(rows) + 1])
                if len(rows) >= self._TOP_N:
                    break

        # Handle the case where there are no recommendations.
        if not rows:
            print("The current user has no songs for training the item similarity based recommendation model.")
            return -1

        return pandas.DataFrame(rows, columns=columns)

    def create(self, train_data, user_id, item_id):
        """Store the training data and the names of its user/item columns."""
        self.train_data = train_data
        self.user_id = user_id
        self.item_id = item_id

    def recommend(self, user):
        """Recommend items for ``user`` based on the items they already have.

        Returns:
            The table produced by ``generate_top_recommendations`` (or ``-1``
            when no recommendation can be made).
        """
        # Unique songs for this user.
        user_songs = self.get_user_items(user)
        print("No. of unique songs for the user: %d" % len(user_songs))

        # Unique songs in the training data.
        all_songs = self.get_all_items_train_data()
        print("no. of unique songs in the training set: %d" % len(all_songs))

        # Matrix of size len(user_songs) x len(all_songs).
        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)

        return self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)

    def get_similar_items(self, item_list):
        """Return the items most similar to the items in ``item_list``.

        Returns:
            The table produced by ``generate_top_recommendations`` with an
            empty ``'user_id'`` value (or ``-1`` when nothing qualifies).
        """
        user_songs = item_list

        # Unique songs in the training data.
        all_songs = self.get_all_items_train_data()
        print("no. of unique songs in the training set: %d" % len(all_songs))

        cooccurence_matrix = self.construct_cooccurence_matrix(user_songs, all_songs)

        # No specific user is involved, so the user column is left blank.
        user = ""
        df_recommendations = self.generate_top_recommendations(user, cooccurence_matrix, all_songs, user_songs)

        # Hand the result back to the caller instead of discarding it.
        return df_recommendations