-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
30 lines (22 loc) · 1.22 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Import the pandas library as pd
import pandas as pd
# Load the movie dataset, reading only the 'movieId' and 'title' columns.
# `usecols` means the unused columns of the CSV are never loaded into the
# DataFrame; the trailing selection keeps the column order identical to the
# list regardless of the order in the file.
movie_columns = ["movieId", "title"]
movie = pd.read_csv("movie.csv", usecols=movie_columns)[movie_columns]
# Load the rating dataset, reading only 'userId', 'movieId', and 'rating'.
rating_columns = ["userId", "movieId", "rating"]
rating = pd.read_csv("rating.csv", usecols=rating_columns)[rating_columns]
# Attach a title to every rating. 'movieId' is the only column the two frames
# share; naming it explicitly keeps the join key stable if columns are added.
data = pd.merge(movie, rating, on="movieId")
# Truncate the merged data to the first 2,000,000 rows
# (presumably to keep the pivot table a manageable size - TODO confirm).
data = data.iloc[:2000000, :]
# Build the user x title rating matrix: one row per userId, one column per
# title, cell = rating. Cells for movies a user did not rate are NaN, and
# duplicate (userId, title) pairs are averaged (pivot_table's default aggfunc).
pivot_table = data.pivot_table(index=["userId"], columns=["title"], values="rating")
# Define a function to find movies similar to a given movie
def movie(input, top_n=5):
    """Return the titles of the movies most similar to ``input``.

    Similarity is the correlation between the ratings column of ``input``
    and every other column of the module-level ``pivot_table``, as computed
    by ``DataFrame.corrwith``.

    NOTE: defining this function rebinds the module-level name ``movie``,
    which until this point refers to the movie DataFrame. The name and the
    ``input`` parameter (which shadows the builtin) are kept so existing
    callers continue to work.

    Args:
        input: Title of the reference movie; must be a column of
            ``pivot_table``.
        top_n: Maximum number of titles to return. Defaults to 5.

    Returns:
        A list of movie titles ordered from most to least similar. The
        reference movie itself is excluded, as are movies whose correlation
        is undefined (NaN), so the list may hold fewer than ``top_n`` titles.

    Raises:
        KeyError: If ``input`` is not a column of ``pivot_table``.
    """
    # Select the column corresponding to the input movie (ratings per user)
    movie_watched = pivot_table[input]
    # Calculate the correlation of the watched movie with every movie column
    similarity_with_other_movies = pivot_table.corrwith(movie_watched)
    # Rank by correlation, highest first. Without the sort, head() would just
    # return the first titles in column order. The query movie is dropped
    # because it always correlates perfectly with itself; NaN scores are
    # dropped because they carry no similarity information. mergesort keeps
    # the ordering of tied scores deterministic.
    ranked = (
        similarity_with_other_movies
        .drop(labels=input, errors="ignore")
        .dropna()
        .sort_values(ascending=False, kind="mergesort")
    )
    # Return the titles of the top_n most similar movies
    return ranked.head(top_n).index.tolist()