假設用戶正在觀看 The Dark Knight,它屬於蝙蝠俠題材的電影。如果我們基於內容設計推薦系統,就很可能會推薦其他的蝙蝠俠題材(或超級英雄題材)電影,而忽略了推薦影片本身的質量控制。例如,大多數喜歡 The Dark Knight 的人對其他蝙蝠俠題材和超級英雄題材的電影評價並不高,儘管它們的主角相同、題材相近。因此,這個時候有必要引入協同過濾推薦技術,以提高用戶對推薦內容的驚喜度。
import numpy as np
import pandas as pd
from surprise import SVD, Reader, Dataset

# Import or compute the cosine_sim matrix
cosine_sim = pd.read_csv('../data/cosine_sim.csv')

# Import or compute the cosine sim mapping matrix
cosine_sim_map = pd.read_csv('../data/cosine_sim_map.csv', header=None)

# Convert cosine_sim_map into a Pandas Series: column 0 becomes the index
# and column 1 holds the values that are looked up by that index.
cosine_sim_map = cosine_sim_map.set_index(0)
cosine_sim_map = cosine_sim_map[1]

# Build the SVD based Collaborative filter
reader = Reader()
ratings = pd.read_csv('../data/ratings_small.csv')
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# The model is trained on the full trainset, so no cross-validation folds
# are created here. The legacy `data.split(n_folds=5)` / `svd.train(...)`
# calls are replaced by `fit`, the current Surprise training entry point.
svd = SVD()
trainset = data.build_full_trainset()
svd.fit(trainset)

# Build title to ID and ID to title mappings
id_map = pd.read_csv('../data/movie_ids.csv')
id_to_title = id_map.set_index('id')
title_to_id = id_map.set_index('title')

# Import or compute relevant metadata of the movies
smd = pd.read_csv('../data/metadata_small.csv')


def hybrid(userId, title):
    """Recommend movies similar to *title*, ranked by predicted rating.

    The 25 movies most similar to *title* according to ``cosine_sim`` are
    taken as candidates, each candidate is scored with the SVD model's
    predicted rating for *userId*, and the 10 best-scored candidates are
    returned.

    Args:
        userId: User identifier passed to ``svd.predict``.
        title: Movie title used as the lookup key in ``cosine_sim_map``.

    Returns:
        A DataFrame with at most 10 rows and the columns ``title``,
        ``vote_count``, ``vote_average``, ``year``, ``id`` and ``est``
        (the predicted rating), sorted by ``est`` in decreasing order.
    """
    # Extract the cosine_sim index of the movie
    idx = cosine_sim_map[title]

    # Extract the similarity scores and their corresponding index for every
    # movie from the cosine_sim matrix (its columns are looked up by the
    # string form of the integer index).
    sim_scores = list(enumerate(cosine_sim[str(int(idx))]))

    # Sort the (index, score) tuples in decreasing order of similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Select the top 25 tuples, excluding the first
    # (as it is the similarity score of the movie with itself)
    sim_scores = sim_scores[1:26]

    # Store the cosine_sim indices of the top 25 movies in a list
    movie_indices = [i[0] for i in sim_scores]

    # Extract the metadata of the aforementioned movies; rows of smd are
    # selected by position, using the cosine_sim indices. The copy makes the
    # column assignment below operate on an independent frame rather than on
    # a slice of smd.
    movies = smd.iloc[movie_indices][
        ['title', 'vote_count', 'vote_average', 'year', 'id']
    ].copy()

    # Compute the predicted ratings using the SVD filter; the 'id' value of
    # each candidate is mapped to its 'movieId' through id_to_title first.
    movies['est'] = movies['id'].apply(
        lambda x: svd.predict(userId, id_to_title.loc[x]['movieId']).est
    )

    # Sort the movies in decreasing order of predicted rating
    movies = movies.sort_values('est', ascending=False)

    # Return the top 10 movies as recommendations
    return movies.head(10)