當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

推荐经典算法实现之BPMF(pymc3+MovieLen)

發布時間：2025/4/16 编程问答 18 豆豆

生活随笔收集整理的這篇文章主要介紹了推荐经典算法实现之BPMF(pymc3+MovieLen) 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

BPMF是用貝葉斯推斷方法求解MF的概率模型，參考：https://gist.github.com/macks22/00a17b1d374dfc267a9a

1、利用其本身數據集的代碼如下：

# -*- Encoding:UTF-8 -*-
'''
@author: Jason.F
@date: 2019.07.22
@function: Implementing BPMF
           Dataset: Jester dense subset (jester-dense-subset-100x20.csv)
           Evaluating: RMSE on a random 10% hold-out
           https://www.cs.toronto.edu/~amnih/papers/bpmf.pdf
@reference: https://gist.github.com/macks22/00a17b1d374dfc267a9a
'''
import logging
import math
import sys
import time

import numpy as np
import pandas as pd
import pymc3 as pm
import scipy as sp
import theano
import theano.tensor as t

# Process exit status used when the input CSV cannot be read.
DATA_NOT_FOUND = -1


# data from: https://gist.github.com/macks22/b40ac9c685e920ad3ca2
def read_jester_data(fname='/data/tmpexec/jester-dense-subset-100x20.csv'):
    """Read the dense Jester dataset and split it randomly into train/test.

    One tenth of all matrix cells (rounded down) is moved from the training
    matrix to the test matrix; only cells that hold a value are eligible.

    :param str fname: Path of the CSV file to load.
    :returns: ``(train, test)`` float ndarrays with the shape of the CSV
        table. A cell holds its rating in exactly one of the two arrays and
        NaN in the other.
    """
    logging.info('reading data')
    try:
        data = pd.read_csv(fname)
    except IOError as err:
        print(str(err))
        url = 'https://gist.github.com/macks22/b40ac9c685e920ad3ca2'
        print('download from: %s' % url)
        sys.exit(DATA_NOT_FOUND)

    # Calculate split sizes.
    logging.info('splitting train/test sets')
    n, m = data.shape        # number of users, number of jokes
    N = n * m                # number of cells in the matrix
    test_size = int(N / 10)  # use 10% of the cells as test set

    # Prepare train/test ndarrays. astype(float) returns a copy and
    # guarantees the array can store NaN.
    train = data.values.astype(float)
    test = np.full(train.shape, np.nan)

    # Draw a random sample of the observed cells to use for testing.
    rows, cols = np.where(~np.isnan(train))
    n_missing = N - len(rows)
    picked = np.random.choice(len(rows), replace=False, size=test_size)
    test_rows, test_cols = rows[picked], cols[picked]

    # Transfer the sampled cells from the train set to the test set.
    test[test_rows, test_cols] = train[test_rows, test_cols]
    train[test_rows, test_cols] = np.nan

    # Sanity-check the split; cells that were already missing in the input
    # stay NaN in both arrays.
    assert np.isnan(train).sum() == n_missing + test_size
    assert np.isnan(test).sum() == N - test_size

    return train, test


def _impute_missing_with_mean(train):
    """Return a copy of `train` with NaN cells set to the mean of the rest."""
    filled = train.copy()
    nan_mask = np.isnan(filled)
    filled[nan_mask] = filled[~nan_mask].mean()
    return filled


def build_pmf_model(train, alpha=2, dim=10, std=0.01):
    """Construct the Probabilistic Matrix Factorization model using pymc3.

    Note that the `testval` param for U and V initialize the model away from
    0 using a small amount of Gaussian noise.

    :param np.ndarray train: Training data (observed) to learn the model on.
    :param int alpha: Fixed precision to use for the rating likelihood function.
    :param int dim: Dimensionality of the model; rank of low-rank approximation.
    :param float std: Standard deviation for Gaussian noise in model initialization.
    :returns: The constructed ``pm.Model``.
    """
    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    # Low precision reflects uncertainty; prevents overfitting.
    # We use point estimates from the data to initialize.
    # Set to mean variance across users and items.
    alpha_u = 1 / train.var(axis=1).mean()
    alpha_v = 1 / train.var(axis=0).mean()

    logging.info('building the PMF model')
    n, m = train.shape
    with pm.Model() as pmf:
        U = pm.MvNormal(
            'U', mu=0, tau=alpha_u * np.eye(dim),
            shape=(n, dim), testval=np.random.randn(n, dim) * std)
        V = pm.MvNormal(
            'V', mu=0, tau=alpha_v * np.eye(dim),
            shape=(m, dim), testval=np.random.randn(m, dim) * std)
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones(train.shape),
            observed=train)
    logging.info('done building PMF model')
    return pmf


def build_bpmf_model(train, alpha=2, dim=10, std=0.01):
    """Build the original BPMF model, which we cannot sample from due to
    current limitations in pymc3's implementation of the Wishart distribution.

    Parameters have the same meaning as in `build_pmf_model`.

    :returns: The constructed ``pm.Model``.
    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        lambda_u = pm.Wishart(
            'lambda_u', n=dim, V=np.eye(dim), shape=(dim, dim),
            testval=np.random.randn(dim, dim) * std)
        mu_u = pm.Normal(
            'mu_u', mu=0, tau=beta_0 * lambda_u, shape=dim,
            testval=np.random.randn(dim) * std)
        U = pm.MvNormal(
            'U', mu=mu_u, tau=lambda_u, shape=(n, dim),
            testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        lambda_v = pm.Wishart(
            'lambda_v', n=dim, V=np.eye(dim), shape=(dim, dim),
            testval=np.random.randn(dim, dim) * std)
        mu_v = pm.Normal(
            'mu_v', mu=0, tau=beta_0 * lambda_v, shape=dim,
            testval=np.random.randn(dim) * std)
        V = pm.MvNormal(
            'V', mu=mu_v, tau=lambda_v, shape=(m, dim),
            testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones((n, m)),
            observed=train)
    logging.info('done building the BPMF model')
    return bpmf


def build_mod_bpmf_model(train, alpha=2, dim=10, std=0.01):
    r"""Build the modified BPMF model using pymc3.

    The original model uses Wishart priors on the covariance matrices.
    Unfortunately, the Wishart distribution in pymc3 is currently not
    suitable for sampling. This version decomposes the covariance matrix
    into:

        diag(sigma) \dot corr_matrix \dot diag(sigma).

    We use uniform priors on the standard deviations (sigma) and LKJCorr
    priors on the correlation matrices (corr_matrix):

        sigma ~ Uniform
        corr_matrix ~ LKJCorr(n=1, p=dim)

    Parameters have the same meaning as in `build_pmf_model`.

    :returns: The constructed ``pm.Model``.
    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    # We will use separate priors for sigma and correlation matrix.
    # In order to convert the upper triangular correlation values to a
    # complete correlation matrix, we need to construct an index matrix:
    n_elem = int(dim * (dim - 1) / 2)
    tri_index = np.zeros([dim, dim], dtype=int)
    tri_index[np.triu_indices(dim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(dim, k=1)[::-1]] = np.arange(n_elem)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        sigma_u = pm.Uniform('sigma_u', shape=dim)
        corr_triangle_u = pm.LKJCorr(
            'corr_u', n=1, p=dim,
            testval=np.random.randn(n_elem) * std)
        corr_matrix_u = corr_triangle_u[tri_index]
        corr_matrix_u = t.fill_diagonal(corr_matrix_u, 1)
        cov_matrix_u = t.diag(sigma_u).dot(corr_matrix_u.dot(t.diag(sigma_u)))
        lambda_u = t.nlinalg.matrix_inverse(cov_matrix_u)
        mu_u = pm.Normal(
            'mu_u', mu=0, tau=beta_0 * t.diag(lambda_u), shape=dim,
            testval=np.random.randn(dim) * std)
        U = pm.MvNormal(
            'U', mu=mu_u, tau=lambda_u, shape=(n, dim),
            testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        sigma_v = pm.Uniform('sigma_v', shape=dim)
        corr_triangle_v = pm.LKJCorr(
            'corr_v', n=1, p=dim,
            testval=np.random.randn(n_elem) * std)
        corr_matrix_v = corr_triangle_v[tri_index]
        corr_matrix_v = t.fill_diagonal(corr_matrix_v, 1)
        cov_matrix_v = t.diag(sigma_v).dot(corr_matrix_v.dot(t.diag(sigma_v)))
        lambda_v = t.nlinalg.matrix_inverse(cov_matrix_v)
        mu_v = pm.Normal(
            'mu_v', mu=0, tau=beta_0 * t.diag(lambda_v), shape=dim,
            testval=np.random.randn(dim) * std)
        V = pm.MvNormal(
            'V', mu=mu_v, tau=lambda_v, shape=(m, dim),
            testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones((n, m)),
            observed=train)
    logging.info('done building the BPMF model')
    return bpmf


def getrmse(predictions, targets):
    """Return the root-mean-square error between two equally shaped arrays."""
    return np.sqrt(((predictions - targets) ** 2).mean())


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s]: %(message)s')

    # Read data and build PMF model.
    train, test = read_jester_data()
    pmf = build_pmf_model(train)

    # Find mode of posterior using optimization
    with pmf:
        tstart = time.time()
        # The default optimizer is used here; the Powell variant the author
        # tried is kept for reference:
        # start = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        logging.info('finding PMF MAP')
        start = pm.find_MAP()
        elapsed = time.time() - tstart
        logging.info('found PMF MAP in %d seconds' % int(elapsed))

    # Build the modified BPMF model using same default params as PMF.
    mod_bpmf = build_mod_bpmf_model(train)

    # Use PMF MAP to initialize sampling for modified BPMF.
    for key in mod_bpmf.test_point:
        if key not in start:
            start[key] = mod_bpmf.test_point[key]

    # Attempt to sample with modified BPMF
    # (this part raises PositiveDefiniteError when using the normal BPMF model).
    with mod_bpmf:
        nsamples = 100
        njobs = 2
        logging.info(
            'drawing %d MCMC samples using %d jobs' % (nsamples, njobs))
        step = pm.NUTS(scaling=start)
        # `cores` is the pm.sample argument that replaced `njobs`.
        trace = pm.sample(nsamples, step, start=start, cores=njobs)

    with mod_bpmf:
        ppc = pm.sample_posterior_predictive(trace, progressbar=True)

    # ppc['R'] has three dims; average over the first (sample) dim.
    nR = np.mean(ppc['R'], 0)

    # Score only the held-out cells. The article's earlier figure
    # (4.120942853091463) came from averaging per-cell sqrt((p - t) ** 2),
    # i.e. the mean absolute error; this prints the actual RMSE.
    held_out = ~np.isnan(test)
    print(getrmse(nR[held_out], test[held_out]))

2、用 MovieLens-1M 數據集無法采樣下去，原因未知，有興趣者可研究，代碼如下：

# -*- Encoding:UTF-8 -*-
'''
@author: Jason.F
@date: 2019.07.22
@function: Implementing BPMF by MCMC
           Dataset: MovieLens Dataset (ml-1m)
           Evaluating: hit ratio, NDCG
           https://www.cs.toronto.edu/~amnih/papers/bpmf.pdf
@reference: https://gist.github.com/macks22/00a17b1d374dfc267a9a
'''
import ast
import heapq
import itertools
import logging
import math
import sys
import time

import numpy as np
import pandas as pd
import pymc3 as pm
import theano
import theano.tensor as t

# Length of the ranked list used for hit ratio / NDCG.
TOP_K = 10


def getTraindata():
    """Load the training ratings into a dense user x item matrix.

    Every non-blank line of the file is split on tabs; the first three
    fields are read as user id, item id and rating.

    :returns: float32 ndarray of shape ``(max_user_id + 1, max_item_id + 1)``
        holding the rating for each listed pair and 0 everywhere else.
    """
    data = []
    filePath = '/data/fjsdata/ctKngBase/ml/ml-1m.train.rating'
    u = 0
    i = 0
    with open(filePath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines instead of failing in int('')
            fields = line.split("\t")
            user = int(fields[0])
            item = int(fields[1])
            score = float(fields[2])
            data.append((user, item, score))
            if user > u:
                u = user
            if item > i:
                i = item

    # NOTE: the values reported are the largest ids seen, not counts.
    print("Loading Success!\n"
          "Data Info:\n"
          "\tUser Num: {}\n"
          "\tItem Num: {}\n"
          "\tData Size: {}".format(u, i, len(data)))

    # NOTE(review): unobserved cells are filled with 0 rather than NaN, so
    # the NaN-based mean imputation in the model builders below changes
    # nothing and the zeros are modelled as observed values. Confirm this
    # is the intended (implicit-feedback style) treatment.
    R = np.zeros([u + 1, i + 1], dtype=np.float32)
    for user, item, rating in data:
        R[user, item] = rating
    return R


def getTestdata():
    """Load the evaluation set as a flat list of ``[user, item]`` pairs.

    The first tab-separated field of each line is parsed as a Python
    literal whose elements 0 and 1 are the user id and the positive item
    id; the remaining fields are negative item ids for that user. For each
    line the positive pair is emitted first, followed by the negatives.

    :returns: list of ``[user, item]`` lists.
    """
    testset = []
    filePath = '/data/fjsdata/ctKngBase/ml/ml-1m.test.negative'
    with open(filePath, 'r') as fd:
        for line in fd:
            if not line.strip():
                continue
            arr = line.split('\t')
            # literal_eval only accepts literals, unlike eval(), which would
            # execute arbitrary code found in the data file.
            head = ast.literal_eval(arr[0])
            u = head[0]
            testset.append([u, head[1]])  # one positive item
            for i in arr[1:]:
                testset.append([u, int(i)])  # 99 negative items
    return testset


def _impute_missing_with_mean(train):
    """Return a copy of `train` with NaN cells set to the mean of the rest."""
    filled = train.copy()
    nan_mask = np.isnan(filled)
    filled[nan_mask] = filled[~nan_mask].mean()
    return filled


def build_pmf_model(train, alpha=2, dim=8, std=0.01):
    """Construct the Probabilistic Matrix Factorization model using pymc3.

    Note that the `testval` param for U and V initialize the model away from
    0 using a small amount of Gaussian noise.

    :param np.ndarray train: Training data (observed) to learn the model on.
    :param int alpha: Fixed precision to use for the rating likelihood function.
    :param int dim: Dimensionality of the model; rank of low-rank approximation.
    :param float std: Standard deviation for Gaussian noise in model initialization.
    :returns: The constructed ``pm.Model``.
    """
    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    # Low precision reflects uncertainty; prevents overfitting.
    # We use point estimates from the data to initialize.
    # Set to mean variance across users and items.
    alpha_u = 1 / train.var(axis=1).mean()
    alpha_v = 1 / train.var(axis=0).mean()

    logging.info('building the PMF model')
    n, m = train.shape
    with pm.Model() as pmf:
        U = pm.MvNormal(
            'U', mu=0, tau=alpha_u * np.eye(dim),
            shape=(n, dim), testval=np.random.randn(n, dim) * std)
        V = pm.MvNormal(
            'V', mu=0, tau=alpha_v * np.eye(dim),
            shape=(m, dim), testval=np.random.randn(m, dim) * std)
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones(train.shape),
            observed=train)
    logging.info('done building PMF model')
    return pmf


def build_bpmf_model(train, alpha=2, dim=8, std=0.01):
    """Build the original BPMF model, which we cannot sample from due to
    current limitations in pymc3's implementation of the Wishart distribution.

    Parameters have the same meaning as in `build_pmf_model`.

    :returns: The constructed ``pm.Model``.
    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        lambda_u = pm.Wishart(
            'lambda_u', n=dim, V=np.eye(dim), shape=(dim, dim),
            testval=np.random.randn(dim, dim) * std)
        mu_u = pm.Normal(
            'mu_u', mu=0, tau=beta_0 * lambda_u, shape=dim,
            testval=np.random.randn(dim) * std)
        U = pm.MvNormal(
            'U', mu=mu_u, tau=lambda_u, shape=(n, dim),
            testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        lambda_v = pm.Wishart(
            'lambda_v', n=dim, V=np.eye(dim), shape=(dim, dim),
            testval=np.random.randn(dim, dim) * std)
        mu_v = pm.Normal(
            'mu_v', mu=0, tau=beta_0 * lambda_v, shape=dim,
            testval=np.random.randn(dim) * std)
        V = pm.MvNormal(
            'V', mu=mu_v, tau=lambda_v, shape=(m, dim),
            testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones((n, m)),
            observed=train)
    logging.info('done building the BPMF model')
    return bpmf


def build_mod_bpmf_model(train, alpha=2, dim=8, std=0.01):
    r"""Build the modified BPMF model using pymc3.

    The original model uses Wishart priors on the covariance matrices.
    Unfortunately, the Wishart distribution in pymc3 is currently not
    suitable for sampling. This version decomposes the covariance matrix
    into:

        diag(sigma) \dot corr_matrix \dot diag(sigma).

    We use uniform priors on the standard deviations (sigma) and LKJCorr
    priors on the correlation matrices (corr_matrix):

        sigma ~ Uniform
        corr_matrix ~ LKJCorr(n=1, p=dim)

    Parameters have the same meaning as in `build_pmf_model`.

    :returns: The constructed ``pm.Model``.
    """
    n, m = train.shape
    beta_0 = 1  # scaling factor for lambdas; unclear on its use

    # Mean value imputation on training data.
    train = _impute_missing_with_mean(train)

    # We will use separate priors for sigma and correlation matrix.
    # In order to convert the upper triangular correlation values to a
    # complete correlation matrix, we need to construct an index matrix:
    n_elem = int(dim * (dim - 1) / 2)
    tri_index = np.zeros([dim, dim], dtype=int)
    tri_index[np.triu_indices(dim, k=1)] = np.arange(n_elem)
    tri_index[np.triu_indices(dim, k=1)[::-1]] = np.arange(n_elem)

    logging.info('building the BPMF model')
    with pm.Model() as bpmf:
        # Specify user feature matrix
        sigma_u = pm.Uniform('sigma_u', shape=dim)
        corr_triangle_u = pm.LKJCorr(
            'corr_u', n=1, p=dim,
            testval=np.random.randn(n_elem) * std)
        corr_matrix_u = corr_triangle_u[tri_index]
        corr_matrix_u = t.fill_diagonal(corr_matrix_u, 1)
        cov_matrix_u = t.diag(sigma_u).dot(corr_matrix_u.dot(t.diag(sigma_u)))
        lambda_u = t.nlinalg.matrix_inverse(cov_matrix_u)
        mu_u = pm.Normal(
            'mu_u', mu=0, tau=beta_0 * t.diag(lambda_u), shape=dim,
            testval=np.random.randn(dim) * std)
        U = pm.MvNormal(
            'U', mu=mu_u, tau=lambda_u, shape=(n, dim),
            testval=np.random.randn(n, dim) * std)

        # Specify item feature matrix
        sigma_v = pm.Uniform('sigma_v', shape=dim)
        corr_triangle_v = pm.LKJCorr(
            'corr_v', n=1, p=dim,
            testval=np.random.randn(n_elem) * std)
        corr_matrix_v = corr_triangle_v[tri_index]
        corr_matrix_v = t.fill_diagonal(corr_matrix_v, 1)
        cov_matrix_v = t.diag(sigma_v).dot(corr_matrix_v.dot(t.diag(sigma_v)))
        lambda_v = t.nlinalg.matrix_inverse(cov_matrix_v)
        mu_v = pm.Normal(
            'mu_v', mu=0, tau=beta_0 * t.diag(lambda_v), shape=dim,
            testval=np.random.randn(dim) * std)
        V = pm.MvNormal(
            'V', mu=mu_v, tau=lambda_v, shape=(m, dim),
            testval=np.random.randn(m, dim) * std)

        # Specify rating likelihood function
        R = pm.Normal(
            'R', mu=t.dot(U, V.T), tau=alpha * np.ones((n, m)),
            observed=train)
    logging.info('done building the BPMF model')
    return bpmf


def getHitRatio(ranklist, targetItem):
    """Return 1 if `targetItem` occurs in `ranklist`, otherwise 0."""
    return 1 if targetItem in ranklist else 0


def getNDCG(ranklist, targetItem):
    """Return log(2) / log(pos + 2) for the 0-based position `pos` of
    `targetItem` in `ranklist`, or 0 when it is absent."""
    for position, item in enumerate(ranklist):
        if item == targetItem:
            return math.log(2) / math.log(position + 2)
    return 0


def _evaluate_ranking(testset, scores, top_k=TOP_K):
    """Compute mean hit ratio and NDCG over all users in `testset`.

    Consecutive entries with the same user id form one group; the first
    item of each group is that user's positive item. Every group is scored,
    including the final one.

    :param list testset: ``[user, item]`` pairs as produced by `getTestdata`.
    :param scores: 2-D array indexed as ``scores[user, item]``.
        Assumes every id in `testset` is within its bounds -- TODO confirm
        against the data files.
    :param int top_k: Length of the ranked list.
    :returns: ``(hit_ratio, ndcg)`` averaged over the groups.
    :raises ValueError: If `testset` is empty.
    """
    if not testset:
        raise ValueError('test set is empty')

    hits = []
    ndcgs = []
    for user, pairs in itertools.groupby(testset, key=lambda pair: pair[0]):
        items = [item for _, item in pairs]
        pos_i = items[0]
        map_item_score = {item: scores[user, item] for item in items}
        ranklist = heapq.nlargest(top_k, map_item_score,
                                  key=map_item_score.get)
        hits.append(getHitRatio(ranklist, pos_i))
        ndcgs.append(getNDCG(ranklist, pos_i))
    return np.array(hits).mean(), np.array(ndcgs).mean()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO,
                        format='[%(asctime)s]: %(message)s')

    # Read data and build the modified BPMF model.
    train = getTraindata()
    bpmf = build_mod_bpmf_model(train, dim=8)  # dim is the number of latent factors

    with bpmf:
        # sample with BPMF
        tstart = time.time()
        logging.info('Starting BPMF training')
        # Variant initialised from the MAP estimate, kept for reference:
        # start = pm.find_MAP()
        # trace = pm.sample(1000, step, start=start)
        step = pm.NUTS()
        trace = pm.sample(100, step)
        elapsed = time.time() - tstart
        logging.info('Completed BPMF in %d seconds' % int(elapsed))

    with bpmf:
        # evaluation
        testset = getTestdata()
        ppc = pm.sample_posterior_predictive(trace, progressbar=True)

    # ppc['R'] has three dims; average over the first (sample) dim.
    nR = np.mean(ppc['R'], 0)

    hitratio, ndcg = _evaluate_ranking(testset, nR, TOP_K)
    print("hr: {}, NDCG: {}, At K {}".format(hitratio, ndcg, TOP_K))

訓練一直卡在：

Loading Success!
Data Info:
	User Num: 6039
	Item Num: 3705
	Data Size: 994169
[2019-07-23 07:26:00,509]: building the BPMF model
[2019-07-23 07:26:21,704]: done building the BPMF model
[2019-07-23 07:26:21,709]: finding PMF MAP using Powell optimization
Only 100 samples in chain.
[2019-07-23 07:26:40,130]: Only 100 samples in chain.
Multiprocess sampling (4 chains in 4 jobs)
[2019-07-23 07:26:40,147]: Multiprocess sampling (4 chains in 4 jobs)
NUTS: [V, mu_v, corr_v, sigma_v, U, mu_u, corr_u, sigma_u]
[2019-07-23 07:26:40,153]: NUTS: [V, mu_v, corr_v, sigma_v, U, mu_u, corr_u, sigma_u]
Sampling 4 chains: 0%| | 12/2400 [01:47<10:26:46, 15.75s/draws]

BPMF是用貝葉斯MCMC推斷方法求解MF概率模型，和筆者下一篇BMF模型思路一致。

總結

以上是生活随笔為你收集整理的推荐经典算法实现之BPMF(pymc3+MovieLen)的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Tensorflow矩阵过大问题的解决
下一篇：推荐算法实现之BMF(pymc3+Mov