# Bayes classifier
# Sample counts per fruit: how many observed samples have / lack each feature.
datasets = {
    'banala': {
        'long': 400, 'not_long': 100,
        'sweet': 350, 'not_sweet': 150,
        'yellow': 450, 'not_yellow': 50,
    },
    'orange': {
        'long': 0, 'not_long': 300,
        'sweet': 150, 'not_sweet': 150,
        'yellow': 300, 'not_yellow': 0,
    },
    'other_fruit': {
        'long': 100, 'not_long': 100,
        'sweet': 150, 'not_sweet': 50,
        'yellow': 50, 'not_yellow': 150,
    },
}


def count_total(data):
    """Count the samples of each fruit and the overall number of samples.

    A fruit's sample count is computed as its 'yellow' count plus its
    'not_yellow' count, so every fruit entry must provide both keys.

    Args:
        data: mapping of fruit name -> mapping of feature name -> count.

    Returns:
        A ``(count, total)`` tuple where ``count`` maps each fruit to its
        number of samples and ``total`` is the sum over all fruits.
        For the module-level ``datasets`` this is
        ``({'banala': 500, 'orange': 300, 'other_fruit': 200}, 1000)``.
    """
    count = {}
    total = 0
    for fruit in data:
        count[fruit] = data[fruit]['yellow'] + data[fruit]['not_yellow']
        total += count[fruit]
    return count, total


def cal_base_rates(data):
    """Compute the prior probability of each fruit (its share of all samples).

    Args:
        data: mapping of fruit name -> mapping of feature name -> count.

    Returns:
        Mapping of fruit name -> prior probability. For ``datasets`` this is
        ``{'banala': 0.5, 'orange': 0.3, 'other_fruit': 0.2}``.

    Raises:
        ZeroDivisionError: if ``data`` has fruits but no samples at all.
    """
    categories, total = count_total(data)
    return {label: categories[label] / total for label in categories}


def likelihold_prob(data):
    """Compute the probability of each feature value given the fruit.

    Args:
        data: mapping of fruit name -> mapping of feature name -> count.

    Returns:
        Mapping of fruit name -> mapping of feature name -> probability,
        i.e. the feature count divided by that fruit's sample count.
        For ``datasets`` the 'banala' entry is ``{'long': 0.8,
        'not_long': 0.2, 'sweet': 0.7, 'not_sweet': 0.3, 'yellow': 0.9,
        'not_yellow': 0.1}``.

    Raises:
        ZeroDivisionError: if some fruit has a sample count of zero.
    """
    count, _ = count_total(data)
    likelihold = {}
    for fruit in data:
        likelihold[fruit] = {
            attr: data[fruit][attr] / count[fruit] for attr in data[fruit]
        }
    return likelihold


def evidence_prob(data):
    """Compute the overall probability of each feature value across all fruits.

    The feature names are taken from the first fruit in ``data`` (rather than
    from a hard-coded fruit name), so any dataset whose fruits share the same
    feature keys is supported.

    Args:
        data: mapping of fruit name -> mapping of feature name -> count.

    Returns:
        Mapping of feature name -> probability. For ``datasets`` this is
        ``{'long': 0.5, 'not_long': 0.5, 'sweet': 0.65, 'not_sweet': 0.35,
        'yellow': 0.8, 'not_yellow': 0.2}``. An empty ``data`` yields ``{}``.

    Raises:
        KeyError: if a later fruit lacks a feature the first fruit has.
        ZeroDivisionError: if ``data`` has fruits but no samples at all.
    """
    if not data:
        return {}
    attrs = list(next(iter(data.values())))
    _, total = count_total(data)
    evidence = {}
    for attr in attrs:
        attr_total = 0
        for fruit in data:
            attr_total += data[fruit][attr]
        evidence[attr] = attr_total / total
    return evidence


class navie_bayes_classifier:
    """Naive Bayes classifier over per-fruit feature counts."""

    def __init__(self, data=datasets):
        """Precompute priors, likelihoods and evidence from ``data``.

        Args:
            data: mapping of fruit name -> mapping of feature name -> count.
                Defaults to the module-level ``datasets``.
        """
        # Use the dataset that was passed in; the original always used the
        # module-level ``datasets`` and silently ignored this argument.
        self._data = data
        # The distinct fruits (class labels).
        self._labels = list(self._data)
        # Prior probability of each fruit.
        self._priori_prob = cal_base_rates(self._data)
        # Probability of each feature value given the fruit.
        self._likelihole_prob = likelihold_prob(self._data)
        # Overall probability of each feature value.
        self._evidence_prob = evidence_prob(self._data)

    def get_label(self, length, sweetness, color):
        """Score every fruit for a sample described by three feature values.

        For each fruit the score starts at the fruit's prior and is multiplied,
        for each given feature value, by (likelihood of the value given the
        fruit) / (overall probability of the value).

        Args:
            length: feature key such as 'long' or 'not_long'.
            sweetness: feature key such as 'sweet' or 'not_sweet'.
            color: feature key such as 'yellow' or 'not_yellow'.

        Returns:
            Mapping of fruit name -> score.

        Raises:
            KeyError: if a feature value is not present in the dataset.
            ZeroDivisionError: if a feature value never occurs in the dataset.
        """
        self._attrs = [length, sweetness, color]
        res = {}
        for label in self._labels:
            prob = self._priori_prob[label]
            for attr in self._attrs:
                prob *= self._likelihole_prob[label][attr] / self._evidence_prob[attr]
            res[label] = prob
        return res
# generate_attires.py
# Randomly generate test samples to exercise the Bayes classifier's predictions.
import random


def rondom_attr(pair):
    """Return one value picked at random from ``pair``.

    Args:
        pair: a non-empty sequence of candidate feature values, e.g.
            ``('long', 'not_long')``.

    Returns:
        One element of ``pair``.
    """
    return random.choice(pair)


def gen_attrs(n=3):
    """Generate ``n`` random samples, each a list of three feature values.

    Every sample holds one value from each of the pairs
    ('long', 'not_long'), ('sweet', 'not_sweet') and ('yellow', 'not_yellow'),
    in that order.

    Args:
        n: number of samples to generate (defaults to 3).

    Returns:
        A list of ``n`` samples, each a list of three strings.
    """
    # The possible values of each feature.
    sets = [('long', 'not_long'), ('sweet', 'not_sweet'), ('yellow', 'not_yellow')]
    return [[rondom_attr(pair) for pair in sets] for _ in range(n)]
# classfication.py
# Use the Bayes classifier to classify the generated test samples
import operator
import bayes_classfier
import generate_attires
def main():
    """Classify a few randomly generated samples and print the results.

    For each sample produced by ``generate_attires.gen_attrs()`` this prints
    the feature values, the per-fruit scores returned by ``get_label`` and
    the fruit with the highest score.
    """
    test_datasets = generate_attires.gen_attrs()
    classfier = bayes_classfier.navie_bayes_classifier()
    for data in test_datasets:
        print("特征值:", end='\t')
        print(data)
        # Label text restored: the pasted literal had pinyin annotations
        # injected between its characters.
        print("預測結果:", end='\t')
        # Unpack the three feature values as positional arguments.
        res = classfier.get_label(*data)
        # Score of the sample for each fruit.
        print(res)
        print('水果類別:', end='\t')
        # Print the label with the highest score; on ties the first label
        # wins, the same result a stable descending sort would give.
        print(max(res, key=res.get))


if __name__ == '__main__':
    main()