词频写入excel
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Count jieba keywords in a text file and export the ranking.

Reads ``ceshi.txt`` line by line, runs ``jieba.analyse.extract_tags`` on the
first tab-separated field of every line, counts how many times each keyword
was returned, and writes the ranking (highest count first) to
``wordCount.txt`` (one ``word count`` pair per line) and to the
``wordCount`` sheet of ``wordCount.xls`` (word in column 0, count in
column 1).

NOTE(review): ``extract_tags`` returns a keyword selection rather than every
token (``jieba.cut`` is the plain segmenter), so the counts are "times
selected as a keyword", not raw term frequencies -- confirm this is intended.
"""

import io
import sys
from collections import Counter

import jieba
import jieba.analyse
import xlwt  # library used to write the Excel workbook

if sys.version_info[0] == 2:
    # Python 2 only: keep the original default-encoding switch so implicit
    # str/unicode conversions elsewhere keep using UTF-8. ``reload`` is not a
    # builtin on Python 3, so calling it unconditionally raised NameError.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')


def main():
    """Build the keyword ranking and write the text and Excel reports."""
    keyword_counts = Counter()

    # The input is assumed to be UTF-8 (the script already forced UTF-8 as
    # the default encoding) -- TODO confirm for other inputs. The ``with``
    # block closes the file, which the original bare ``open`` never did.
    with io.open('ceshi.txt', encoding='utf-8') as source:
        for line in source:
            # Only the first tab-separated field of each line is analysed.
            first_field = line.strip('\n\r').split('\t')[0]
            keyword_counts.update(jieba.analyse.extract_tags(first_field))

    # (word, count) pairs, highest count first. Keeping each word paired
    # with its count replaces the quadratic "scan the dict once per sorted
    # count and zero the entry" bookkeeping and the parallel-list indexing.
    ranking = keyword_counts.most_common()

    with io.open('wordCount.txt', 'w', encoding='utf-8') as report:
        for word, count in ranking:
            report.write(u'%s %d\n' % (word, count))

    # The encoding is what xlwt uses to decode byte strings handed to it;
    # UTF-8 keeps any non-ASCII byte string from failing to decode.
    workbook = xlwt.Workbook(encoding='utf-8')
    sheet = workbook.add_sheet("wordCount")  # name of the worksheet
    for row, (word, count) in enumerate(ranking):
        sheet.write(row, 0, label=word)
        sheet.write(row, 1, label=count)
    workbook.save('wordCount.xls')  # save as wordCount.xls


if __name__ == "__main__":
    main()
转载于:https://www.cnblogs.com/lh459384111/p/7759792.html
总结
- 上一篇: 敦煌莫高窟是谁画的呢?
- 下一篇: Git 常见问题记录