日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程语言 > python > 内容正文

python

python统计三国演义中人物出现的频次

發布時間:2023/12/18 python 48 豆豆
生活随笔 收集整理的這篇文章主要介紹了 python统计三国演义中人物出现的频次 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

方式一. 簡化版

  • 安裝jieba庫/numpy庫
  • 編程讀取《三國演義》電子書,輸出出場次數最高的10個人物名字

  • 代碼注釋:

    """Rank the 2- and 3-character tokens of a novel by how often they occur.

    The text is segmented with jieba's search-engine mode, every token of
    length 2 or 3 is tallied, and the tally is printed as a numpy structured
    array ordered from most to least frequent.
    """
    import numpy

    # numpy abbreviates long arrays with "..." when printing; raise the
    # threshold so the complete ranking is shown.
    numpy.set_printoptions(threshold=numpy.inf)


    def readFile(path):
        """Return the UTF-8 text stored at ``path``.

        Args:
            path: Location of the text file to read.

        Returns:
            The file content as ``str``, or ``None`` when the file cannot be
            opened or decoded (a message is printed in that case) or when it
            is empty.
        """
        try:
            # The ``with`` statement sits inside the ``try`` so that a failing
            # ``open`` (missing file, no permission) is reported as well.
            with open(path, mode='r', encoding='utf-8') as f:
                data = f.read()
        except (OSError, UnicodeDecodeError):
            print("讀取文件失敗!")
            return None
        # An empty file carries nothing to analyse.
        return data if data else None


    def count_tokens(words):
        """Tally the tokens of length 2 or 3 found in ``words``.

        Args:
            words: Iterable of string tokens.

        Returns:
            A dict mapping each kept token to its number of occurrences.
        """
        tally = {}
        for word in words:
            if len(word) in (2, 3):
                tally[word] = tally.get(word, 0) + 1
        return tally


    def rank_tokens(counts):
        """Return ``counts`` as a structured array, most frequent first.

        Args:
            counts: Mapping of token -> occurrence count.

        Returns:
            A one-dimensional numpy structured array with the fields
            ``name`` and ``count``, in descending ``count`` order.
        """
        # numpy array-protocol type strings consist of an optional byte-order
        # character ("<" little-endian, ">" big-endian, "|" not relevant), a
        # character code for the basic type and an integer size.  Codes:
        #   t bit field (the integer is the number of bits)
        #   b boolean          i signed integer     u unsigned integer
        #   f floating point   c complex float      m timedelta
        #   M datetime         O object (pointer to a PyObject)
        #   S byte string (fixed-length char sequence)
        #   U unicode (fixed-length sequence)
        #   V other (void *, each item is a fixed-size chunk of memory)
        # 'U3' is required here: tokens may be three characters long and a
        # narrower 'U2' field would silently truncate them.
        people = numpy.dtype([('name', 'U3'), ('count', int)])
        ar = numpy.array(list(counts.items()), dtype=people)
        # Sort ascending on the "count" field with the stable merge sort,
        # then flip the array so the most frequent token comes first.
        ascending = numpy.sort(ar, axis=0, kind='mergesort', order='count')
        return numpy.flipud(ascending)


    def main():
        """Read the novel, segment it and print the token ranking."""
        # Imported here because only the command-line run needs jieba;
        # the helper functions above stay importable without it.
        import jieba

        text = readFile('三國演義.txt')
        if text is None:
            raise SystemExit(1)
        # Search-engine mode: precise mode plus a further split of long words.
        words = jieba.cut_for_search(text)
        print(rank_tokens(count_tokens(words)))


    if __name__ == "__main__":
        main()

    二.方式二 詞云統計–轉自

    Python 三國演義文本可視化(詞云,人物關系圖,主要人物出場次數,章回字數)



    alice_mask.png

    # -*- coding: utf-8 -*-
    """Visualise the characters of the novel stored in ``三國演義.txt``.

    Created on Wed Jun 23 11:41:01 2021
    @author: 陳建兵

    The script segments the novel with jieba's part-of-speech tagger, counts
    the tokens tagged as person names, and produces a bar chart of
    appearances, two word clouds, a chapter-length line chart and a
    co-occurrence network of the most frequent characters.
    """
    import codecs
    import itertools
    import random

    import imageio
    import jieba.posseg as pseg  # segmentation with part-of-speech tags
    import matplotlib.pyplot as plt
    import networkx as nx
    import wordcloud
    from pyecharts import options as opts
    from pyecharts.charts import Bar  # bar chart
    from pyecharts.charts import Line  # line / area chart
    from pyecharts.charts import WordCloud  # word cloud chart

    # Number of main characters shown in the relationship graph and in the
    # appearance bar chart.
    mainTop = 15


    def read_txt(filepath):
        """Return the whole UTF-8 text stored at ``filepath``."""
        # Read-only mode: nothing is written, so 'r+' is not needed.
        with open(filepath, 'r', encoding='utf-8') as file:
            return file.read()


    # The text of the novel.
    txt = read_txt('三國演義.txt')


    def stopwordslist(filepath):
        """Return the stripped lines of the stop-word file at ``filepath``."""
        with open(filepath, 'r', encoding='utf-8') as f:
            return [line.strip() for line in f]


    # Optional stop-word list (not used by the code below):
    # stopwords = stopwordslist('中文停用詞庫.txt')

    # Tokens that must not be counted as person names.
    # NOTE(review): one entry between '安民' and '文武' was corrupted beyond
    # recovery in the available copy of this script (it read
    # "張?jiān)?#39;") and is therefore missing here - restore it from the
    # upstream article.
    excludes = {
        '將軍', '卻說', '令人', '趕來', '徐州', '不見', '下馬', '喊聲', '因此',
        '未知', '大敗', '百姓', '大事', '一軍', '之后', '接應', '起兵', '成都',
        '原來', '江東', '正是', '忽然', '大叫', '上馬', '天子', '一面', '太守',
        '不如', '忽報', '后人', '背后', '先主', '此人', '城中', '然后', '大軍',
        '何不', '先生', '何故', '夫人', '先鋒', '二人', '不可', '如何', '荊州',
        '不能', '如此', '主公', '軍士', '商議', '引兵', '次日', '大喜', '魏兵',
        '軍馬', '于是', '東吳', '今日', '左右', '天下', '不敢', '陛下', '人馬',
        '不知', '都督', '漢中', '一人', '眾將', '后主', '只見', '蜀兵', '馬軍',
        '黃巾', '立功', '白發', '大吉', '紅旗', '士卒', '錢糧', '于漢', '郎舅',
        '龍鳳', '古之', '白虎', '古人云', '爾乃', '馬飛報', '軒昂', '史官',
        '侍臣', '列陣', '玉璽', '車駕', '老夫', '伏兵', '都尉', '侍中', '西涼',
        '安民', '文武', '白旗', '祖宗', '尋思',
    }

    # Canonical name -> tokens that are counted as that person.
    _ALIAS_GROUPS = {
        '諸葛亮': ('孔明', '孔明曰', '臥龍先生'),
        '關羽': ('云長', '關公曰', '關公'),
        '劉備': ('玄德', '玄德曰', '玄德甚', '玄德遂', '玄德兵', '玄德領',
                 '玄德同', '劉豫州', '劉玄德'),
        '曹操': ('孟德', '丞相', '曹賊', '阿瞞', '曹丞相', '曹將軍'),
        '劉邦': ('高祖',),
        '劉秀': ('光武',),
        '劉志': ('桓帝',),
        '劉宏': ('靈帝',),
        '周瑜': ('公瑾',),
        '孫策': ('伯符',),
        '呂布': ('呂奉先', '布乃', '布大怒', '呂布之'),
        '趙云': ('趙子龍', '子龍'),
        '董卓': ('卓大喜', '卓大怒'),
    }
    # Flat token -> canonical name lookup built from the groups above.
    NAME_ALIASES = {
        alias: name
        for name, aliases in _ALIAS_GROUPS.items()
        for alias in aliases
    }

    # Person name -> number of occurrences, filled by getWordTimes().
    counts = {}


    def getWordTimes():
        """Fill the module-level ``counts`` with person-name frequencies.

        A token is counted only when jieba tags it ``nr`` (person name), it
        has at least two characters and it is not listed in ``excludes``.
        Known aliases are merged into one canonical name.
        """
        for w in pseg.cut(txt):
            if w.flag != 'nr' or len(w.word) < 2 or w.word in excludes:
                continue
            real_word = NAME_ALIASES.get(w.word, w.word)
            counts[real_word] = counts.get(real_word, 0) + 1


    getWordTimes()
    # (name, count) pairs ordered from most to least frequent.
    items = list(counts.items())
    items.sort(key=lambda x: x[1], reverse=True)


    def wordFreq(filepath, topn):
        """Write the ``topn`` most frequent names to ``filepath``.

        One ``name:count`` line is written per name.  When fewer than
        ``topn`` names were found, all of them are written.
        """
        with codecs.open(filepath, "w", "utf-8") as f:
            # Slicing (instead of indexing up to topn) cannot run past the
            # end of the list.
            for word, count in items[:topn]:
                f.write("{}:{}\n".format(word, count))


    # Create the frequency file.
    wordFreq("三國演義詞頻_人名.txt", 300)

    # Load the frequency file back into a dict, keeping the file order.
    dic = {}
    keys = []  # names in the order they were read
    with open('三國演義詞頻_人名.txt', 'r', encoding='utf-8') as fr:
        for line in fr:
            name, sep, times = line.strip().rpartition(':')
            if not sep:
                continue  # blank or malformed line
            # Counts are stored as integers so the charts get numeric data.
            dic[name] = int(times)
            keys.append(name)

    # Show the first few entries.
    print("人物出現次數TOP", mainTop)
    print(list(dic.items())[:mainTop])

    # Plot data: names and their counts, in file order.
    list_name = list(dic.keys())
    list_name_times = list(dic.values())


    def creat_people_view():
        """Render the bar chart of appearances of the top characters."""
        bar = Bar()
        bar.add_xaxis(list_name[0:mainTop])
        # The value series is cut to the same length as the x axis.
        bar.add_yaxis("人物出場次數", list_name_times[0:mainTop])
        bar.set_global_opts(
            title_opts=opts.TitleOpts(
                title="人物出場次數可視化圖",
                subtitle="三國人物TOP" + str(mainTop),
            ),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 45}),
        )
        bar.set_series_opts(label_opts=opts.LabelOpts(position="top"))
        bar.render_notebook()  # notebook display; the result is unused here
        # Write the HTML file.
        bar.render("三國演義人物出場次數可視化圖.html")


    def creat_wordcloud():
        """Draw, save and show a word cloud shaped by ``alice_mask.png``."""
        bg_pic = imageio.imread(uri='alice_mask.png')
        wc = wordcloud.WordCloud(
            # Raw string: the backslashes are path separators, not escapes.
            font_path=r'c:\Windows\Fonts\simhei.ttf',
            background_color='white',
            width=1000,
            height=800,
            max_words=500,
            mask=bg_pic,  # the mask image determines the cloud's shape
        )
        # Build the cloud from the name -> frequency mapping.
        wc.generate_from_frequencies(counts)
        wc.to_file('三國演義詞云_人名.png')
        plt.imshow(wc)
        plt.axis('off')
        plt.show()


    def creat_wordcloud_pyecharts():
        """Render the pyecharts word cloud of the names to an HTML file."""
        wordsAndTimes = list(dic.items())
        (
            WordCloud()
            .add(
                series_name="人物次數",
                data_pair=wordsAndTimes,
                word_size_range=[20, 100],
                textstyle_opts=opts.TextStyleOpts(font_family="cursive"),
            )
            .set_global_opts(title_opts=opts.TitleOpts(title="三國演義詞云"))
            .render("三國演義詞云_人名.html")
        )


    def chapter_word():
        """Render a line chart of the character count of every chapter."""
        # Chapters are separated by a run of twelve dashes.
        list2 = txt.split("------------")
        chapter_list = list(range(len(list2)))
        word_list = [len(chapter) for chapter in list2]
        (
            Line(init_opts=opts.InitOpts(width="1400px", height="700px"))
            .add_xaxis(xaxis_data=chapter_list)
            .add_yaxis(
                series_name="章回字數",
                y_axis=word_list,
                markpoint_opts=opts.MarkPointOpts(
                    data=[
                        opts.MarkPointItem(type_="max", name="最大值"),
                        opts.MarkPointItem(type_="min", name="最小值"),
                    ]
                ),
                markline_opts=opts.MarkLineOpts(
                    data=[opts.MarkLineItem(type_="average", name="平均值")]
                ),
            )
            .set_global_opts(
                title_opts=opts.TitleOpts(title="三國演義章回字數", subtitle=""),
                tooltip_opts=opts.TooltipOpts(trigger="axis"),
                toolbox_opts=opts.ToolboxOpts(is_show=True),
                xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
            )
            .render("三國演義章回字數.html")
        )


    # Default number of colours: one per main character.
    colorNum = len(list_name[0:mainTop])


    def randomcolor():
        """Return a random ``#RRGGBB`` colour built from the digits 1-F."""
        colorArr = ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                    'A', 'B', 'C', 'D', 'E', 'F']
        return "#" + "".join(random.choice(colorArr) for _ in range(6))


    def color_list(count=None):
        """Return a list of random colours.

        Args:
            count: Number of colours wanted; defaults to ``colorNum``.
        """
        if count is None:
            count = colorNum
        return [randomcolor() for _ in range(count)]


    # Use a font that contains CJK glyphs so Chinese labels render correctly.
    plt.rcParams['font.sans-serif'] = ['SimHei']


    def creat_relationship():
        """Draw the co-occurrence network of the main characters.

        Two characters are considered related each time both names occur in
        the same paragraph (line) of the text.  Edge weights are the
        co-occurrence counts divided by the largest count.
        """
        Names = list_name[0:mainTop]
        relations = {}
        for text in txt.split('\n'):
            present = [name for name in Names if name in text]
            # Every unordered pair is stored once, keyed in ``Names`` order.
            for pair in itertools.combinations(present, 2):
                relations[pair] = relations.get(pair, 0) + 1
        if not relations:
            # Nothing to normalise or draw.
            print("沒有找到人物共現關系,跳過人物關系圖")
            return
        maxRela = max(relations.values())
        relations = {k: v / maxRela for k, v in relations.items()}

        plt.figure(figsize=(15, 15))
        # Undirected graph without parallel edges.
        G = nx.Graph()
        for (name_0, name_1), weight in relations.items():
            G.add_edge(name_0, name_1, weight=weight)
        # Split the edges into three weight bands.
        elarge = [(u, v) for (u, v, d) in G.edges(data=True)
                  if d['weight'] > 0.6]
        emidle = [(u, v) for (u, v, d) in G.edges(data=True)
                  if 0.3 < d['weight'] <= 0.6]
        esmall = [(u, v) for (u, v, d) in G.edges(data=True)
                  if d['weight'] <= 0.3]
        # Fruchterman-Reingold (spring) layout.
        pos = nx.spring_layout(G)
        # One colour per node actually present in the graph: characters
        # without any co-occurrence never become nodes.
        colors = color_list(G.number_of_nodes())
        nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=1300,
                               node_color=colors)
        # weight > 0.6
        nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5, alpha=0.9,
                               edge_color='g')
        # 0.3 < weight <= 0.6
        nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6,
                               edge_color='y')
        # weight <= 0.3
        nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4,
                               edge_color='b', style='dashed')
        nx.draw_networkx_labels(G, pos, font_size=14)
        plt.title("《三國演義》主要人物社交關系網絡圖")
        plt.axis('off')
        plt.savefig('《三國演義》主要人物社交關系網絡圖.png', bbox_inches='tight')
        plt.show()


    def main():
        """Produce every chart in turn."""
        creat_people_view()           # appearances bar chart
        creat_wordcloud()             # word cloud image
        creat_wordcloud_pyecharts()   # word cloud HTML
        creat_relationship()          # relationship graph
        chapter_word()                # chapter lengths


    if __name__ == '__main__':
        main()

    總結

    以上是生活随笔為你收集整理的python统计三国演义中人物出现的频次的全部內容,希望文章能夠幫你解決所遇到的問題。

    如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。