生活随笔
收集整理的这篇文章主要介绍了
python统计三国演义中人物出现的频次
小编觉得挺不错的,现在分享给大家,帮大家做个参考。
方式一:简化版
安装 jieba 库和 numpy 库,编程读取《三国演义》电子书,输出出场次数最高的 10 个人物名字。
代码注释:
import numpy
import jieba# numpy輸出有省略號(hào)的問(wèn)題,無(wú)法顯示全部數(shù)據(jù)
# numpy abbreviates long arrays with "..." by default; lift the threshold so
# the complete ranking is printed.
numpy.set_printoptions(threshold=numpy.inf)


def readFile(path):
    """Return the full text of the UTF-8 file at ``path``.

    Args:
        path: Location of the text file to read.

    Returns:
        The file content as a string (possibly empty), or ``None`` when the
        file cannot be opened or decoded; a message is printed in that case.
    """
    # The ``try`` wraps ``open`` as well, so a missing file is reported the
    # same way as a failed read instead of escaping the handler.
    try:
        with open(path, mode='r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        print("讀取文件失敗!")
        return None


def _count_candidates(tokens):
    """Count how often each 2- or 3-character token occurs in ``tokens``."""
    freq = {}
    for token in tokens:
        # Only tokens of length 2 or 3 are kept as candidate names.
        if len(token) in (2, 3):
            freq[token] = freq.get(token, 0) + 1
    return freq


def _rank_by_count(freq):
    """Return a structured array of (name, count) rows, highest count first."""
    # Structured dtype: a unicode field for the token and an integer field
    # for its frequency.  numpy type strings are made of an optional
    # byte-order character (<, >, |), a type code (b boolean, i integer,
    # u unsigned integer, f float, c complex, m timedelta, M datetime,
    # O object, S byte string, U unicode, V void) and a size.
    # 'U3' is required because 3-character tokens are counted; a narrower
    # field would silently truncate them.
    people = numpy.dtype([('name', 'U3'), ('count', int)])
    rows = numpy.array(list(freq.items()), dtype=people)
    # axis=0 sorts along the first axis, kind='mergesort' selects a stable
    # merge sort, order='count' sorts on the count field; flipud() then
    # reverses the ascending result.
    ascending = numpy.sort(rows, axis=0, kind='mergesort', order='count')
    return numpy.flipud(ascending)


if __name__ == "__main__":
    # NOTE(review): the file name below looks damaged by text extraction
    # (romanisation fragments inside the literal); it is kept as found --
    # confirm the real file name before running.
    text = readFile('三國(guó)演義.txt')
    if text is None:
        raise SystemExit(1)
    # Search-engine mode: precise segmentation plus a second split of long words.
    arr = jieba.cut_for_search(text)
    obj = _count_candidates(arr)
    print(_rank_by_count(obj))
方式二:词云统计(转自)
Python 三国演义文本可视化(词云,人物关系图,主要人物出场次数,章回字数)
alice_mask.png
"""
Created on Wed Jun 23 11:41:01 2021

@author: 陳建兵
"""
import codecs
import random
from itertools import combinations

import imageio
import jieba.posseg as pseg
import matplotlib.pyplot as plt
import networkx as nx
import wordcloud
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts.charts import WordCloud
# Number of top-ranked names used when slicing the name list for the report
# and the charts.
mainTop = 15
def read_txt(filepath):
    """Return the entire content of the UTF-8 text file at ``filepath``.

    Args:
        filepath: Path of the file to read.

    Returns:
        The decoded file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Read-only mode is sufficient ('r+' would also demand write permission),
    # and the context manager closes the handle even if read() fails.
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()
# Full text of the novel, loaded once at import time and shared by the
# functions below.
# NOTE(review): the file name literal looks damaged by text extraction --
# confirm the real file name.
txt = read_txt('三國(guó)演義.txt')
def stopwordslist(filepath):
    """Return the lines of the UTF-8 file at ``filepath``, each stripped.

    Args:
        filepath: Path of a text file with one entry per line.

    Returns:
        A list with one stripped string per line of the file (blank lines
        yield empty strings).
    """
    # The context manager closes the handle; the original left it open.
    with open(filepath, 'r', encoding='utf-8') as handle:
        return [line.strip() for line in handle]


# Tokens that must not be counted as character names even when the tagger
# labels them as such.
# NOTE(review): several literals below appear damaged by text extraction
# (romanisation fragments such as "(shuō)" inside the strings).  They are kept
# as found; only the "#39;" entity residue that broke one literal's closing
# quote was repaired.  Verify the entries against the original script.
excludes = {
    '將軍', '卻說(shuō)', '令人', '趕來(lái)', '徐州', '不見(jiàn)', '下馬', '喊聲',
    '因此', '未知', '大敗', '百姓', '大事', '一軍', '之后', '接應(yīng)', '起兵',
    '成都', '原來(lái)', '江東', '正是', '忽然', '原來(lái)', '大叫', '上馬', '天子',
    '一面', '太守', '不如', '忽報(bào)', '后人', '背后', '先主', '此人',
    '城中', '然后', '大軍', '何不', '先生', '何故', '夫人', '不如', '先鋒',
    '二人', '不可', '如何', '荊州', '不能', '如此', '主公', '軍士',
    '商議', '引兵', '次日', '大喜', '魏兵', '軍馬', '于是', '東吳', '今日',
    '左右', '天下', '不敢', '陛下', '人馬', '不知', '都督', '漢中',
    '一人', '眾將', '后主', '只見(jiàn)', '蜀兵', '馬軍', '黃巾', '立功',
    '白發(fā)', '大吉', '紅旗', '士卒', '錢糧', '于漢', '郎舅', '龍鳳', '古之',
    '白虎', '古人云', '爾乃', '馬飛報(bào)', '軒昂', '史官', '侍臣', '列陣',
    '玉璽', '車駕', '老夫', '伏兵', '都尉', '侍中', '西涼', '安民',
    '張?jiān)?', '文武', '白旗', '祖宗', '尋思',
}
# Occurrence count per canonical character name, filled by getWordTimes().
counts = {}


def getWordTimes():
    """Tally person-name tokens of the novel into the module-level ``counts``.

    The text in ``txt`` is segmented with part-of-speech tags.  Only tokens
    tagged ``'nr'``, at least two characters long and absent from
    ``excludes`` are counted.  Known aliases are folded into one canonical
    name before counting; any other token is counted under its own text.
    """
    # canonical name -> aliases that should be counted under it.
    # NOTE(review): some literals look damaged by text extraction; they are
    # reproduced exactly as found.
    alias_groups = (
        ('諸葛亮', ('孔明', '孔明曰', '臥龍先生')),
        ('關(guān)羽', ('云長(zhǎng)', '關(guān)公曰', '關(guān)公')),
        ('劉備', ('玄德', '玄德曰', '玄德甚', '玄德遂', '玄德兵',
                  '玄德領(lǐng)', '玄德同', '劉豫州', '劉玄德')),
        ('曹操', ('孟德', '丞相', '曹賊', '阿瞞', '曹丞相', '曹將軍')),
        ('劉邦', ('高祖',)),
        ('劉秀', ('光武',)),
        ('劉志', ('桓帝',)),
        ('劉宏', ('靈帝',)),
        ('周瑜', ('公瑾',)),
        ('孫策', ('伯符',)),
        ('呂布', ('呂奉先', '布乃', '布大怒', '呂布之')),
        ('趙云', ('趙子龍', '子龍')),
        ('董卓', ('卓大喜', '卓大怒')),
    )
    canonical_of = {}
    for canonical, aliases in alias_groups:
        for alias in aliases:
            canonical_of[alias] = canonical

    for token in pseg.cut(txt):
        surface = token.word
        is_name = token.flag == 'nr' and len(surface) >= 2
        if is_name and surface not in excludes:
            resolved = canonical_of.get(surface, surface)
            counts[resolved] = counts.get(resolved, 0) + 1


getWordTimes()
# (name, count) pairs ordered from most to least frequent; ties keep the
# insertion order of ``counts`` because the sort is stable.
items = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
def wordFreq(filepath, topn):
    """Write the ``topn`` most frequent entries of ``items`` to ``filepath``.

    Each entry is written as one ``word:count`` line in UTF-8.  When fewer
    than ``topn`` entries exist, all of them are written instead of raising
    ``IndexError``.

    Args:
        filepath: Destination file; it is overwritten.
        topn: Maximum number of entries to write.
    """
    with codecs.open(filepath, "w", "utf-8") as f:
        # Slicing tolerates a ranking shorter than ``topn``; max() keeps a
        # negative ``topn`` meaning "write nothing", as with range().
        for word, count in items[:max(topn, 0)]:
            f.write("{}:{}\n".format(word, count))
# Persist the 300 most frequent names, then read the file back to build the
# lookup structures used by the charts.
# NOTE(review): the file name literal looks damaged by text extraction; it is
# kept as found and must stay identical in both places below.
wordFreq("三國(guó)演義詞頻_人名.txt", 300)

dic = {}    # name -> count, with the count kept as the string read from the file
keys = []   # names in file order
# The context manager guarantees the handle is closed even if a line is malformed.
with open('三國(guó)演義詞頻_人名.txt', 'r', encoding='utf-8') as fr:
    for line in fr:
        v = line.strip().split(':')
        dic[v[0]] = v[1]
        keys.append(v[0])

print("人物出現(xiàn)次數(shù)TOP", mainTop)
print(list(dic.items())[:mainTop])

list_name = list(dic.keys())
list_name_times = list(dic.values())
def creat_people_view():
    """Render a bar chart of the ``mainTop`` most frequent names to HTML.

    Reads the module-level ``list_name`` and ``list_name_times`` and writes
    the chart to an HTML file in the working directory.
    """
    bar = Bar()
    bar.add_xaxis(list_name[0:mainTop])
    # Slice the values like the labels so both axes carry the same number of
    # entries; the original passed every value.
    bar.add_yaxis("人物出場(chǎng)次數(shù)", list_name_times[0:mainTop])
    bar.set_global_opts(
        title_opts=opts.TitleOpts(
            title="人物出場(chǎng)次數(shù)可視化圖",
            subtitle="三國(guó)人物TOP" + str(mainTop),
        ),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        xaxis_opts=opts.AxisOpts(axislabel_opts={"rotate": 45}),
    )
    bar.set_series_opts(label_opts=opts.LabelOpts(position="top"))
    bar.render_notebook()
    bar.render("三國(guó)演義人物出場(chǎng)次數(shù)可視化圖.html")
def creat_wordcloud():
    """Draw a masked word cloud from ``counts``, save it as PNG and show it.

    The mask image is read from ``alice_mask.png`` in the working directory
    and the font from a fixed Windows font path.
    """
    bg_pic = imageio.imread(uri='alice_mask.png')
    wc = wordcloud.WordCloud(
        # Raw string: the same path value, without relying on invalid escape
        # sequences (\W, \F, \s) being passed through unchanged.
        font_path=r'c:\Windows\Fonts\simhei.ttf',
        background_color='white',
        width=1000,
        height=800,
        max_words=500,
        mask=bg_pic,
    )
    wc.generate_from_frequencies(counts)
    wc.to_file('三國(guó)演義詞云_人名.png')
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
def creat_wordcloud_pyecharts():
    """Render the (name, count) pairs of ``dic`` as a pyecharts word cloud.

    The chart is written to an HTML file in the working directory.
    """
    wordsAndTimes = list(dic.items())
    label_style = opts.TextStyleOpts(font_family="cursive")
    heading = opts.TitleOpts(title="三國(guó)演義詞云")

    cloud = WordCloud()
    cloud.add(
        series_name="人物次數(shù)",
        data_pair=wordsAndTimes,
        word_size_range=[20, 100],
        textstyle_opts=label_style,
    )
    cloud.set_global_opts(title_opts=heading)
    cloud.render("三國(guó)演義詞云_人名.html")
def chapter_word():
    """Plot the character count of each section of ``txt`` as a line chart.

    ``txt`` is split on the ``------------`` separator; the x axis carries
    the section index and the y axis the section length.  Maximum, minimum
    and average are marked, and the chart is written to an HTML file.
    """
    sections = txt.split("------------")
    section_numbers = list(range(len(sections)))
    section_lengths = list(map(len, sections))

    extremes = opts.MarkPointOpts(
        data=[
            opts.MarkPointItem(type_="max", name="最大值"),
            opts.MarkPointItem(type_="min", name="最小值"),
        ]
    )
    mean_line = opts.MarkLineOpts(
        data=[opts.MarkLineItem(type_="average", name="平均值")]
    )

    chart = Line(init_opts=opts.InitOpts(width="1400px", height="700px"))
    chart.add_xaxis(xaxis_data=section_numbers)
    chart.add_yaxis(
        series_name="章回字?jǐn)?shù)",
        y_axis=section_lengths,
        markpoint_opts=extremes,
        markline_opts=mean_line,
    )
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title="三國(guó)演義章回字?jǐn)?shù)", subtitle=""),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        toolbox_opts=opts.ToolboxOpts(is_show=True),
        xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
    )
    chart.render("三國(guó)演義章回字?jǐn)?shù).html")
# Number of colours to generate: one per name among the first ``mainTop``
# entries of ``list_name``.
colorNum = len(list_name[0:mainTop])
def randomcolor():
    """Return a random colour as a ``#RRGGBB`` hexadecimal string."""
    # Full hexadecimal alphabet; the original table omitted '0', so some
    # colours could never be produced.
    hex_digits = "0123456789ABCDEF"
    return "#" + "".join(random.choice(hex_digits) for _ in range(6))
def color_list():
    """Return a list of ``colorNum`` colours, each produced by randomcolor()."""
    return [randomcolor() for _ in range(colorNum)]
# Make SimHei the sans-serif font for matplotlib -- presumably so that the
# CJK titles and node labels render instead of showing placeholder boxes.
plt.rcParams['font.sans-serif'] = ['SimHei']
def _count_cooccurrences(paragraphs, names):
    """Count, for each pair of names, the paragraphs that mention both.

    Keys are ``(earlier, later)`` tuples following the order of ``names``, so
    each unordered pair is stored exactly once.
    """
    relations = {}
    for text in paragraphs:
        present = [name for name in names if name in text]
        for pair in combinations(present, 2):
            relations[pair] = relations.get(pair, 0) + 1
    return relations


def creat_relationship():
    """Draw and save the co-occurrence network of the top ``mainTop`` names.

    Two names are linked when they appear in the same line of ``txt``; the
    edge weight is the number of such lines divided by the largest count.
    Edges are drawn in three styles by weight (> 0.6, 0.3-0.6, <= 0.3).  The
    figure is saved as PNG and then shown.  Nothing is drawn when no two
    names ever share a line.
    """
    colors = color_list()
    Names = list_name[0:mainTop]
    relations = _count_cooccurrences(txt.split('\n'), Names)
    if not relations:
        # No edges at all: max() below would fail and the graph would be empty.
        return

    maxRela = max(relations.values())
    plt.figure(figsize=(15, 15))
    G = nx.Graph()
    for (name_0, name_1), hits in relations.items():
        G.add_edge(name_0, name_1, weight=hits / maxRela)

    # Split the edges into three strength classes.
    elarge, emidle, esmall = [], [], []
    for u, v, d in G.edges(data=True):
        weight = d['weight']
        if weight > 0.6:
            elarge.append((u, v))
        elif weight > 0.3:
            emidle.append((u, v))
        else:
            esmall.append((u, v))

    pos = nx.spring_layout(G)
    # Names without any co-occurrence never become nodes, so trim the colour
    # list to the actual node count to keep both sequences the same length.
    node_colors = colors[:G.number_of_nodes()]
    nx.draw_networkx_nodes(G, pos, alpha=0.8, node_size=1300, node_color=node_colors)
    nx.draw_networkx_edges(G, pos, edgelist=elarge, width=2.5, alpha=0.9, edge_color='g')
    nx.draw_networkx_edges(G, pos, edgelist=emidle, width=1.5, alpha=0.6, edge_color='y')
    nx.draw_networkx_edges(G, pos, edgelist=esmall, width=1, alpha=0.4, edge_color='b', style='dashed')
    nx.draw_networkx_labels(G, pos, font_size=14)
    plt.title("《三國(guó)演義》主要人物社交關(guān)系網(wǎng)絡(luò)圖")
    plt.axis('off')
    plt.savefig('《三國(guó)演義》主要人物社交關(guān)系網(wǎng)絡(luò)圖.png', bbox_inches='tight')
    plt.show()


def main():
    """Produce every chart in sequence."""
    creat_people_view()
    creat_wordcloud()
    creat_wordcloud_pyecharts()
    creat_relationship()
    chapter_word()


if __name__ == '__main__':
    main()
总结
以上是生活随笔为你收集整理的python统计三国演义中人物出现的频次的全部内容,希望文章能够帮你解决所遇到的问题。
如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。