python商品评论分析_亚马逊产品情感评论分析
import requests
from lxml import etree
import re
import xlwt
from openpyxl import workbook # 寫入Excel表所用
from openpyxl import load_workbook # 讀取excel
# import matplotlib.pylab as plt
from xlrd import book
# Request headers that imitate a desktop Firefox session so the site is less
# likely to detect the scraper.
# NOTE(review): the Cookie value embeds what looks like a captured session
# token; consider loading it from configuration instead of keeping it in source.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:70.0) Gecko/20100101 Firefox/70.0',
    'Cookie': 'x-wl-uid=1DVw4k4T/jAduWIfwW2jvf029Ha4Bgv/AJGjP/yRfJTdq26dr7oDdeEBdb6zOPUl0ByfsaKJ3GUY=; session-id-time=2082729601l; session-id=457-7649276-4174543; csm-hit=tb:DAHATSQRZZBWHWD4ZXYP+s-T61YJHRDEC6Y6S2VMTVZ|1573355007668&t:1573355007668&adb:adblk_no; ubid-acbcn=459-2457809-1906210; session-token="4sZGQQPKw9CJUOzJFLsTdS3FtlpqIyp0hyvhXL6RMOchbDf7p7YLDEL90YFps2Hl80fBT6uPmzQ00meCLYxsrjuoabX3+kz7OB+CLw8GaAYZB8J9oBBcJLBUsGs6LLm/EHQht5Tm0IpOKR0hz0GGtATgcpJXDfRoEdvNol+CUc3mXOMA5KmEfFWstdV+KwyzSGrGW+DdrAftisgZMl2stffIdhcOLh53B4tJwsR5awKqPrOqZF8uJg=="; lc-acbcn=zh_CN; i18n-prefs=CNY'
}
# Buckets meant to hold the collected ratings, split into good (hao),
# medium (zhong) and bad (cha). No active code visible here appends to them.
hao = []
zhong = []
cha = []
# Accumulates one [review text, color and size, rating, date] entry per
# scraped rating; parge_page appends to it and the export step reads it.
all_info_list = []
def parge_page(url):
    """Fetch one review-listing page and record every review found on it.

    Downloads ``url`` using the module-level ``headers``, then walks each
    child ``div`` of the ``cm_cr-review_list`` container. For every rating
    string found inside a review, the review is printed as a dict and a
    ``[review text, color and size, rating, date]`` entry is appended to the
    module-level ``all_info_list``.

    The review text, color/size and date entries are the raw lists returned
    by the XPath queries; only the rating is a single string.

    Args:
        url: Address of the review page to download.

    Returns:
        None. Results are accumulated in ``all_info_list``.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url=url, headers=headers, timeout=30)
    html = etree.HTML(response.text)

    # Each direct child div of the review list is one customer's review.
    reviews = html.xpath('//div[@id="cm_cr-review_list"]/div')
    for review in reviews:
        ratings = review.xpath('.//span[@class="a-icon-alt"]/text()')
        body = review.xpath('.//span[@data-hook="review-body"]/span/text()')
        date = review.xpath('.//span[@data-hook="review-date"]/text()')
        fmt = review.xpath('.//a[@ data-hook="format-strip"]/text()')

        for rating in ratings:
            # Drop the trailing unit text so only the score part remains.
            score = re.sub('顆星', '', rating)
            record = {'評論': body, '顏色和尺寸': fmt, '評分': score, '日期': date}
            print(record)
            all_info_list.append([body, fmt, score, date])
def main():
    """Scrape ten pages of reviews for the hard-coded product.

    Builds the URL of each review page and hands it to ``parge_page``, which
    appends the rows it extracts to the module-level ``all_info_list``.

    Returns:
        None.
    """
    base_url = 'https://www.amazon.com/product-reviews/B07XGK5QXD/?pageNumber='
    # Ten pages at about ten reviews per page yields roughly 100 reviews.
    # NOTE(review): pages are requested as 1..10 on the assumption that the
    # site's review pagination is 1-based; confirm against the live site.
    for page in range(1, 11):
        # The page must actually be fetched and parsed; building the URL
        # alone leaves all_info_list empty.
        parge_page(base_url + str(page))
if __name__ == '__main__':
    # Script entry point: scrape the reviews, then export them to a
    # spreadsheet with one header row followed by one row per entry in
    # all_info_list.
    main()

    # Local names avoid rebinding the imported ``book`` and the builtin
    # ``list`` at module scope.
    output_book = xlwt.Workbook(encoding='utf-8')
    sheet = output_book.add_sheet('sheet1')

    head = ['評論', '顏色和尺寸', '評分', '日期']  # column titles
    for col, title in enumerate(head):
        sheet.write(0, col, title)

    # Data rows start at row 1, directly below the header.
    for row_idx, row in enumerate(all_info_list, start=1):
        for col, value in enumerate(row):
            sheet.write(row_idx, col, value)

    # xlwt produces the legacy .xls format, so the file is saved with the
    # matching extension rather than .xlsx.
    output_book.save('評論信息.xls')
    print('完成', output_book)
總結
以上是生活随笔為你收集整理的python商品评论分析_亚马逊产品情感评论分析的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: python3语法错误python_[大
- 下一篇: synchronized()_这篇文章带