# encoding=utf-8
# Split one merged report text file into one text file per sample.
#
# Workflow: pick the merged file in a dialog, copy it without blank lines to
# an intermediate file, cut that copy after every line containing "Total",
# write each piece to txtNNN.txt, then delete the intermediate file.
import os

# Folder that receives the per-sample files (txt001.txt, txt002.txt, ...).
_TXT_DIR = r"C:\Users\Public\Documents\TXT"
# Intermediate blank-line-free copy of the input; removed once the split is done.
_MERGED_TXT = r'C:\Users\Public\Documents\能譜報告所需的表格數(shù)據(jù)集.txt'


def openTxt():
    """Ask the user for the merged text file and return its path.

    The output folder for the per-sample files is created first if needed.

    Returns:
        The path selected in the file dialog, or ``None`` when the dialog
        is cancelled.
    """
    # Imported lazily so the text helpers below work without a GUI toolkit.
    import easygui as gg

    # makedirs also creates missing parent folders and tolerates re-runs.
    os.makedirs(_TXT_DIR, exist_ok=True)
    txtpath = gg.fileopenbox()
    print(txtpath)
    return txtpath


def clearBlankLine(txtpath, outpath=_MERGED_TXT):
    """Copy *txtpath* to *outpath*, leaving out blank lines.

    Args:
        txtpath: UTF-8 text file to read (a leading BOM is ignored).
        outpath: File to write; overwritten if it exists.  Defaults to the
            intermediate file that ``seprateTxt`` reads.

    Lines that are empty or contain only whitespace are dropped; every other
    line is copied unchanged.
    """
    with open(txtpath, 'r', encoding='utf-8-sig') as src, \
            open(outpath, 'w', encoding='utf-8') as dst:
        dst.writelines(line for line in src if line.strip())


def seprateTxt(srcpath=_MERGED_TXT, outdir=_TXT_DIR):
    """Split *srcpath* into one file per sample inside *outdir*.

    A sample ends on, and includes, the next line containing ``"Total"``.
    Piece number N is written to ``txtNNN.txt`` (three digits, starting at
    001); an existing file of that name is overwritten.  Anything after the
    last "Total" line becomes one more piece, unless it is empty.

    Args:
        srcpath: Blank-line-free text file to split.
        outdir: Folder for the pieces; created if missing.

    Returns:
        The list of file paths written, in order.
    """
    with open(srcpath, mode="r", encoding="utf-8") as f2:
        lines = f2.readlines()

    # Cut positions come from enumerate(), not list.index(): index() returns
    # the first equal line, which is wrong when two samples have identical
    # "Total" lines.
    cuts = [0] + [pos + 1 for pos, line in enumerate(lines) if "Total" in line]
    parts = [lines[i:j] for i, j in zip(cuts, cuts[1:] + [None])]

    os.makedirs(outdir, exist_ok=True)
    written = []
    for number, part in enumerate(parts, start=1):
        if not part:
            # Nothing follows the last "Total" line: do not create a file.
            continue
        target = os.path.join(outdir, 'txt%03d.txt' % number)
        with open(target, mode="w", encoding="utf-8") as f3:
            f3.writelines(part)
        written.append(target)
    return written


if __name__ == '__main__':
    chosen = openTxt()
    if chosen:  # None means the dialog was cancelled: nothing to do.
        clearBlankLine(chosen)
        try:
            seprateTxt()
        finally:
            # The intermediate file is only scratch space; always clean it up.
            os.remove(_MERGED_TXT)
如何從文本文件里獲取信息形成表格數據集?代碼實現如下:
def TableData(txtpath):
    """Read the "K-series" rows of a sample text file as lists of cells.

    Every line containing ``"K-series"`` is split on whitespace.  A complete
    row has 10 cells; a row with only 9 cells has no entry in the 7th
    (compound) column, so ``"/"`` is inserted at index 6 to keep the columns
    aligned.  Rows with any other cell count are returned as they are.

    Args:
        txtpath: Path of a UTF-8 text file for one sample.

    Returns:
        A list with one list of strings per matching line, in file order;
        empty when no line matches.
    """
    # NOTE(review): only lines containing "K-series" are collected; rows of
    # other series are skipped -- confirm that is intended.
    rows = []
    with open(txtpath, mode="r", encoding="utf-8") as file:
        for line in file:
            if "K-series" not in line:
                continue
            # split() with no argument drops the empty strings produced by
            # runs of spaces and strips the trailing newline in one step.
            cells = line.split()
            if len(cells) == 9:
                cells.insert(6, "/")
            rows.append(cells)
    return rows
運行效果如下:
C:\Users\0\AppData\Local\Programs\Python\Python38\python.exe E:/Users/0/Desktop/PYTHON/nengpu/從文本獲取關鍵信息2.py
['O','8','K-series','37.73','46.77','61.61','/','0.00','0.00','4.62\n','Si','14','K-series','20.57','25.49','19.13','SiO2','54.54','44.00','0.89\n','Al','13','K-series','12.70','15.74','12.30','Al2O3','29.74','24.00','0.62\n','K','19','K-series','6.41','7.95','4.28','K2O','9.58','7.73','0.23\n','Mg','12','K-series','1.72','2.13','1.85','MgO','3.53','2.85','0.12\n','Fe','26','K-series','1.14','1.41','0.53','FeO','1.81','1.46','0.09\n','Ti','22','K-series','0.29','0.36','0.16','TiO2','0.59','0.48','0.04\n','Na','11','K-series','0.12','0.15','0.14','Na2O','0.21','0.17','0.04\n']
[['O','8','K-series','37.73','46.77','61.61','/','0.00','0.00','4.62'],['Si','14','K-series','20.57','25.49','19.13','SiO2','54.54','44.00','0.89'],['Al','13','K-series','12.70','15.74','12.30','Al2O3','29.74','24.00','0.62'],['K','19','K-series','6.41','7.95','4.28','K2O','9.58','7.73','0.23'],['Mg','12','K-series','1.72','2.13','1.85','MgO','3.53','2.85','0.12'],['Fe','26','K-series','1.14','1.41','0.53','FeO','1.81','1.46','0.09'],['Ti','22','K-series','0.29','0.36','0.16','TiO2','0.59','0.48','0.04'],['Na','11','K-series','0.12','0.15','0.14','Na2O','0.21','0.17','0.04']]
8
80.68

Process finished with exit code 0
第三步 創建word 并自動寫入圖片和表格數據
思路: 將對應的圖片和文本文件位置遍歷出來, 用循環語句寫入到word對象中。代碼如下:
# -*- coding: UTF-8 -*-
# Build a Word report: for every sample text file, add a caption paragraph,
# two pictures and a results table, then save the document.
from docx import Document
from docx.shared import Inches
from docx.oxml.ns import qn
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_ALIGN_VERTICAL
import easygui as gg
import os
import shutil

# Staging folders: offered as dialog defaults and removed after saving.
_PIC1_DIR = r'C:\Users\Public\Documents\Word'
_PIC2_DIR = r'C:\Users\Public\Documents\Word1'
_TXT_DIR = r'C:\Users\Public\Documents\TXT'
_REPORT_PATH = r'C:\Users\Public\Documents\res\輸出的能譜報告.docx'

_TABLE_COLS = 10
# Header row + 9 body rows + "Total" row: the smallest table ever produced.
_MIN_TABLE_ROWS = 11
# NOTE(review): the 6th caption is an atomic-percent column but carries the
# unit "[Wt%]" -- confirm whether that label is intended.
_TABLE_HEADERS = (
    "元素",
    "原子\n序數",
    "序列",
    "非歸一化質量百分比[Wt%]",
    "歸一化質量百分比[Wt%]",
    "歸一化原子百分比[Wt%]",
    "化合物",
    "非歸一化質量百分比[Wt%]",
    "歸一化質量百分比[Wt%]",
    "絕對\n誤差[Wt%]",
)

doc = Document()
pic1 = []    # paths of the first picture of each sample
pic2 = []    # paths of the second picture of each sample
txt = []     # paths of the per-sample text files
table = []   # TableData() result per sample
table2 = []  # TableData2() result per sample


def _style_run(run, size):
    """Give *run* the report fonts (Times New Roman / 宋體) at *size* points."""
    run.font.size = Pt(size)
    run.font.name = 'Times New Roman'
    # Set the East-Asian (Chinese) font face directly on the run's rFonts
    # element, which exists once font.name has been assigned.
    run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋體')


def _format_last_paragraph(spacing):
    """Centre the document's last paragraph and set its spacing in points."""
    last_paragraph = doc.paragraphs[-1]
    last_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    last_paragraph.paragraph_format.space_before = Pt(spacing)
    last_paragraph.paragraph_format.space_after = Pt(spacing)


def HeaderFooter():
    """Write the page header (report title) and footer (signature line)
    into the first section of the document."""
    section = doc.sections[0]

    header_par = section.header.paragraphs[0]
    _style_run(header_par.add_run('能譜報告'), 20)
    header_par.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER

    footer_par = section.footer.paragraphs[0]
    # The 120 spaces push the second name towards the right edge.
    _style_run(footer_par.add_run('實驗員:劉月寧%s審核員:楊武' % (' ' * 120)), 10)
    footer_par.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT


def FirstSecondPragragh(txtpath):
    """Add the bold caption paragraph built from the first four lines of
    *txtpath* (sample name, date, image size, magnification).

    Raises:
        ValueError: if the file has fewer than four lines.
    """
    with open(txtpath, mode="r", encoding="utf-8") as file:
        endd = file.readlines()
    if len(endd) < 4:
        raise ValueError(
            "%s: expected at least 4 header lines, found %d" % (txtpath, len(endd)))

    name = "樣品號:" + endd[0]
    data = endd[1].replace("Date:", "時間:")
    size = endd[2].replace("Image size:", "圖片尺寸:")
    mag = endd[3].replace("Mag:", "放大倍數:")

    p = doc.add_paragraph()
    # `data` keeps its line break, so size and magnification start on a
    # second line inside the same run.
    run = p.add_run('%s %s%s %s' % (
        name.replace("\n", ""), data, size.replace("\n", ""), mag.replace("\n", "")))
    _style_run(run, 12)
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
    run.bold = True


def FirstPicture(pic1):
    """Insert picture *pic1* 3 inches wide, centred, with no extra spacing."""
    doc.add_picture(pic1, width=Inches(3))
    _format_last_paragraph(0)


def SecondPicture(pic2):
    """Insert picture *pic2* at 6 x 2.5 inches, centred, with 2 pt spacing."""
    doc.add_picture(pic2, width=Inches(6), height=Inches(2.5))
    _format_last_paragraph(2)


def AddTable(f, tenfour):
    """Append the results table for one sample, followed by a page break.

    Args:
        f: Body rows as returned by ``TableData``; at most the first 10
            cells of each row are written.
        tenfour: Value for column 4 of the "Total" row.

    The table has a header row, the rows of *f* and a closing "Total" row.
    It has at least 11 rows (unused body rows stay blank) and grows when *f*
    holds more than 9 rows.
    """
    n_rows = max(_MIN_TABLE_ROWS, len(f) + 2)
    total_row = n_rows - 1

    table1 = doc.add_table(rows=n_rows, cols=_TABLE_COLS)
    table1.style = doc.styles['Table Grid']
    # These settings are written on the 'Table Grid' style object itself.
    table1.style.font.name = 'Times New Roman'
    table1.style.font.size = Pt(9)
    table1.style.font.bold = True

    for r in range(n_rows):
        table1.cell(r, 1).width = Inches(0.5)
        table1.cell(r, 2).width = Inches(1)

    for c, caption in enumerate(_TABLE_HEADERS):
        table1.cell(0, c).text = caption

    for r, row in enumerate(f, start=1):
        for c, value in enumerate(row[:_TABLE_COLS]):
            table1.cell(r, c).text = value

    table1.cell(total_row, 0).text = "Total"
    table1.cell(total_row, 3).text = "%s" % tenfour
    table1.cell(total_row, 4).text = "100"
    table1.cell(total_row, 5).text = "100"

    # Centre every cell both vertically and horizontally.
    for r in range(n_rows):
        for c in range(_TABLE_COLS):
            cell = table1.cell(r, c)
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc.add_page_break()


def _collect_files(default_dir, suffix, found):
    """Let the user pick a folder and append to *found* every file below it
    whose name ends in *suffix* (case-insensitive).

    Folders and files are visited in case-insensitive name order so that the
    picture and text lists, which are paired by position, are deterministic.
    Nothing is appended when the dialog is cancelled.
    """
    chosen = gg.diropenbox(default_dir, '文件選擇對話框', default_dir)
    if chosen:
        for path, dir_list, file_list in os.walk(chosen):
            dir_list.sort(key=str.lower)  # in-place: steers os.walk's order
            for file_name in sorted(file_list, key=str.lower):
                if file_name.lower().endswith(suffix):
                    full_path = os.path.join(path, file_name)
                    print(full_path)
                    found.append(full_path)
    print(found)


def Openpic1turedir():
    """Ask for the folder of first pictures and fill ``pic1`` with its .jpg files."""
    _collect_files(_PIC1_DIR, ".jpg", pic1)


def Openpic2turedir():
    """Ask for the folder of second pictures and fill ``pic2`` with its .jpg files."""
    _collect_files(_PIC2_DIR, ".jpg", pic2)


def Opentxt():
    """Ask for the folder of sample text files and fill ``txt`` with its .txt files."""
    _collect_files(_TXT_DIR, ".txt", txt)


def TableData(txtpath):
    """Read the "K-series" rows of a sample text file as lists of cells.

    Every line containing ``"K-series"`` is split on whitespace.  A complete
    row has 10 cells; a row with only 9 cells has no entry in the 7th
    (compound) column, so ``"/"`` is inserted at index 6 to keep the columns
    aligned.  Rows with any other cell count are returned as they are.

    Returns:
        A list with one list of strings per matching line, in file order;
        empty when no line matches.
    """
    # NOTE(review): only lines containing "K-series" are collected; rows of
    # other series are skipped -- confirm that is intended.
    rows = []
    with open(txtpath, mode="r", encoding="utf-8") as file:
        for line in file:
            if "K-series" not in line:
                continue
            cells = line.split()
            if len(cells) == 9:
                cells.insert(6, "/")
            rows.append(cells)
    return rows


def TableData2(txtpath):
    """Return the second whitespace-separated field of the last line of
    *txtpath* (the value shown in the table's "Total" row).

    Raises:
        ValueError: if the file is empty or its last line has fewer than
            two fields.
    """
    with open(txtpath, mode="r", encoding="utf-8") as file2:
        endd2 = file2.readlines()
    down = endd2[-1].split() if endd2 else []
    if len(down) < 2:
        raise ValueError("%s: last line has no total value" % txtpath)
    return down[1]


def main():
    """Collect the inputs, build the report, save it and clean up."""
    Openpic1turedir()
    Openpic2turedir()
    Opentxt()

    # Stop before touching anything when the inputs cannot be paired up.
    if not txt:
        raise SystemExit("No sample text files were selected; nothing to do.")
    if not (len(pic1) == len(pic2) == len(txt)):
        raise SystemExit(
            "Input mismatch: %d first pictures, %d second pictures, %d text files."
            % (len(pic1), len(pic2), len(txt)))

    HeaderFooter()
    for txtpath, first_pic, second_pic in zip(txt, pic1, pic2):
        FirstSecondPragragh(txtpath)
        rows = TableData(txtpath)
        total = TableData2(txtpath)
        table.append(rows)
        table2.append(total)
        print(rows)
        print(total)
        FirstPicture(first_pic)
        SecondPicture(second_pic)
        AddTable(rows, total)

    os.makedirs(os.path.dirname(_REPORT_PATH), exist_ok=True)
    doc.save(_REPORT_PATH)
    gg.msgbox("您的報告已生成在 C:\\Users\\Public\\Documents\\res ", "提示")

    # NOTE(review): the default staging folders are deleted even when the
    # user picked different folders in the dialogs -- confirm this is wanted.
    for staging in (_PIC2_DIR, _PIC1_DIR, _TXT_DIR):
        if os.path.isdir(staging):
            shutil.rmtree(staging)


if __name__ == '__main__':
    main()