當前位置：首頁 > 编程语言 > python > 内容正文

python

python程序简单快速写能谱实验word报告

發布時間：2023/12/20 python 28 豆豆

生活随笔收集整理的這篇文章主要介紹了 python程序简单快速写能谱实验word报告，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

python程序簡單快速寫能譜實驗報告

第一步獲取報告的圖片
第二步獲取報告中的表格數據
第三步創建word 并自動寫入圖片和表格數據

第一步獲取報告的圖片

由于我需要的圖片在一個最原始的實驗測試報告中, 它的格式為.docx文件, 所以想要獲取 word 文檔中的圖片, 就需要將其后綴改寫為 .zip 使得它成為壓縮文件, 然后再對它解壓, 找到word/media文件夾, 將里面的圖片單獨提取出來即可.

具體程序如下:
寫的有點糙,能實現就可以

# -*- coding:utf-8 -*-
"""Step 1: pull the embedded images out of the raw .docx test report.

PNG entries of the report are copied to ``WORD_DIR`` and EMF entries to
``WORD1_DIR``; both folders are then re-encoded as JPEG and single-digit
image indices are zero-padded.
"""
import os
import re
import shutil
import zipfile

import easygui as g
from PIL import Image
from PIL import ImageFile

WORD_DIR = r"C:\Users\Public\Documents\Word"
WORD1_DIR = r"C:\Users\Public\Documents\Word1"
RES_DIR = r"C:\Users\Public\Documents\res"

# A lone digit directly in front of the file extension, e.g. the "1" in
# "image1.jpg" (but not the digits of "image10.jpg").
_SINGLE_DIGIT_INDEX = re.compile(r'(?<!\d)(\d)(?=\.[^.]+$)')


def filePATH():
    """Create the two image folders and the result folder if they are missing."""
    for folder in (WORD_DIR, WORD1_DIR, RES_DIR):
        os.makedirs(folder, exist_ok=True)


def getWord():
    """Ask the user for a .docx report and copy out its embedded images.

    The .docx is opened directly with ``zipfile`` (no renamed copy and no
    temporary extraction folder are needed). Only entries stored directly
    under ``word/media`` are considered:

    * ``.png`` entries are written to ``WORD_DIR`` under their own name;
    * ``.emf`` entries are written to ``WORD1_DIR`` with the extension
      changed to ``.png`` (the bytes themselves are not converted here).

    Returns None. If the file dialog is cancelled nothing is extracted.
    """
    picpath = g.fileopenbox()
    if not picpath:
        # fileopenbox() gives no path when the dialog is cancelled.
        print("No report selected; nothing extracted.")
        return

    with zipfile.ZipFile(picpath, 'r') as archive:
        print(archive.namelist())
        for member in archive.namelist():
            parent, _, name = member.rpartition('/')
            if parent != 'word/media' or not name:
                continue
            stem, extension = os.path.splitext(name)
            extension = extension.lower()
            if extension == '.png':
                target = os.path.join(WORD_DIR, name)
            elif extension == '.emf':
                target = os.path.join(WORD1_DIR, stem + '.png')
            else:
                continue
            # Stream the entry straight to its destination file.
            with archive.open(member) as source, open(target, 'wb') as sink:
                shutil.copyfileobj(source, sink)


def pilConvertJPG(path):
    """Re-encode every .jpg/.jpeg/.png file below *path* as an RGB JPEG.

    The output keeps the source's folder and base name (with characters
    whose code point is 256 or above removed) and gets a ``.jpg`` extension.
    The source file is deleted only after the JPEG has been written, and
    only when it is a different file, so a failed save no longer loses the
    image.

    NOTE(review): files in the Word1 folder are EMF data stored under a
    .png name by getWord(); whether PIL can decode them looks
    platform-dependent — confirm on the target machine.
    """
    for folder, _, filenames in os.walk(path):
        for filename in filenames:
            if os.path.splitext(filename)[1].lower() not in ('.jpg', '.jpeg', '.png'):
                continue
            source = os.path.join(folder, filename)
            # Close the source handle before the file is replaced or removed.
            with Image.open(source) as img:
                rgb_im = img.convert('RGB')

            safe_name = ''.join(ch for ch in filename if ord(ch) < 256)
            jpg_img_path = os.path.splitext(
                os.path.join(folder, safe_name).replace('\\', '/'))[0] + '.jpg'
            print(jpg_img_path)
            rgb_im.save(jpg_img_path)

            same_file = (os.path.normcase(os.path.normpath(jpg_img_path))
                         == os.path.normcase(os.path.normpath(source)))
            if not same_file:
                os.remove(source)


def _zero_pad_names(folder):
    """Rename files in *folder* so a single-digit index becomes two digits."""
    for name in os.listdir(folder):
        padded = _SINGLE_DIGIT_INDEX.sub(r'0\1', name)
        print(name, len(name))
        if padded != name:
            os.replace(os.path.join(folder, name), os.path.join(folder, padded))


def reNamepic():
    """Zero-pad single-digit image indices in both image folders.

    ``image1.jpg`` becomes ``image01.jpg`` so that an alphabetical listing
    of up to 99 images follows the numeric order.
    """
    for folder in (WORD_DIR, WORD1_DIR):
        _zero_pad_names(folder)


if __name__ == '__main__':
    pathw = WORD_DIR
    pathw1 = WORD1_DIR
    filePATH()
    getWord()
    pilConvertJPG(pathw)
    pilConvertJPG(pathw1)
    reNamepic()

第二步獲取報告中的表格數據

由于我需求的word報告要插入一張表格, 表格中要添加一些數據, 所以就要提前準備好表格數據集. 由于原始的word報告里所有信息是按照文本框插入的, 見下圖所示, 想要直接獲取表格數據和文本信息是獲取不到的. 那么該怎么獲取想要的數據呢?
方法就是, 先將該word 轉換成PDF文件, 再將PDF文件的內容轉換成 txt 文本, 再從文本文件中逐行提取有用的信息. 這個方法費了我好長時間, 要從一堆文字中提取出有規律的數據, 形成規則的表格數據添加進去.

將一個txt文本去掉空行,并拆分成多個txt文本,
去掉空行后的txt文本,見下圖

拆分后的多個txt文件, 如下圖

代碼實現(xiàn):

# encoding=utf-8
"""Step 2: strip blank lines from the report text and split it per sample.

The text is cut right after every line containing "Total"; each piece is
written to ``TXT_DIR`` as ``txt001.txt``, ``txt002.txt``, ...
"""
import os

import easygui as gg

TXT_DIR = r"C:\Users\Public\Documents\TXT"
# Intermediate blank-line-free copy of the input; removed again by the main
# block. NOTE(review): the file name is reproduced verbatim and appears to
# contain text-conversion artifacts — confirm the intended name.
CLEAN_TXT = r'C:\Users\Public\Documents\能譜報告所需的表格數(shù)據(jù)集.txt'


def openTxt():
    """Create ``TXT_DIR`` if needed and let the user pick the source text file.

    Returns whatever ``easygui.fileopenbox()`` returns (no path when the
    dialog is cancelled).
    """
    os.makedirs(TXT_DIR, exist_ok=True)
    txtpath = gg.fileopenbox()
    print(txtpath)
    return txtpath


def clearBlankLine(txtpath):
    """Copy *txtpath* to ``CLEAN_TXT`` without its blank lines.

    A line counts as blank when it holds nothing but whitespace, so lines
    made only of spaces are dropped together with empty ones.
    """
    with open(txtpath, 'r', encoding='utf-8') as source:
        kept = [line for line in source if line.strip()]
    with open(CLEAN_TXT, 'w', encoding='utf-8') as cleaned:
        cleaned.writelines(kept)


def seprateTxt():
    """Split ``CLEAN_TXT`` into one text file per "Total"-terminated section.

    Section *k* (1-based) is written to ``TXT_DIR`` as ``txt%03d.txt``.
    Lines after the last "Total" line, if any, form one more section. Each
    output file is rewritten from scratch, so running the step twice does
    not duplicate content.
    """
    os.makedirs(TXT_DIR, exist_ok=True)
    with open(CLEAN_TXT, mode="r", encoding="utf-8") as f2:
        end = f2.readlines()

    # Positions come from enumerate(): looking lines up by value would
    # return the first match when two "Total" lines are textually identical.
    a = [0] + [pos + 1 for pos, line in enumerate(end) if "Total" in line]
    print(len(a))
    print(a)
    print(len(end))

    parts = [end[start:stop] for start, stop in zip(a, a[1:] + [None])]
    for number, part in enumerate(parts, start=1):
        if not part:
            # Nothing follows the last "Total" line: no file for it.
            continue
        with open(r'C:\Users\Public\Documents\TXT\txt%03d.txt' % number,
                  mode="w", encoding="utf-8") as f3:
            f3.writelines(part)


if __name__ == '__main__':
    chosen = openTxt()
    if not chosen:
        print("No text file selected; nothing to do.")
    else:
        clearBlankLine(chosen)
        try:
            seprateTxt()
        finally:
            # The cleaned copy is only an intermediate artifact.
            os.remove(CLEAN_TXT)

如何從文本文件里獲取信息形成表格數據集? 代碼實現如下:

def TableData(txtpath):
    """Build the table rows from the "K-series" lines of a sample text file.

    Every line of *txtpath* that contains "K-series" is split on whitespace
    into one row of cells. A row that is exactly one cell short of the
    10-column layout gets "/" inserted at index 6 — the slot the original
    code filled for oxygen and fluorine rows — regardless of where that row
    appears in the file.

    Args:
        txtpath: Path of a UTF-8 text file.

    Returns:
        A list of rows (lists of str); an empty list when the file has no
        "K-series" line.
    """
    columns = 10
    placeholder_index = 6
    rows = []
    with open(txtpath, mode="r", encoding="utf-8") as file:
        for line in file:
            if "K-series" not in line:
                continue
            # split() without arguments also discards the newline and any
            # trailing blanks, which would otherwise become empty cells.
            cells = line.split()
            if len(cells) == columns - 1:
                cells.insert(placeholder_index, "/")
            rows.append(cells)
    return rows

運(yùn)行效果如下:

C:\Users\0\AppData\Local\Programs\Python\Python38\python.exe E:/Users/0/Desktop/PYTHON/nengpu/從文本獲取關(guān)鍵信息2.py ['O', '8', 'K-series', '37.73', '46.77', '61.61', '/', '0.00', '0.00', '4.62\n', 'Si', '14', 'K-series', '20.57', '25.49', '19.13', 'SiO2', '54.54', '44.00', '0.89\n', 'Al', '13', 'K-series', '12.70', '15.74', '12.30', 'Al2O3', '29.74', '24.00', '0.62\n', 'K', '19', 'K-series', '6.41', '7.95', '4.28', 'K2O', '9.58', '7.73', '0.23\n', 'Mg', '12', 'K-series', '1.72', '2.13', '1.85', 'MgO', '3.53', '2.85', '0.12\n', 'Fe', '26', 'K-series', '1.14', '1.41', '0.53', 'FeO', '1.81', '1.46', '0.09\n', 'Ti', '22', 'K-series', '0.29', '0.36', '0.16', 'TiO2', '0.59', '0.48', '0.04\n', 'Na', '11', 'K-series', '0.12', '0.15', '0.14', 'Na2O', '0.21', '0.17', '0.04\n'] [['O', '8', 'K-series', '37.73', '46.77', '61.61', '/', '0.00', '0.00', '4.62'], ['Si', '14', 'K-series', '20.57', '25.49', '19.13', 'SiO2', '54.54', '44.00', '0.89'], ['Al', '13', 'K-series', '12.70', '15.74', '12.30', 'Al2O3', '29.74', '24.00', '0.62'], ['K', '19', 'K-series', '6.41', '7.95', '4.28', 'K2O', '9.58', '7.73', '0.23'], ['Mg', '12', 'K-series', '1.72', '2.13', '1.85', 'MgO', '3.53', '2.85', '0.12'], ['Fe', '26', 'K-series', '1.14', '1.41', '0.53', 'FeO', '1.81', '1.46', '0.09'], ['Ti', '22', 'K-series', '0.29', '0.36', '0.16', 'TiO2', '0.59', '0.48', '0.04'], ['Na', '11', 'K-series', '0.12', '0.15', '0.14', 'Na2O', '0.21', '0.17', '0.04']] 8 80.68Process finished with exit code 0

第三步創建word 并自動寫入圖片和表格數據

思路: 將對應的圖片和文本文件位置遍歷出來, 用循環語句寫入到word對象中
代碼如下

# -*- coding: UTF-8 -*-
"""Step 3: assemble the Word report from the extracted images and text files.

For every sample text file one report section is written: an info
paragraph, two pictures, a results table and a page break.

NOTE(review): several user-visible string literals below (table header
cells, labels, the East Asian font name) look like they carry
text-conversion artifacts such as "(shù)"; they are reproduced verbatim —
confirm the intended wording and font name before shipping reports.
"""
import os
import shutil

import easygui as gg
from docx import Document
from docx.enum.table import WD_ALIGN_VERTICAL
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.shared import Inches
from docx.shared import Pt

WORD_DIR = r'C:\Users\Public\Documents\Word'
WORD1_DIR = r'C:\Users\Public\Documents\Word1'
TXT_DIR = r'C:\Users\Public\Documents\TXT'
RES_DIR = r'C:\Users\Public\Documents\res'

TABLE_COLUMNS = 10
MIN_TABLE_ROWS = 11  # header + 9 element rows + "Total" row

doc = Document()
pic1 = []
pic2 = []
txt = []
table = []
table2 = []


def HeaderFooter():
    """Write the page header (centered) and page footer (left-aligned)."""
    header_paragraph = doc.sections[0].header.paragraphs[0]
    header_run = header_paragraph.add_run('能譜報告')
    header_run.font.size = Pt(20)
    header_paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
    header_run.font.name = 'Times New Roman'
    # East Asian characters use a separate font slot.
    header_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋體')

    footer_paragraph = doc.sections[0].footer.paragraphs[0]
    footer_run = footer_paragraph.add_run('實驗員：劉月寧%s審核員：楊武' % (' ' * 120))
    footer_run.font.size = Pt(10)
    footer_run.font.name = 'Times New Roman'
    footer_run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋體')
    footer_paragraph.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT


def FirstSecondPragragh(txtpath):
    """Add the bold info paragraph built from the first four lines of *txtpath*.

    Line 1 is prefixed with the sample label; the "Date:", "Image size:"
    and "Mag:" prefixes of lines 2-4 are replaced by their labels. The date
    line keeps its newline, which breaks the paragraph text after it.

    Raises:
        IndexError: if the file has fewer than four lines.
    """
    with open(txtpath, mode="r", encoding="utf-8") as file:
        endd = file.readlines()
    name = "樣品號：" + endd[0]
    data = endd[1].replace("Date:", "時間：")
    size = endd[2].replace("Image size:", "圖片尺寸：")
    mag = endd[3].replace("Mag:", "放大倍數(shù)：")

    p = doc.add_paragraph()
    run = p.add_run('%s %s%s %s' % (name.replace("\n", ""), data,
                                    size.replace("\n", ""), mag.replace("\n", "")))
    run.font.size = Pt(12)
    p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
    run.font.name = 'Times New Roman'
    run._element.rPr.rFonts.set(qn('w:eastAsia'), '宋體')
    run.bold = True


def FirstPicture(pic1):
    """Insert the image at path *pic1*, 3 inches wide, centered, no spacing."""
    doc.add_picture(pic1, width=Inches(3))
    last_paragraph = doc.paragraphs[-1]
    last_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    last_paragraph.paragraph_format.space_before = Pt(0)
    last_paragraph.paragraph_format.space_after = Pt(0)


def SecondPicture(pic2):
    """Insert the image at path *pic2* at 6 x 2.5 inches, centered."""
    doc.add_picture(pic2, width=Inches(6), height=Inches(2.5))
    last_paragraph = doc.paragraphs[-1]
    last_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    last_paragraph.paragraph_format.space_before = Pt(2)
    last_paragraph.paragraph_format.space_after = Pt(2)


def AddTable(f, tenfour):
    """Append the results table for one sample, followed by a page break.

    Args:
        f: Rows as returned by TableData(); at most the first 10 cells of
            each row are written.
        tenfour: Value shown in column 3 of the "Total" row.

    The table has at least 11 rows as before, and grows when *f* holds more
    than 9 rows so the "Total" row is never overwritten. Rows are taken
    from the *f* argument, not from module-level state.
    """
    row_count = max(MIN_TABLE_ROWS, len(f) + 2)
    total_row = row_count - 1

    table1 = doc.add_table(rows=row_count, cols=TABLE_COLUMNS)
    table1.style = doc.styles['Table Grid']
    # The original assigned a misspelled font name to a plain attribute of
    # the table, which had no effect; set the font on the style instead.
    table1.style.font.name = 'Times New Roman'
    table1.style.font.size = Pt(9)
    table1.style.font.bold = True

    for r in range(row_count):
        table1.cell(r, 2).width = Inches(1)
        table1.cell(r, 1).width = Inches(0.5)

    headers = (
        "元素",
        "原子\n序數(shù)",
        "序列",
        "非歸一化質(zhì)量百分比[Wt%]",
        "歸一化質(zhì)量百分比[Wt%]",
        "歸一化原子百分比[Wt%]",
        "化合物",
        "非歸一化質(zhì)量百分比[Wt%]",
        "歸一化質(zhì)量百分比[Wt%]",
        "絕對\n誤差[Wt%]",
    )
    for column, caption in enumerate(headers):
        table1.cell(0, column).text = caption

    table1.cell(total_row, 0).text = "Total"
    table1.cell(total_row, 3).text = "%s" % tenfour
    table1.cell(total_row, 4).text = "100"
    table1.cell(total_row, 5).text = "100"

    for rr, row in enumerate(f, start=1):
        for cc, value in enumerate(row[:TABLE_COLUMNS]):
            table1.cell(rr, cc).text = value

    # Center every cell both vertically and horizontally.
    for r in range(row_count):
        for c in range(TABLE_COLUMNS):
            table1.cell(r, c).vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            table1.cell(r, c).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_page_break()


def _collect_files(default_dir, suffix, bucket):
    """Ask for a folder and append its files ending in *suffix* to *bucket*.

    Paths are appended in sorted order so the n-th picture and the n-th
    text file pair up independently of the directory listing order. A
    cancelled dialog leaves *bucket* untouched.
    """
    chosen = gg.diropenbox(default_dir, '文件選擇對話框', default_dir)
    if not chosen:
        return
    found = []
    for folder, _dir_list, file_list in os.walk(chosen):
        for file_name in file_list:
            if file_name.lower().endswith(suffix):
                found.append(os.path.join(folder, file_name))
    bucket.extend(sorted(found))
    print(bucket)


def Openpic1turedir():
    """Fill ``pic1`` with the .jpg files of a user-chosen folder."""
    _collect_files(WORD_DIR, '.jpg', pic1)


def Openpic2turedir():
    """Fill ``pic2`` with the .jpg files of a user-chosen folder."""
    _collect_files(WORD1_DIR, '.jpg', pic2)


def Opentxt():
    """Fill ``txt`` with the .txt files of a user-chosen folder."""
    _collect_files(TXT_DIR, '.txt', txt)


def TableData(txtpath):
    """Build the table rows from the "K-series" lines of a sample text file.

    Every line containing "K-series" is split on whitespace into one row.
    A row that is exactly one cell short of the 10-column layout gets "/"
    inserted at index 6, wherever that row appears in the file.

    Returns:
        A list of rows (lists of str); empty when no line matches.
    """
    rows = []
    with open(txtpath, mode="r", encoding="utf-8") as file:
        for line in file:
            if "K-series" not in line:
                continue
            # split() also drops the newline and trailing blanks, which
            # would otherwise turn into empty cells.
            cells = line.split()
            if len(cells) == TABLE_COLUMNS - 1:
                cells.insert(6, "/")
            rows.append(cells)
    return rows


def TableData2(txtpath):
    """Return the second whitespace-separated token of the file's last line.

    Raises:
        IndexError: if the file is empty or its last line has fewer than
            two tokens.
    """
    with open(txtpath, mode="r", encoding="utf-8") as file2:
        endd2 = file2.readlines()
    down = endd2[-1].split()
    tenfour = down[1]
    return tenfour


if __name__ == '__main__':
    Openpic1turedir()
    Openpic2turedir()
    Opentxt()

    # Stop before anything is written or deleted when a dialog was
    # cancelled or a sample would be left without its pictures.
    if not txt:
        raise SystemExit("No text files selected; report not generated.")
    if len(pic1) < len(txt) or len(pic2) < len(txt):
        raise SystemExit(
            "Found %d text files but only %d / %d pictures; report not generated."
            % (len(txt), len(pic1), len(pic2)))

    HeaderFooter()
    for txt_path, first_image, second_image in zip(txt, pic1, pic2):
        FirstSecondPragragh(txt_path)
        rows = TableData(txt_path)
        total = TableData2(txt_path)
        table.append(rows)
        table2.append(total)
        print(rows)
        print(total)
        FirstPicture(first_image)
        SecondPicture(second_image)
        AddTable(rows, total)

    os.makedirs(RES_DIR, exist_ok=True)
    doc.save(r'C:\Users\Public\Documents\res\輸出的能譜報告.docx')
    gg.msgbox("您的報告已生成在 C:\\Users\\Public\\Documents\\res ", "提示")
    # The working folders are only needed until the report is saved.
    shutil.rmtree(WORD1_DIR)
    shutil.rmtree(WORD_DIR)
    shutil.rmtree(TXT_DIR)

報告效果如下:

總結(jié)

以上是生活随笔為你收集整理的python程序简单快速写能谱实验word报告的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： PA动画使用教程
下一篇：用python读取YUV文件转RGB