當前位置：首頁 > 编程语言 > python > 内容正文

python

编程doc转html,Python批量将word转html，并将html内容发布至网站。

發布時間：2024/9/27 python 36 豆豆

生活随笔收集整理的這篇文章主要介紹了编程doc转html,Python批量将word转html，并将html内容发布至网站。小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

搜索熱詞

下面是編程之家 jb51.cc 通過網絡收集整理的代碼片段。

編程之家小編現在分享給大家，也給大家做個參考。

#coding=utf-8

__author__ = 'zhm'

from win32com import client as wc

import os

import time

import random

import MysqLdb

import re

def wordsToHtml(dir):
    """Convert every Word document found under ``dir`` to an HTML file.

    The directory tree is walked recursively.  Each file whose suffix is
    ``doc`` or ``docx`` (compared case-insensitively) is opened through the
    WPS COM automation object and saved as ``<name>.html`` in the same
    folder.  Files with any other suffix are skipped without being opened.

    Args:
        dir: Root folder to scan.  The name shadows the builtin but is kept
            so existing callers keep working.
    """
    def as_text(value):
        # On Python 2 os.walk yields byte strings for a byte-string root;
        # they are decoded as GBK, the encoding the original code assumed.
        # Text values (Python 3, or a unicode root) pass through untouched.
        if isinstance(value, bytes):
            return value.decode("gbk")
        return value

    # Kingsoft WPS automation: the preview edition uses "KWPS", the release
    # edition "WPS".
    word = wc.Dispatch('KWPS.Application')
    try:
        for path, _subdirs, files in os.walk(dir):
            for wordFile in files:
                wordFile2 = as_text(wordFile)
                dotIndex = wordFile2.rfind(".")
                if dotIndex == -1:
                    print('********************ERROR: 未取得后綴名！')
                    continue

                fileSuffix = wordFile2[dotIndex + 1:].lower()
                if fileSuffix not in ("doc", "docx"):
                    # Not a Word document: do not even open it.
                    continue

                wordFullName = os.path.join(path, wordFile)
                htmlName = wordFile2[:dotIndex] + ".html"
                htmlFullName = os.path.join(as_text(path), htmlName)

                doc = word.Documents.Open(wordFullName)
                try:
                    # 8 is wdFormatHTML in Word's WdSaveFormat enumeration.
                    # NOTE(review): presumably WPS accepts the same value,
                    # as the original code relied on it - confirm.
                    doc.SaveAs(htmlFullName, 8)
                    print(u'生成了html文件：' + htmlFullName)
                finally:
                    # Always release the document, even if SaveAs failed.
                    doc.Close()
    finally:
        # Never leave the automation process running in the background.
        word.Quit()

    print("")
    print("Finished!")

# Captures the value of the ``src`` attribute of every <img> tag.
# NOTE(review): the pattern in the original source was empty (its HTML-like
# content appears to have been stripped), so this is a reconstruction.
_HTML_IMG_SRC_RE = re.compile(
    br"""<img\b[^>]*?\bsrc\s*=\s*["']?([^\s"'>]+)""", re.I
)


def _html_as_text(value, encoding="gbk"):
    """Return ``value`` as text, decoding byte strings with ``encoding``."""
    if isinstance(value, bytes):
        return value.decode(encoding)
    return value


def _html_unique_name(suffix):
    """Build a file name from the time in milliseconds plus a random number."""
    millis = int(time.time() * 1000)
    return str(millis) + str(random.randint(100, 10000)) + suffix


def _html_site_path(path):
    """Drop the drive prefix (``D:``) so the path can be used as a URL path."""
    return path.split(":", 1)[-1]


def _html_ensure_dir(folder):
    """Create ``folder`` if it does not exist yet."""
    if not os.path.exists(folder):
        os.makedirs(folder)


def _html_publish_images(content, html_dir, image_dir):
    """Copy images referenced by ``content`` and rewrite their references.

    ``content`` is the raw bytes of an HTML file located in ``html_dir``.
    Every distinct ``src`` that resolves to an existing file is copied into
    ``image_dir`` under a fresh name, and its occurrences in ``content`` are
    replaced by the site path of the copy.  References that do not resolve
    to a local file are left untouched.  Returns the rewritten bytes.
    """
    handled = set()
    for tagContent in _HTML_IMG_SRC_RE.findall(content):
        if tagContent in handled:
            continue
        handled.add(tagContent)

        imgSrcFullName = os.path.join(
            _html_as_text(html_dir), _html_as_text(tagContent)
        )
        if not os.path.isfile(imgSrcFullName):
            continue

        _html_ensure_dir(image_dir)
        # Keep the real extension; fall back to the original's '.png'.
        extension = os.path.splitext(imgSrcFullName)[1] or '.png'
        targetImgFile = os.path.join(image_dir, _html_unique_name(extension))
        with open(imgSrcFullName, 'rb') as src, open(targetImgFile, 'wb') as dst:
            dst.write(src.read())

        content = content.replace(
            tagContent, _html_site_path(targetImgFile).encode("gbk")
        )
    return content


def _html_iframe_wrapper(page_url):
    """Return the HTML snippet that embeds ``page_url`` in a resizing iframe.

    NOTE(review): the <script> and <iframe> tags were missing from the
    original literal (only the JavaScript body and the iframe attributes
    survived); they are reconstructed here - confirm against the site.
    """
    return '''
<script type="text/javascript">
function iFrameHeight() {
var ifm= document.getElementById("iframepage");
var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;
if(ifm != null && subWeb != null) {
ifm.height = subWeb.body.scrollHeight;
}
}
</script>
<iframe src="''' + page_url + '''"
marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>
'''


def html_add_to_db(dir):
    """Publish the HTML files under ``dir`` and register them in the database.

    For every ``.htm``/``.html`` file found while walking ``dir``:

    * locally referenced images are copied to ``D:/files/images/whzx/`` and
      the references are rewritten;
    * the rewritten page is written to ``D:/files/htmls/`` under a unique
      name;
    * a row ``(title, content)`` is inserted into ``common_article`` where
      ``title`` is the file name without suffix and ``content`` is an iframe
      snippet pointing at the published page.

    All inserts are committed once at the end; a failed insert is reported
    and skipped.

    Args:
        dir: Root folder to scan.  The name shadows the builtin but is kept
            so existing callers keep working.
    """
    # NOTE(review): the file-level ``import MysqLdb`` looks like a misspelling
    # of the real package name; it is imported here under its correct
    # spelling and should be fixed at the top of the file as well.
    import MySQLdb

    # D:/files is the static directory configured for the web server.
    targetDir = 'D:/files/htmls/'
    imgTarget = 'D:/files/images/whzx/'
    sql = "insert into common_article(title,content) values(%s,%s)"

    # NOTE(review): hard-coded root credentials - move to configuration.
    conn = MySQLdb.connect(
        host='localhost', port=3306, user='root', passwd='root',
        db='test', charset='utf8'
    )
    try:
        cur = conn.cursor()
        try:
            for path, _subdirs, files in os.walk(dir):
                for htmlFile in files:
                    htmlFile2 = _html_as_text(htmlFile)
                    dotIndex = htmlFile2.rfind(".")
                    if dotIndex == -1:
                        print('********************ERROR: 未取得后綴名！')
                        continue

                    fileSuffix = htmlFile2[dotIndex + 1:].lower()
                    if fileSuffix not in ("htm", "html"):
                        continue

                    title = os.path.splitext(htmlFile2)[0]
                    htmlFullName = os.path.join(path, htmlFile)

                    # Read the page, then copy its images and rewrite links.
                    with open(htmlFullName, 'rb') as htFile:
                        htmStrCotent = htFile.read()
                    htmStrCotent = _html_publish_images(
                        htmStrCotent, path, imgTarget
                    )

                    _html_ensure_dir(targetDir)
                    targetFile = os.path.join(
                        targetDir, _html_unique_name('.html')
                    )
                    with open(targetFile, 'wb') as tmpTargetFile:
                        tmpTargetFile.write(htmStrCotent)

                    # Wrap the converted page in an iframe for the article.
                    iframeHtml = _html_iframe_wrapper(
                        _html_site_path(targetFile)
                    )
                    try:
                        cur.execute(sql, (title, iframeHtml))
                    except MySQLdb.Error as exc:
                        print("Error: unable to insert data: %s" % exc)
            conn.commit()
        finally:
            cur.close()
    finally:
        # Close the database connection even when publishing failed.
        conn.close()

if __name__ == '__main__':
    # Convert the Word documents first, then publish the generated HTML;
    # both steps work on the same folder.
    for step in (wordsToHtml, html_add_to_db):
        step('d:/word')

以上是編程之家(jb51.cc)為你收集整理的全部代碼內容，希望文章能夠幫你解決所遇到的程序開發問題。

如果覺得編程之家網站內容還不錯，歡迎將編程之家網站推薦給程序員好友。

總結

如果覺得編程之家網站內容還不錯，歡迎將編程之家網站推薦給程序員好友。

本圖文內容來源于網友網絡收集整理提供，作為學習參考使用，版權屬于原作者。

創作挑戰賽新人創作獎勵來咯，堅持創作打卡瓜分現金大獎

總結

以上是生活随笔為你收集整理的编程doc转html,Python批量将word转html，并将html内容发布至网站。的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：取文件 shell_webshell文件
下一篇： python软件是什么原因引起的_Pyt