doc文件转换html,HTML+CSS入门 如何使用POI将doc文件转换为HTML
本篇教程介紹了HTML+CSS入門 如何使用POI將doc文件轉換為HTML,希望閱讀本篇文章以后大家有所收獲,幫助大家HTML+CSS入門。
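需要的jar包有:poi、poi-scratchpad(提供org.apache.poi.hwpf)、jsoup,以及將html轉換為pdf時用到的flying-saucer(org.xhtmlrenderer)和iText(com.lowagie)。其中有一些是依賴包,可以使用maven下載。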
doc文件轉換為html文件
package com.gsww.sxzz.controller.service;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.PicturesManager;
import org.apache.poi.hwpf.converter.WordToHtmlConverter;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.PictureType;
import org.jsoup.Jsoup;
import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;
/**
*?Created?by?Carey?on?15-2-2.
*/
public?class?docTohtml?{
public?static?void?main(String?argv[])?{
try?{
convert2Html("D:\\b.doc","D:\\1.html");
}?catch?(Exception?e)?{
e.printStackTrace();
}
}
//輸出html文件
public?static?void?writeFile(String?content,?String?path)?{
FileOutputStream?fos?=?null;
BufferedWriter?bw?=?null;
org.jsoup.nodes.Document?doc?=?Jsoup.parse(content);
String?styleOld=doc.getElementsByTag("style").html();
//統一字體格式為宋體
styleOld=styleOld.replaceAll("font-family:.+(?=;\\b)",?"font-family:SimSun");
doc.getElementsByTag("head").empty();
doc.getElementsByTag("head").append("");
doc.getElementsByTag("head").append("?");
doc.getElementsByTag("style").append(styleOld);
/*正則表達式查詢字體內容:font-family:.+(?=;\b)*/
System.out.println(content);
content=doc.html();
content=content.replace("",?"");
try?{
File?file?=?new?File(path);
fos?=?new?FileOutputStream(file);
bw?=?new?BufferedWriter(new?OutputStreamWriter(fos,"UTF-8"));
bw.write(content);
}?catch?(FileNotFoundException?fnfe)?{
fnfe.printStackTrace();
}?catch?(IOException?ioe)?{
ioe.printStackTrace();
}?finally?{
try?{
if?(bw?!=?null)
bw.close();
if?(fos?!=?null)
fos.close();
}?catch?(IOException?ie)?{
}
}
}
//word?轉?html
public?static?void?convert2Html(String?fileName,?String?outPutFile)
throws?TransformerException,?IOException,
ParserConfigurationException?{
HWPFDocument?wordDocument?=?new?HWPFDocument(new?FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new?FileInputStream(inputFile));
//兼容2007?以上版本
//????????XSSFWorkbook??xssfwork=new?XSSFWorkbook(new?FileInputStream(fileName));
WordToHtmlConverter?wordToHtmlConverter?=?new?WordToHtmlConverter(
DocumentBuilderFactory.newInstance().newDocumentBuilder()
.newDocument());
wordToHtmlConverter.setPicturesManager(?new?PicturesManager()
{
public?String?savePicture(?byte[]?content,
PictureType?pictureType,?String?suggestedName,
float?widthInches,?float?heightInches?)
{
return?"test/"+suggestedName;
}
}?);
wordToHtmlConverter.processDocument(wordDocument);
//save?pictures
List?pics=wordDocument.getPicturesTable().getAllPictures();
if(pics!=null){
for(int?i=0;i
Picture?pic?=?(Picture)pics.get(i);
System.out.println();
try?{
pic.writeImageContent(new?FileOutputStream("D:/test/"
+?pic.suggestFullFileName()));
}?catch?(FileNotFoundException?e)?{
e.printStackTrace();
}
}
}
Document?htmlDocument?=?wordToHtmlConverter.getDocument();
ByteArrayOutputStream?out?=?new?ByteArrayOutputStream();
DOMSource?domSource?=?new?DOMSource(htmlDocument);
StreamResult?streamResult?=?new?StreamResult(out);
TransformerFactory?tf?=?TransformerFactory.newInstance();
Transformer?serializer?=?tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING,?"UTF-8");
serializer.setOutputProperty(OutputKeys.INDENT,?"yes");
serializer.setOutputProperty(OutputKeys.METHOD,?"HTML");
serializer.transform(domSource,?streamResult);
out.close();
writeFile(new?String(out.toByteArray()),?outPutFile);
}
}
遇到的問題,當doc轉換為html時不會將圖像的線條給轉換過來。只有在table表格中才可以轉換為span標簽。如果要作下劃線,可以放一個table的單元格只設定下邊框就可以完美轉換為html了。
將html轉換為pdf
package com.gsww.sxzz.controller.service;
import com.lowagie.text.pdf.BaseFont;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
/**
*?Created?by?Carey?on?15-2-2.
*/
public?class?htmlToPdf?{
public?boolean?convertHtmlToPdf(String?inputFile,?String?outputFile)
{
try?{
OutputStream?????os?=?new?FileOutputStream(outputFile);
ITextRenderer?renderer?=?new?ITextRenderer();
String?url?=?new?File(inputFile).toURI().toURL().toString();
renderer.setDocument(url);
//?解決中文支持問題
ITextFontResolver?fontResolver?=?renderer.getFontResolver();
/*fontResolver.addFont("C:\\Windows\\Fonts\\simsunb.ttf",?BaseFont.IDENTITY_H,?BaseFont.NOT_EMBEDDED);
*///宋體文件的相對路徑
fontResolver.addFont("C:\\Windows\\Fonts\\simsun.ttc",?BaseFont.IDENTITY_H,?BaseFont.NOT_EMBEDDED);
renderer.getSharedContext().setBaseURL("file:/D:/");
renderer.layout();
renderer.createPDF(os);
os.flush();
os.close();
}?catch?(Exception?e)?{
//?TODO?Auto-generated?catch?block
e.printStackTrace();
}
return?true;
}
public???static??void??main(String?[]?args){
htmlToPdf?html2Pdf?=new?htmlToPdf();
try?{
html2Pdf.convertHtmlToPdf("D:\\1.html","D:\\index.pdf");
}?catch?(Exception?e)?{
e.printStackTrace();
}
}
}
本文由職坐標整理發布,歡迎關注職坐標WEB前端HTML/CSS頻道,獲取更多HTML/CSS知識!
總結
以上是生活随笔為你收集整理的doc文件转换html,HTML+CSS入门 如何使用POI将doc文件转换为HTML的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 灰飞烟灭!美国电子烟巨头遭遇灭顶之灾
- 下一篇: 浏览器使用java_java如何调用本地