Java读取word文档里的复杂型表格(任免表)
生活随笔
收集整理的這篇文章主要介紹了
Java读取word文档里的复杂型表格(任免表)
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
使用apache-poi讀取word文檔裡的複雜型表格
這裡使用的是任免表編輯器產生的word文檔。
word模板:https://download.csdn.net/download/weixin_41420919/85708792
問題: 需要讀取任免表編輯器生成的word裡面的內容,其中生成的word是一個表格,需要根據表格的行列數獲取相應的內容。
難點:在於表格裡有圖片,以及單元格的合併。
解決方案:
maven坐標
<!-- 版本號要統一,不然會報讀取word文檔錯誤 -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.15</version>
</dependency>

測試程序 (打印word所有的表格以及表格字段)
//注意測試程序沒有關閉流,實際使用要關閉流String filePath = "D:\\xujinwei\\yz_dagl\\testWord\\任免表99.doc";WordImportUtil test = new WordImportUtil(filePath);InputStream is;FileInputStream in = new FileInputStream(filePath);// 載入文檔POIFSFileSystem pfs = new POIFSFileSystem(in);HWPFDocument hwpf = new HWPFDocument(pfs);Range range = hwpf.getRange();// 得到文檔的讀取范圍TableIterator it = new TableIterator(range);Map<String,String> map = new HashMap<>();int tt = 1;while (it.hasNext()) {Table tb = it.next();System.out.println("=========================================第"+tt+"個表格==");// 迭代行,默認從0開始int i = 0;for ( ; i < tb.numRows(); i++) {TableRow tr = tb.getRow(i);// 迭代列,默認從0開始int j = 0;for ( ; j < tr.numCells(); j++) {TableCell td = tr.getCell(j);// 取得單元格// 取得單元格的內容String s = "";for (int k = 0; k < td.numParagraphs(); k++) {Paragraph para = td.getParagraph(k);// 獲取第k個段落s += para.text().trim();}map.put(s, "i:" + i + " " + "j:" + j);}map.forEach((x,y)->{System.out.println("===========================key========"+x);System.out.println("===========================value========"+y);});map.clear();}tt++;}//獲取圖片PicturesTable picturesTable = hwpf.getPicturesTable();List<Picture> allPictures = picturesTable.getAllPictures();for (Picture picture : allPictures) {String picBase64Str = Base64Utils.encodeToString(picture.getContent());}return "";這里對所有表格的字段進行打印,實測時,我發現我的word文檔一個表格被解析成三個表格。從第二個表格起才是我所需要的數據以及因為有些單元格合并,獲取的字段不對, 因此我還做了特殊處理。下面提供一個對合并的單元格的解決方案
for ( ; j < tr.numCells(); j++) {TableCell td = tr.getCell(j);// 取得單元格// 取得單元格的內容String s = ""; //這個s是合并單元格的結果,如果想要獲取單獨的可以在k那里進行操作,可以通過判斷k的值,直接return //para.text().trim();for (int k = 0; k < td.numParagraphs(); k++) {Paragraph para = td.getParagraph(k);// 獲取第k個段落s += para.text().trim();}}完整的測試類
package com.hrtac.application.util;import com.hrtac.application.bean.xt001.DA051Bean; import com.hrtac.application.bean.xt001.DA052Bean; import org.apache.commons.lang3.StringUtils; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.PicturesTable; import org.apache.poi.hwpf.usermodel.*; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.springframework.util.Base64Utils;import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map;/*** Description:* User: xu jin wei* Date: 2022-06-15* Time: 8:44*/ public class WordImportUtil {private String filePath ;private HWPFDocument hwpf;private InputStream in;public WordImportUtil(String filePath){this.in = null;// 載入文檔try {in = new FileInputStream(filePath);POIFSFileSystem pfs = new POIFSFileSystem(in);this.hwpf = new HWPFDocument(pfs);} catch (FileNotFoundException e) {e.printStackTrace();} catch (IOException e) {e.printStackTrace();}}public String getTableText(int tableIndex, int row ,int col, boolean isSpecial){int noSpecial = 3;return getTableText(tableIndex, row ,col, isSpecial, noSpecial);}public String getTableText(int tableIndex, int row ,int col, boolean isSpecial, int specialParagraph){Range range = hwpf.getRange();// 得到文檔的讀取范圍TableIterator it = new TableIterator(range);List<String> list = new ArrayList<>();Map<String,String> map = new HashMap<>();StringBuffer errMsg = new StringBuffer();int tt = 1;int i =0;while (it.hasNext()) {Table tb = it.next();if(i==tableIndex) {if(row>=tb.numRows()){return "";}TableRow tr = tb.getRow(row);TableCell td = tr.getCell(col);// 取得單元格// 取得單元格的內容String s = "";for (int k = 0; k < td.numParagraphs(); k++) {Paragraph para = td.getParagraph(k);// 獲取第k個段落s += para.text().trim();if(isSpecial && row==6 && col==2 && specialParagraph==k ){//在職教育-學歷return para.text().trim();}if(isSpecial && 
row==6 && col==2 && specialParagraph==k ){//在職教育-學位return para.text().trim();}if(isSpecial && row==6 && col==4 && specialParagraph==k ){//在職教育-畢業院校return para.text().trim();}if(isSpecial && row==6 && col==4 && specialParagraph==k ){//在職教育-畢業專業return para.text().trim();}}return s;}i++;}return "";}public void closeStream() {if(this.hwpf !=null){try {this.hwpf.close();} catch (IOException e) {e.printStackTrace();}}if (this.in != null) {try {this.in.close();} catch (IOException e) {e.printStackTrace();}}}public DA051Bean getDA051BaseInfoFromWord(WordImportUtil wordImportUtil) {DA051Bean da051Bean = new DA051Bean();int tableIndex = 1;da051Bean.setXM00001(wordImportUtil.getTableText(tableIndex,0,1,false));da051Bean.setXB00001(wordImportUtil.getTableText(tableIndex,0,3,false));String CSNY001 = wordImportUtil.getTableText(tableIndex, 0, 5,false);if(StringUtils.isNotEmpty(CSNY001)) CSNY001 = CSNY001.substring(0,CSNY001.indexOf("(")).replace(".","");da051Bean.setCSNY001(CSNY001);da051Bean.setMZ00001(wordImportUtil.getTableText(tableIndex,1,1,false));da051Bean.setJG00001(wordImportUtil.getTableText(tableIndex,1,3,false));da051Bean.setCSD0001(wordImportUtil.getTableText(tableIndex,1,5,false));da051Bean.setRDSJ001(wordImportUtil.getTableText(tableIndex,2,1,false).replace(".",""));da051Bean.setCJGZ002(wordImportUtil.getTableText(tableIndex,2,3,false).replace(".",""));da051Bean.setJKZK001(wordImportUtil.getTableText(tableIndex,2,5,false));da051Bean.setZYJS007(wordImportUtil.getTableText(tableIndex,3,1,false));da051Bean.setZYTC001(wordImportUtil.getTableText(tableIndex,3,3,false));da051Bean.setXLMC001(wordImportUtil.getTableText(tableIndex,4,2,false));da051Bean.setXWMC001(wordImportUtil.getTableText(tableIndex,5,2,false));da051Bean.setBYYX001(wordImportUtil.getTableText(tableIndex,4,4,false));da051Bean.setBYZY001(wordImportUtil.getTableText(tableIndex,5,4,false));// 在職教育 學歷和學位合并了 
院校和專業也合并了da051Bean.setXLMC002(wordImportUtil.getTableText(tableIndex,6,2,true,0));da051Bean.setXWMC002(wordImportUtil.getTableText(tableIndex,6,2,true,1));da051Bean.setBYYX002(wordImportUtil.getTableText(tableIndex,6,4,true,0));da051Bean.setBYZY002(wordImportUtil.getTableText(tableIndex,6,4,true,1));da051Bean.setXRZW001(wordImportUtil.getTableText(tableIndex,8,1,false));da051Bean.setNRZW001(wordImportUtil.getTableText(tableIndex,9,1,false));da051Bean.setNMZW001(wordImportUtil.getTableText(tableIndex,10,1,false));da051Bean.setJL00001(wordImportUtil.getTableText(tableIndex,11,1,false));tableIndex++;da051Bean.setJCQK001(wordImportUtil.getTableText(tableIndex,0,1,false));da051Bean.setNDKH001(wordImportUtil.getTableText(tableIndex,1,1,false));da051Bean.setRMLY001(wordImportUtil.getTableText(tableIndex,2,1,false));//和家庭成員有關 7 --11 8--12 9--13 10--14da051Bean.setCBDW001(wordImportUtil.getTableText(tableIndex,getCBDW(wordImportUtil),1,false));return da051Bean;}public int getCBDW(WordImportUtil wordImportUtil){int index = -1;if("呈報單位".equals(wordImportUtil.getTableText(2,11,0,false))){index = 11;};if("呈報單位".equals(wordImportUtil.getTableText(2,12,0,false))){index = 12;};if("呈報單位".equals(wordImportUtil.getTableText(2,13,0,false))){index = 13;};if("呈報單位".equals(wordImportUtil.getTableText(2,14,0,false))){index = 14;};return index;}public List<DA052Bean> getDa052BeanListFormWord(WordImportUtil wordImportUtil){int index = getCBDW(wordImportUtil);List<DA052Bean> list = new ArrayList<>();for(int row = 4; row < index; row ++){DA052Bean da052Bean = new DA052Bean();int col = 
1;da052Bean.setCYCW001(wordImportUtil.getTableText(2,row,col,false));da052Bean.setCYXM001(wordImportUtil.getTableText(2,row,++col,false));da052Bean.setCYCS001(wordImportUtil.getTableText(2,row,++col,false));da052Bean.setCYZZ001(wordImportUtil.getTableText(2,row,++col,false));da052Bean.setCYDW001(wordImportUtil.getTableText(2,row,++col,false));da052Bean.setXH00001(String.valueOf((row-3)));list.add(da052Bean);}for(int k = list.size(); k <10 ;k++){DA052Bean da052Bean = new DA052Bean();da052Bean.setXH00001(String.valueOf( (k + 1)));list.add(da052Bean);}return list;}public String getPicBase64String(WordImportUtil wordImportUtil) {PicturesTable picturesTable = wordImportUtil.hwpf.getPicturesTable();List<Picture> allPictures = picturesTable.getAllPictures();for (Picture picture : allPictures) {return Base64Utils.encodeToString(picture.getContent());}return "";}public static void main(String[] args)throws Exception {String filePath = "D:\\xujinwei\\yz_dagl\\testWord\\任免表99.doc";WordImportUtil test = new WordImportUtil(filePath);InputStream is;FileInputStream in = new FileInputStream(filePath);// 載入文檔POIFSFileSystem pfs = new POIFSFileSystem(in);HWPFDocument hwpf = new HWPFDocument(pfs);Range range = hwpf.getRange();// 得到文檔的讀取范圍TableIterator it = new TableIterator(range);Map<String,String> map = new HashMap<>();int tt = 1;while (it.hasNext()) {Table tb = it.next();System.out.println("=========================================第"+tt+"個表格==");// 迭代行,默認從0開始// if(tt==2) {int i = 0;for ( ; i < tb.numRows(); i++) {TableRow tr = tb.getRow(i);// 迭代列,默認從0開始int j = 0;for ( ; j < tr.numCells(); j++) {TableCell td = tr.getCell(j);// 取得單元格// 取得單元格的內容String s = "";for (int k = 0; k < td.numParagraphs(); k++) {Paragraph para = td.getParagraph(k);// 獲取第k個段落s += para.text().trim();}map.put(s, "i:" + i + " " + "j:" + j);}map.forEach((x,y)->{System.out.println("===========================key========"+x);System.out.println("===========================value========"+y);});map.clear();}// 
}tt++;}/*WordUtils wordUtils = new WordUtils(false);wordUtils.openDocument(filePath);Dispatch doc = wordUtils.getDoc();// 獲取所有表格Dispatch tables = Dispatch.get(doc, "Tables").toDispatch();// 獲取表格數目int tablesCount = Dispatch.get(tables, "Count").getInt();System.out.println("文檔中共有" + tablesCount + "個表格");for (int i = 1; i <= tablesCount; i++) {// 獲取第i個表格Dispatch table = Dispatch.call(tables, "Item", new Variant(i)).toDispatch();// 獲取該表格所有行Dispatch rows = Dispatch.call(table, "Rows").toDispatch();// 獲取該表格所有列Dispatch columns = Dispatch.call(table, "Columns").toDispatch();// 獲取該表格行數int rowsCount = Dispatch.get(rows, "Count").getInt();// 獲取該表格列數int columnsCount = Dispatch.get(columns, "Count").getInt();for (int j = 1; j <= rowsCount; j++) {for (int k = 1; k <= columnsCount; k++) {Dispatch cell = Dispatch.call(table, "Cell", new Variant(j), new Variant(k)).toDispatch();Dispatch Range = Dispatch.get(cell, "Range").toDispatch();String text = Dispatch.get(Range, "Text").getString();//去掉最后的回車符;text = text.substring(0, text.length() - 2);System.out.println("單元格中的內容:" + text);}}} */// test.testWord(filePath);// test.spireForTableOfDoc(filePath);}}總結
以上是生活随笔為你收集整理的Java读取word文档里的复杂型表格(任免表)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Windows Server 2008
- 下一篇: 探究Java的设计原则