Java-pdf无限压缩方案-优化内存问题
生活随笔
收集整理的這篇文章主要介紹了
Java-pdf无限压缩方案-优化内存问题
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
背景
因項目需求,項目中需要提供pdf壓縮功能,將某一頁壓縮至1M大小。常見的Java pdf處理方案有 iText、PDFBox 以及 Aspose。
方案一:itext壓縮(不推薦)
代碼
/**
 * Shrinks a PDF by re-encoding every embedded image as a down-scaled RGB JPEG.
 *
 * <p>Only image XObjects are rewritten; text and vector content are left untouched, so a PDF
 * whose size does not come from images will not get noticeably smaller.
 *
 * @param src    path of the source PDF
 * @param dest   path the rewritten PDF is written to
 * @param factor scale applied to both image dimensions; if scaling would yield a zero-sized
 *               image, the image keeps its original dimensions
 * @throws PdfCompressException if reading, re-encoding or preparing the output fails
 */
public static void compressPdf(String src, String dest, float factor)
        throws PdfCompressException {
    log.info("use radio {} compress file:{}>>>{}", factor, src, dest);
    PdfReader reader = null;
    PdfStamper stamper = null;
    FileOutputStream out = null;
    try {
        reader = new PdfReader(src);
        int n = reader.getXrefSize();
        // Walk every indirect object and rewrite the ones that are image streams.
        for (int i = 0; i < n; i++) {
            PdfObject object = reader.getPdfObject(i);
            if (object == null || !object.isStream()) {
                continue;
            }
            PRStream stream = (PRStream) object;
            PdfObject subtype = stream.get(PdfName.SUBTYPE);
            if (subtype == null || !subtype.toString().equals(PdfName.IMAGE.toString())) {
                continue;
            }
            PdfImageObject image = new PdfImageObject(stream);
            BufferedImage bi = image.getBufferedImage();
            if (bi == null) {
                continue;
            }
            int width = bi.getWidth();
            int height = bi.getHeight();
            AffineTransform at = AffineTransform.getScaleInstance(1, 1);
            int scaledWidth = (int) (width * factor);
            int scaledHeight = (int) (height * factor);
            // Only scale when the result still has at least one pixel in each direction.
            if (scaledWidth > 0 && scaledHeight > 0) {
                width = scaledWidth;
                height = scaledHeight;
                at = AffineTransform.getScaleInstance(factor, factor);
            }
            BufferedImage img = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);
            Graphics2D g = img.createGraphics();
            try {
                g.drawRenderedImage(bi, at);
            } finally {
                // Release the native graphics resources for every image, not just at GC time.
                g.dispose();
            }
            ByteArrayOutputStream imgBytes = new ByteArrayOutputStream();
            ImageIO.write(img, "JPG", imgBytes);
            stream.clear();
            stream.setData(imgBytes.toByteArray(), false, PRStream.BEST_COMPRESSION);
            stream.put(PdfName.TYPE, PdfName.XOBJECT);
            stream.put(PdfName.SUBTYPE, PdfName.IMAGE);
            stream.put(PdfName.FILTER, PdfName.DCTDECODE);
            stream.put(PdfName.WIDTH, new PdfNumber(width));
            stream.put(PdfName.HEIGHT, new PdfNumber(height));
            stream.put(PdfName.BITSPERCOMPONENT, new PdfNumber(8));
            stream.put(PdfName.COLORSPACE, PdfName.DEVICERGB);
        }
        // Keep a handle on the file stream so it is closed even if the stamper cannot be created.
        out = new FileOutputStream(dest);
        stamper = new PdfStamper(reader, out);
    } catch (Exception e) {
        log.error("pdf compress error:{}>>>{}", src, dest);
        log.error("pdf compress error:", e);
        throw new PdfCompressException(e.getMessage());
    } finally {
        if (stamper != null) {
            try {
                // Closing the stamper is what actually writes the modified document.
                stamper.close();
            } catch (Exception e) {
                log.error("stamper close failed when compress pdf:", e);
            }
        }
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                log.error("output close failed when compress pdf:", e);
            }
        }
        if (reader != null) {
            reader.close();
        }
    }
}
方案描述
提供一個壓縮方法,先嘗試 1倍壓縮判斷是否小于1M,然后依次嘗試0.9、0.8...0.1,直至factor<=0.1或者壓縮后文件小于等于1M才停止壓縮。
方案問題
該方案相當于只針對pdf當中的圖片進行壓縮,之前遇見一個10M的非圖片pdf,就是一個表格,但是無論如何都壓不下來。 后面采用wps和Aspose以及在線的pdf壓縮工具同樣處理不了。 壓縮效率低,內存消耗巨大。
方案二:pdfbox方案(強烈不推薦)
代碼
/**
 * Places a single image, centred and scaled to fit, on one A4 page of a new PDF.
 *
 * <p>Images that are wider than tall are rotated by 90 degrees before being placed.
 *
 * @param inputFile path of the image to embed
 * @param pdfFile   path of the PDF to create
 * @throws Image2PdfException if the image cannot be read or the PDF cannot be written
 */
public static void image2Pdf(String inputFile, String pdfFile) throws Image2PdfException {
    log.info("convert image 2 pdf :{}>>>{}", inputFile, pdfFile);
    Document doc = null;
    PdfWriter pdfWriter = null;
    FileOutputStream pdfOut = null;
    try {
        File file = new File(inputFile);
        doc = new Document(PageSize.A4, 20, 20, 20, 20);
        // Hold the stream ourselves so it is released even if the writer cannot be created.
        pdfOut = new FileOutputStream(pdfFile);
        pdfWriter = PdfWriter.getInstance(doc, pdfOut);
        doc.open();
        doc.newPage();
        Image image;
        String lowerName = file.getName().toLowerCase();
        if (lowerName.endsWith("jpg") || lowerName.endsWith("jpeg")) {
            java.awt.Image awtImage = Toolkit.getDefaultToolkit().createImage(file.getAbsolutePath());
            image = Image.getInstance(awtImage, null);
        } else {
            image = Image.getInstance(file.getAbsolutePath());
        }
        float height = image.getHeight();
        float width = image.getWidth();
        if (width > height) {
            // Landscape image: rotate it and re-encode in its original format.
            String type = file.getName().substring(file.getName().lastIndexOf(".") + 1).toLowerCase();
            ByteArrayOutputStream rotated = new ByteArrayOutputStream();
            try (FileInputStream in = new FileInputStream(file)) {
                BufferedImage src = ImageIO.read(in);
                BufferedImage des1 = RotateImage.rotate(src, 90);
                ImageIO.write(des1, type, rotated);
            }
            image = Image.getInstance(rotated.toByteArray());
            height = image.getHeight();
            width = image.getWidth();
        }
        int percent = getPercent(height, width);
        image.setAlignment(Image.MIDDLE);
        image.scalePercent(percent);
        float x = (PageSize.A4.getWidth() - image.getScaledWidth()) / 2;
        float y = (PageSize.A4.getHeight() - image.getScaledHeight()) / 2;
        image.setAbsolutePosition(x, y);
        doc.add(image);
    } catch (Exception e) {
        log.error("image 2 pdf failed:{}>>>{}", inputFile, pdfFile);
        log.error("exception info:", e);
        throw new Image2PdfException(e.getMessage());
    } finally {
        if (doc != null) {
            try {
                doc.close();
            } catch (Exception e) {
                log.info("空文檔:", e);
            }
        }
        if (pdfWriter != null) {
            pdfWriter.close();
        }
        if (pdfOut != null) {
            try {
                pdfOut.close();
            } catch (IOException e) {
                log.error("close pdf output failed:", e);
            }
        }
    }
}

/**
 * Computes the scale percentage that fits an image onto an A4 page, keeping its aspect ratio.
 *
 * <p>The longer side of the image is fitted to the matching A4 dimension minus 120 units.
 *
 * @param height image height
 * @param width  image width
 * @return scale percentage, rounded to the nearest integer
 */
private static int getPercent(float height, float width) {
    float percent;
    if (height > width) {
        percent = (PageSize.A4.getHeight() - 120) / height * 100;
    } else {
        percent = (PageSize.A4.getWidth() - 120) / width * 100;
    }
    return Math.round(percent);
}

/**
 * Rasterises a PDF at the given dpi and rebuilds it as an image-only PDF.
 *
 * <p>The intermediate image directory (named after {@code source} without its extension) and
 * the intermediate per-page PDF directory are always removed afterwards, so a later call for the
 * same source never picks up images left behind by an earlier attempt.
 *
 * @param source    path of the source PDF; must contain a file extension
 * @param targetPdf path of the PDF to create
 * @param dpi       resolution used when rendering pages to images
 * @throws Pdf2ImageException if rendering produced no images
 * @throws PdfSplitException  if the source could not be split into pages
 * @throws Image2PdfException if an image could not be converted or the pages could not be merged
 */
public static void pdf2ImagePdf(String source, String targetPdf, int dpi)
        throws Pdf2ImageException, PdfSplitException, Image2PdfException {
    String imagePath = source.substring(0, source.lastIndexOf("."));
    File imageDir = new File(imagePath);
    File fpd = null;
    try {
        imageDir = YhPdfUtil.pdf2Images(source, imagePath, dpi);
        File[] files = imageDir.listFiles();
        if (files == null || files.length == 0) {
            throw new Pdf2ImageException("no image found,may pdf 2 image failed");
        }
        if (files.length == 1) {
            log.info("pdf just one img ,just convert");
            YhPdfUtil.image2Pdf(files[0].getAbsolutePath(), targetPdf);
            return;
        }
        log.info("so much images,convert every img and merge all...");
        String tmpPdfDir =
                source.replace("\\", "/").substring(0, source.lastIndexOf(".")) + "-pdf-" + System.currentTimeMillis() + "/";
        fpd = new File(tmpPdfDir);
        if (!fpd.exists()) {
            fpd.mkdirs();
        }
        for (int k = 0; k < files.length; k++) {
            String imageName = files[k].getName();
            String fn = imageName.substring(0, imageName.lastIndexOf(".")) + k + ".pdf";
            YhPdfUtil.image2Pdf(files[k].getAbsolutePath(), tmpPdfDir + fn);
        }
        File[] tps = fpd.listFiles();
        if (tps == null || tps.length == 0) {
            throw new Image2PdfException("no pdf found,may image 2 pdf failed");
        }
        List<String> tst = new ArrayList<>();
        for (File tp : tps) {
            tst.add(tp.getAbsolutePath());
        }
        // NOTE(review): page order relies on the lexicographic order of the generated file
        // names - confirm the names produced by pdf2Images sort in page order.
        tst.sort(Comparator.comparing(t -> t));
        if (!YhPdfUtil.mergePdf(tst, targetPdf)) {
            // mergePdf reports failure through its return value; do not pretend it worked.
            throw new Image2PdfException("merge page pdf failed:" + targetPdf);
        }
    } finally {
        deleteTempDirQuietly(imageDir);
        deleteTempDirQuietly(fpd);
    }
}

/** Deletes an intermediate directory, logging instead of throwing when deletion fails. */
private static void deleteTempDirQuietly(File dir) {
    if (dir == null || !dir.exists()) {
        return;
    }
    try {
        FileUtils.deleteDirectory(dir);
    } catch (IOException e) {
        log.error("pdf轉純圖pdf后,刪除臨時文件失敗:", e);
    }
}

/**
 * Produces an image-only copy of a PDF that is no larger than {@code size} bytes, if possible.
 *
 * <p>A source that already fits is copied unchanged. Otherwise the PDF is re-rendered starting
 * at a dpi proportional to {@code size / sourceLength} (capped at 1000) and the dpi is halved
 * until the result fits or the dpi reaches 1. At least one attempt is always made; if no
 * attempt fits, the last (smallest) result is left at {@code targetPdf} and a warning is logged.
 *
 * @param source    path of the source PDF
 * @param targetPdf path of the PDF to create
 * @param size      maximum size of the result in bytes
 * @throws Pdf2ImageException if rendering fails
 * @throws PdfSplitException  if splitting the source fails
 * @throws Image2PdfException if rebuilding the PDF fails
 * @throws IOException        if the source cannot be copied
 */
public static void pdf2ImagePdfWithMax(String source, String targetPdf, long size)
        throws Pdf2ImageException, PdfSplitException, Image2PdfException, IOException {
    File sourceFile = new File(source);
    long sourceLength = sourceFile.length();
    if (sourceLength <= size) {
        log.info("sourceFile's length:{}<=size:{},just copy", sourceLength, size);
        FileUtils.copyFile(sourceFile, new File(targetPdf));
        return;
    }
    long scaled = size * 1000 / sourceLength;
    // Clamp to [2, 1000] so that even a very large source gets one conversion attempt.
    int dpi = (int) Math.max(2, Math.min(1000, scaled));
    for (; dpi > 1; dpi = dpi / 2) {
        pdf2ImagePdf(source, targetPdf, dpi);
        if (new File(targetPdf).length() <= size) {
            return;
        }
    }
    log.warn("could not compress {} to {} bytes, smallest result kept at {}", source, size, targetPdf);
}

/**
 * Merges several PDF files into one, in list order.
 *
 * @param fileList   paths of the PDFs to merge, e.g. ["D:/opt/aaa.pdf","D:/opt/bbb.pdf"]
 * @param newPdfPath path of the merged PDF, e.g. "D:/opt/ccc.pdf"
 * @return {@code true} when all pages were copied, {@code false} when the list is empty or
 *         reading/writing failed
 */
public static boolean mergePdf(List<String> fileList, String newPdfPath) {
    if (fileList == null || fileList.isEmpty()) {
        log.error("pdf合并失敗: no input files for {}", newPdfPath);
        return false;
    }
    Document document = null;
    FileOutputStream fo = null;
    PdfCopy copy = null;
    PdfReader rr = null;
    try {
        fo = new FileOutputStream(newPdfPath);
        // The first page of the first file determines the page size of the merged document.
        rr = new PdfReader(fileList.get(0));
        document = new Document(rr.getPageSize(1));
        copy = new PdfCopy(document, fo);
        copy.setFullCompression();
        document.open();
        for (String path : fileList) {
            PdfReader reader = new PdfReader(path);
            try {
                int n = reader.getNumberOfPages();
                for (int j = 1; j <= n; j++) {
                    document.newPage();
                    PdfImportedPage page = copy.getImportedPage(reader, j);
                    copy.addPage(page);
                }
            } finally {
                reader.close();
            }
        }
        return true;
    } catch (IOException | DocumentException e) {
        log.error("pdf合并失敗:", e);
        return false;
    } finally {
        if (rr != null) {
            rr.close();
        }
        if (copy != null) {
            copy.close();
        }
        if (document != null) {
            document.close();
        }
        if (fo != null) {
            try {
                fo.close();
            } catch (Exception e) {
                log.error("Io關閉異常:", e);
            }
        }
    }
}
方案描述
該方案是通過pdfbox按某個dpi將pdf拆分成圖片,然后再將拆出來的圖片通過itext合成為pdf。如果合并后的pdf大于目標體積,則按更小的dpi再來一遍。
問題
其實該方案流程上沒有問題,但是在性能上會存在非常大的漏洞及消耗——內存泄漏問題。pdfbox會緩存大量的pdf元數據(字體,字典)等信息且無法被GC,或者說,在GC之前,Java服務進程已經被服務器殺死了。剛開始還以為是版本問題,我看最新版本對內存做了優化,但是在升級最新版本之后,內存增長雖然好了些,但是在有限的內存下,依舊會因內存泄漏問題導致服務宕機。
方案三:采用Aspose將pdf轉為圖片(不推薦)
代碼
public static File pdf2Images(String pdfPath, String imageDirPath, int dpi)throws Pdf2ImageException, PdfSplitException {imageDirPath = imageDirPath.replace("\\", "/");if (!imageDirPath.endsWith("/")) {imageDirPath = imageDirPath + "/";}File file = new File(pdfPath);File imageDir = new File(imageDirPath);if (!imageDir.exists()) {imageDir.mkdirs();}com.aspose.pdf.Document pdDocument;try {pdDocument = new com.aspose.pdf.Document(pdfPath);FileOutputStream fileOutputStream = null;int pages = pdDocument.getPages().size();if (pages == 1) {try {Resolution resolution = new Resolution(dpi);JpegDevice jpegDevice = new JpegDevice(resolution);String tmpImage = imageDirPath + file.getName().substring(0, file.getName().lastIndexOf(".")) +"-" + System.currentTimeMillis() + ".png";log.info("pdf just one page,use dpi {} pdf file 2 image:{}>>>{}", dpi, pdfPath, tmpImage);fileOutputStream = new FileOutputStream(new File(tmpImage));jpegDevice.process(pdDocument.getPages().get_Item(1), fileOutputStream);fileOutputStream.flush();} finally {pdDocument.close();if (fileOutputStream != null) {fileOutputStream.close();}}} else {log.info("the pdf so many pages, split every page before convert...");String tmpPdfPath =pdfPath.replace("\\", "/").substring(0, pdfPath.lastIndexOf(".")) + "-pdf-" + System.currentTimeMillis() + "/";File tmpPdfDir = splitPerPagePdf(pdfPath, tmpPdfPath);File[] files = tmpPdfDir.listFiles();if (files == null || files.length == 0) {throw new PdfSplitException("pdf split failed, no result fle found");} else {List<File> pdfs = new ArrayList<File>(Arrays.asList(files));pdfs.sort(Comparator.comparing(file1 -> file.getName()));for (int k = 0; k < pdfs.size(); k++) {pdf2Images(pdfs.get(k).getAbsolutePath(), imageDirPath, dpi);}FileUtils.deleteDirectory(new File(tmpPdfPath));}}return imageDir;} catch (IOException e) {log.error("pdf轉圖片失敗:{}", e);throw new Pdf2ImageException(pdfPath);}}public static void image2Pdf(String inputFile, String pdfFile) throws Image2PdfException 
{log.info("convert image 2 pdf :{}>>>{}", inputFile, pdfFile);Document doc = null;ByteArrayOutputStream outStream = null;PdfWriter pdfWriter = null;FileInputStream fi = null;try {File file = new File(inputFile);doc = new Document(PageSize.A4, 20, 20, 20, 20);pdfWriter = PdfWriter.getInstance(doc, new FileOutputStream(pdfFile));doc.open();doc.newPage();Image image;if (file.getName().toLowerCase().endsWith("jpg") || file.getName().toLowerCase().endsWith("jpeg")) {java.awt.Image awtImage = Toolkit.getDefaultToolkit().createImage(file.getAbsolutePath());image = Image.getInstance(awtImage, null);} else {image = Image.getInstance(file.getAbsolutePath());}float height = image.getHeight();float width = image.getWidth();if (width > height) {fi = new FileInputStream(file);BufferedImage src = ImageIO.read(fi);BufferedImage des1 = RotateImage.rotate(src, 90);String type = file.getName().substring(file.getName().lastIndexOf(".") + 1).toLowerCase();outStream = new ByteArrayOutputStream();ImageIO.write(des1, type, outStream);image = Image.getInstance(outStream.toByteArray());height = image.getHeight();width = image.getWidth();}int percent = getPercent(height, width);image.setAlignment(Image.MIDDLE);image.scalePercent(percent);float x = (PageSize.A4.getWidth() - image.getScaledWidth()) / 2;float y = (PageSize.A4.getHeight() - image.getScaledHeight()) / 2;image.setAbsolutePosition(x, y);doc.add(image);} catch (Exception e) {log.error("image 2 pdf failed:{}>>>{}", inputFile, pdfFile);log.error("exception info:", e);throw new Image2PdfException(e.getMessage());} finally {if (doc != null) {try {doc.close();} catch (Exception e) {log.info("空文檔:", e);}}if (pdfWriter != null) {pdfWriter.close();}if (outStream != null) {try {outStream.close();} catch (IOException e) {e.printStackTrace();}}if (fi != null) {try {fi.close();} catch (IOException e) {e.printStackTrace();}}}}/*** 合并pdf** @param fileList 本地文件列表 ["D:/opt/aaa.pdf","D:/opt/bbb.pdf"]* @param newPdfPath 合并文件的保存路徑 "D:/opt/ccc.pdf"* 
@return boolean* @throws* @version V1.0.0* @date 2021/11/4 10:00*/public static boolean mergePdf(List<String> fileList, String newPdfPath) {Document document = null;FileOutputStream fo = null;PdfCopy copy = null;PdfReader rr = null;try {fo = new FileOutputStream(newPdfPath);rr = new PdfReader(fileList.get(0));document = new Document(rr.getPageSize(1));copy = new PdfCopy(document, fo);copy.setFullCompression();document.open();for (int i = 0; i < fileList.size(); i++) {PdfReader reader = new PdfReader(fileList.get(i));try {int n = reader.getNumberOfPages();for (int j = 1; j <= n; j++) {document.newPage();PdfImportedPage page = copy.getImportedPage(reader, j);copy.addPage(page);}} finally {reader.close();}}return true;} catch (IOException | DocumentException e) {log.error("pdf合并失敗:", e);return false;} finally {if (rr != null) {rr.close();}if (copy != null) {copy.close();}if (document != null) {document.close();}if (fo != null) {try {fo.close();} catch (Exception e) {log.error("Io關閉異常:", e);}}}}public static void compress(String source, String target,int qa) {new com.aspose.pdf.Document doc = new new com.aspose.pdf.Document(source);//設置壓縮屬性OptimizationOptions opt = new OptimizationOptions();//刪除PDF不必要的對象opt.setRemoveUnusedObjects(true);//鏈接重復流opt.setLinkDuplcateStreams(false);//刪除未使用的流opt.setRemoveUnusedStreams(false);//刪除不必要的字體opt.setUnembedFonts(true);//壓縮PDF中的圖片opt.setCompressImages(true);//圖片壓縮比, 0 到100可選,越低壓縮比越大opt.setImageQuality(qa);doc.optimizeResources(opt);//優化web的PDF文檔doc.optimize();doc.save(target);}方案描述
流程是 pdf轉圖片->圖片轉pdf->合并->循環壓縮至指定大小,該方案解決了pdfbox內存泄漏問題。
問題
雖然解決了pdfbox內存泄漏問題,但是內存占用依舊非常嚴重。幾個文件轉換,內存飆升4個G。對服務而言,還是比較危險的。在內存寬裕的情況下,采用這套方案可以,但是在內存緊張的情況下,不建議如此去做。
方案四:ghostscript+ImageMagick(推薦,最終方案)
代碼:
/** Launcher for ImageMagick; may contain a prefix such as "sudo". Set once in the static block. */
private static String command = "";
/** Human-readable form of the ImageMagick command line, used for logging. */
private static final String cmdExpress = "%s -density 150 -quality %s -limit memory 10mb -limit map 10mb %s %s";
/** Launcher for Ghostscript; may contain a prefix such as "sudo". Set once in the static block. */
private static String gsCommand = "";

static {
    String os = System.getProperty("os.name");
    if (os != null && os.toLowerCase().contains("window")) {
        command = "magick";
        gsCommand = "gswin32c";
    } else if (os != null && os.toLowerCase().contains("ubuntu")) {
        // NOTE(review): the JVM normally reports "Linux" as os.name on Ubuntu, so this branch
        // is probably never taken - confirm whether the sudo variant is actually needed.
        command = "sudo convert";
        gsCommand = "sudo gs";
    } else {
        command = "convert";
        gsCommand = "gs";
    }
}

/**
 * Runs an external tool and waits for it to finish.
 *
 * <p>Arguments are passed as an array, so paths containing spaces are not split. The child's
 * output is forwarded to this process's stdout/stderr so the child can never block on a full
 * pipe. A child that exceeds the timeout is killed. A non-zero exit code is only logged.
 *
 * @param launcher       executable, optionally preceded by a whitespace-separated prefix
 * @param timeoutMinutes maximum time to wait
 * @param args           arguments passed after the launcher
 * @throws IOException          if the process cannot be started or times out
 * @throws InterruptedException if the waiting thread is interrupted
 */
private static void runExternal(String launcher, long timeoutMinutes, String... args)
        throws IOException, InterruptedException {
    String[] prefix = launcher.trim().split("\\s+");
    String[] argv = new String[prefix.length + args.length];
    System.arraycopy(prefix, 0, argv, 0, prefix.length);
    System.arraycopy(args, 0, argv, prefix.length, args.length);
    Process process = new ProcessBuilder(argv).inheritIO().start();
    try {
        if (!process.waitFor(timeoutMinutes, TimeUnit.MINUTES)) {
            throw new IOException(
                    "command timed out after " + timeoutMinutes + " minutes: " + String.join(" ", argv));
        }
        int exit = process.exitValue();
        if (exit != 0) {
            log.warn("command exited with code {}: {}", exit, String.join(" ", argv));
        }
    } finally {
        // Covers both the timeout and the interrupted case; a no-op once the child has exited.
        if (process.isAlive()) {
            process.destroyForcibly();
        }
    }
}

/**
 * Rasterises a PDF with ImageMagick and rebuilds it as an image-only PDF.
 *
 * <p>Pages are written as zero-padded, numbered PNG files so that the wildcard used for the
 * second step picks them up in page order. The intermediate image directory (named after
 * {@code pdfPath} without its extension) is removed afterwards, on success and on failure.
 *
 * @param pdfPath   path of the source PDF; must contain a file extension
 * @param targetPdf path of the PDF to create
 * @param qa        value passed to ImageMagick's {@code -quality} option
 * @throws Pdf2ImageException if the PDF-to-image step fails or times out
 * @throws Image2PdfException if the image-to-PDF step fails or times out
 */
public static void pdf2ImagePdf(String pdfPath, String targetPdf, int qa)
        throws Pdf2ImageException, Image2PdfException {
    String imageDirPath = pdfPath.substring(0, pdfPath.lastIndexOf(".")).replace("\\", "/");
    log.info("pdf2image:{}>>>{}", pdfPath, imageDirPath);
    pdfPath = pdfPath.replace("\\", "/");
    String pdfName = new File(pdfPath).getName();
    File imageDir = new File(imageDirPath);
    if (!imageDir.exists()) {
        imageDir.mkdirs();
    }
    try {
        // "%03d" is expanded by ImageMagick to the page number, giving name-000.png,
        // name-001.png, ... which sort correctly even beyond ten pages.
        String imageName = pdfName.substring(0, pdfName.lastIndexOf(".")) + "-%03d.png";
        String imageFilePath = (imageDirPath + "/" + imageName).replace("\\", "/");
        String quality = String.valueOf(qa);
        log.info("pdf2ImgCmd:{}", String.format(cmdExpress, command, qa, pdfPath, imageFilePath));
        try {
            runExternal(command, 5, "-density", "150", "-quality", quality,
                    "-limit", "memory", "10mb", "-limit", "map", "10mb", pdfPath, imageFilePath);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            log.error("pdf轉圖片你失敗:", e);
            throw new Pdf2ImageException(e.getMessage());
        }
        // The wildcard is handed to ImageMagick unexpanded; no shell is involved.
        String inputFile = imageDirPath + "/*.png";
        log.info("convert2PdfCmd:{}", String.format(cmdExpress, command, qa, inputFile, targetPdf));
        try {
            runExternal(command, 3, "-density", "150", "-quality", quality,
                    "-limit", "memory", "10mb", "-limit", "map", "10mb", inputFile, targetPdf);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            log.error("pdf轉圖片你失敗:", e);
            throw new Image2PdfException(e.getMessage());
        }
    } finally {
        // Always remove the intermediate images, not only on success.
        FileUtil.del(imageDirPath);
    }
}

/**
 * Re-writes a PDF through Ghostscript's {@code pdfwrite} device using the {@code /screen} preset.
 *
 * @param src  path of the source PDF
 * @param dest path of the PDF to create
 * @param qa   value passed to Ghostscript's {@code -r} (resolution) option
 * @throws IOException declared for callers; failures of the external command are reported as
 *                     {@code PdfCompressException}
 */
public static void compressPdf(String src, String dest, int qa) throws IOException {
    String compressCommand = "%s -dQUIET -dNOSAFER -r%s -sDEVICE=pdfwrite -dCompatibilityLevel=1.3 -dPDFSETTINGS=/screen -dNOPAUSE -dBATCH -dColorImageResolution=150 -sOutputFile=%s %s";
    src = src.replace("\\", "/");
    dest = dest.replace("\\", "/");
    log.info(String.format(compressCommand, gsCommand, qa, dest, src));
    try {
        runExternal(gsCommand, 3, "-dQUIET", "-dNOSAFER", "-r" + qa, "-sDEVICE=pdfwrite",
                "-dCompatibilityLevel=1.3", "-dPDFSETTINGS=/screen", "-dNOPAUSE", "-dBATCH",
                "-dColorImageResolution=150", "-sOutputFile=" + dest, src);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        log.info("文檔轉換失敗:", e);
        throw new PdfCompressException(e.getMessage());
    }
}

/**
 * Produces a copy of a PDF that is no larger than {@code size} bytes.
 *
 * <p>A source that already fits is copied unchanged. Otherwise the conversion runs on the
 * shared worker pool and is given five minutes; on timeout or failure the task is cancelled so
 * it does not keep running in the background. The intermediate "-tmp.pdf" file is always removed.
 *
 * @param source    path of the source PDF
 * @param targetPdf path of the PDF to create; must contain a file extension
 * @param size      maximum size of the result in bytes
 * @throws IOException if the source cannot be copied
 */
public static void pdf2ImagePdfWithMax(String source, String targetPdf, long size)
        throws IOException {
    File sourceFile = new File(source);
    if (sourceFile.length() <= size) {
        log.info("sourceFile's length:{}<=size:{},just copy", sourceFile.length(), size);
        FileUtils.copyFile(sourceFile, new File(targetPdf));
        return;
    }
    String targetTmpPdf = targetPdf.substring(0, targetPdf.lastIndexOf(".")) + "-tmp" + ".pdf";
    try {
        FutureTask<Boolean> futureTask = new FutureTask<>(() -> {
            pdf2ImagePdf(source, targetTmpPdf, 96);
            compressPdf2FixLength(targetTmpPdf, targetPdf, size);
            return true;
        });
        YhConstant.ITEM_POOL.submit(futureTask);
        try {
            futureTask.get(5, TimeUnit.MINUTES);
        } catch (Exception e) {
            // Stop the worker; otherwise it keeps converting after the caller has given up
            // and after the temporary file below has been deleted.
            futureTask.cancel(true);
            if (e instanceof InterruptedException) {
                Thread.currentThread().interrupt();
            }
            throw new PdfCompressException("壓縮失敗:" + e.getMessage());
        }
    } finally {
        File file = new File(targetTmpPdf);
        if (file.exists()) {
            file.delete();
        }
    }
}
方案描述:
流程依舊是 pdf轉圖片->圖片合并成pdf->pdf壓縮,只是通過系統層的ghostscript+ImageMagick來實現。
CentOS:執行 yum install -y ghostscript ImageMagick,然后 vi /etc/ImageMagick-6/policy.xml,將 <policy domain="module" 這一行取消注釋,并改為:<policy domain="module" rights="read|write" pattern="{PS,PDF,XPS}" />
Ubuntu:執行 apt install -y ghostscript imagemagick,同樣需要修改ImageMagick安裝目錄(/etc/ImageMagick-6)下的policy.xml文件。
Windows:自行安裝且添加環境變量。
問題
ghostscript壓縮pdf稍微費些內存,但是比起java要好很多。建議在ghostscript壓縮時加入線程池進行并發控制,降低內存爆掉的風險。
總結
Java就是TMD費內存,JVM優化其實也就那樣。5家客戶同時在用的saas系統,我只能把xms、xmx設置為服務器剩余的4個G,再怎么優化也是醉了。
總結
以上是生活随笔為你收集整理的Java-pdf无限压缩方案-优化内存问题的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 微机化远动系统与计算机网络,2012年1
- 下一篇: java美元兑换,(Java实现) 美元