日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

百度云文字识别OCR【表格】

發布時間:2023/12/20 编程问答 41 豆豆
生活随笔 收集整理的這篇文章主要介紹了 百度云文字识别OCR【表格】 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

1、我轉換的是表格,謝天謝地有個接口叫:

表格文字識別接口

自動識別表格線及表格內容,結構化輸出表頭、表尾及每個單元格的文字內容。

表格文字識別接口為異步接口,分為兩個API:提交請求接口、獲取結果接口。

需要注意以下幾點:

1、每天只有50次的免費使用量

2、特別注意:表格【邊框】必須有【實線】,excel的那種線也是不可識別的

3、背景干凈最好,這樣識別率比較高

4、數字中的「、」和「,」是分不清的
5、長圖片不可識別

我實現的代碼,有大神覺得不好的地方,萬望指出:

@Controller public class OCRController extends BaseController {private static final Log logger = LogFactory.getLog(OCRController.class);public JSONObject sample(AipOcr client, String image) {// 傳入可選參數調用接口HashMap<String, String> options = new HashMap<String, String>();// options.put("recognize_granularity", "big");options.put("detect_direction", "true");// options.put("vertexes_location", "true");// 參數為本地路徑JSONObject res = client.basicAccurateGeneral(image, options);return res;}public JSONObject sample1(AipOcr client, String image) {// 傳入可選參數調用接口HashMap<String, String> options = new HashMap<String, String>();// 參數為本地路徑JSONObject res = client.form(image, options);return res;}public static JSONObject sample2(AipOcr client, String image) {// 傳入可選參數調用接口HashMap<String, String> options = new HashMap<String, String>();// 參數為本地路徑JSONObject res = client.tableRecognitionAsync(image, options);return res;/** // 參數為二進制數組 byte[] file = readFile("test.jpg"); res =* client.tableRecognitionAsync(file, options);* System.out.println(res.toString(2));*/}public static JSONObject sample3(AipOcr client, String requestId) {// 傳入可選參數調用接口HashMap<String, String> options = new HashMap<String, String>();options.put("result_type", "json");// 表格識別結果JSONObject res = client.tableResultGet(requestId, options);return res;}public static void main(String[] args) throws JSONException {// 初始化一個AipOcrAipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);// 可選:設置網絡連接參數client.setConnectionTimeoutInMillis(2000);client.setSocketTimeoutInMillis(60000);// 調用接口String path = "d:\\a2.png";Map<String, Object> json = new HashMap<String, Object>();String reqestId = getReqestId(json, path);System.out.println(reqestId);System.out.println("===================1==============");net.sf.json.JSONArray ja = new net.sf.json.JSONArray();net.sf.json.JSONArray headerArr = new net.sf.json.JSONArray();imageTransformation(ja, headerArr, reqestId, "20190101-天馬.png");System.out.println("==================2===============");}private 
void drawRect(String path) {File _file = new File(path); // 讀入文件try {// 構造Image對象Image src = javax.imageio.ImageIO.read(_file);int width = src.getWidth(null); // 得到源圖寬int height = src.getHeight(null); // 得到源圖長BufferedImage image = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB);Graphics graphics = image.getGraphics();graphics.drawImage(src, 0, 0, width, height, null); // 繪制圖// 背景那么干凈,取灰度,找個閾值,二值化一下,然后腐蝕,系數調大點,尋找黑色區域的輪廓的外接正矩形// 畫邊框graphics.setColor(Color.BLACK);graphics.drawRect(1, 0, width - 1, height - 1);// graphics.drawRect(1, 1, width - 1, height - 1);// graphics.drawRect(0, 0, width-2, height- 2);FileOutputStream out = new FileOutputStream(path); // 輸出到文件流JPEGImageEncoder encoder = JPEGCodec.createJPEGEncoder(out);encoder.encode(image);out.close();} catch (IOException e) {e.printStackTrace();}}private void delFile(String pic, String name) {logger.info("============================刪除原文件3個:" + pic + "=========================");// 刪除原文件3個String prepareForUpload ="";String filename ="";String[] arr = filename.split(".");if (arr != null && arr.length > 1) {if ("0".equals(name)) {filename = arr[0].substring(0, arr[0].length() - 1) + "2" + arr[1];logger.info("============================路徑名+文件2號圖=刪除文件:" + filename + "=========================");UploadHelper.delTempFile(filename, prepareForUpload);filename = arr[0].substring(0, arr[0].length() - 1) + "1" + arr[1];logger.info("============================路徑名+文件1號圖=刪除文件:" + filename + "=========================");UploadHelper.delTempFile(filename, prepareForUpload);}filename = arr[0].substring(0, arr[0].length() - 1) + arr[1];logger.info("============================路徑名+文件原圖=刪除文件:" + filename + "=========================");UploadHelper.delTempFile(filename, prepareForUpload);}}private static Map<String, Object> imageTransformation(net.sf.json.JSONArray ja, net.sf.json.JSONArray headerArr, String requestId, String fileName)throws JSONException {Map<String, Object> map = new HashMap<String, 
Object>();List<String> worng = new ArrayList<String>();String str = toRequestOCR(requestId);if (StringUtils.isBlank(str)) {map.put("code", "數據為空");return map;}List<OcrDTO> list = getList(str, fileName);getHeader(headerArr, list);return map;}/*** 獲取主數據* * @param ja* @param list*/private static List<String> getBody(net.sf.json.JSONArray ja, List<OcrDTO> list) {logger.info("=======================獲取主數據===========================");List<OcrDTO> t = new ArrayList<OcrDTO>();for (int i = 0; i < list.size(); i++) {t.add(list.get(i));}for (int i = 0; i < list.size() - 1; i++) {for (int j = list.size() - 1; j > i; j--) {if (list.get(j).getColumn() == (list.get(i).getColumn())) {list.remove(j);}}}net.sf.json.JSONObject column = new net.sf.json.JSONObject();for (int i = 0; i < list.size(); i++) {net.sf.json.JSONArray rowJsonArray = new net.sf.json.JSONArray();Map<String, Integer> m = new HashMap<String, Integer>();for (int j = 0; j < t.size(); j++) {if (list.get(i).getColumn() == t.get(j).getColumn() && list.get(i).getRow() != t.get(j).getRow()) {column.put("row", t.get(j).getRow());column.put("name", t.get(j).getWord());column.put("flag", 0);if (!m.containsKey("row" + t.get(0).getRow())) {m.put("row" + t.get(j).getRow(), t.get(j).getRow());rowJsonArray.add(column);}}}ja.add(rowJsonArray);}List<String> wrong = new ArrayList<String>();for (int i = 0; i < ja.size(); i++) {net.sf.json.JSONArray arr = (net.sf.json.JSONArray) ja.get(i);wrong = isWrong(arr);logger.info("=========================for:wrong" + wrong + "============================");if (CollectionUtils.isNotEmpty(wrong)) {for (int j = 0; j < wrong.size(); j++) {net.sf.json.JSONObject obj = (net.sf.json.JSONObject) arr.get(Integer.parseInt(wrong.get(j)));obj.put("flag", 1);}break;}}logger.info("=========================wrong" + wrong + "============================");return wrong;}/*** 獲取頭部信息* * @param headerArr* @param list*/private static void getHeader(net.sf.json.JSONArray headerArr, List<OcrDTO> list) 
{net.sf.json.JSONObject head = new net.sf.json.JSONObject();net.sf.json.JSONArray harr = new net.sf.json.JSONArray();for (int i = 0; i < list.size(); i++) {if (list.get(i).getRow() == 0) {harr.add(list.get(i).getWord());}}/** if (null != harr) { harr.add("日期"); harr.add("類目"); }*/head.put("head", harr);headerArr.add(head);}/*** 獲取所有的數據,并將數據存到list* * @param str* @return*/private static List<OcrDTO> getList(String str, String fileName) {List<OcrDTO> list = new ArrayList<OcrDTO>();net.sf.json.JSONObject fromObject = net.sf.json.JSONObject.fromObject(str);net.sf.json.JSONArray jsonArray = fromObject.getJSONArray("forms");net.sf.json.JSONArray jsonbody = null;for (int i = 0; i < jsonArray.size(); i++) {net.sf.json.JSONObject json1 = (net.sf.json.JSONObject) jsonArray.get(i);jsonbody = json1.getJSONArray("body");}for (int i = 0; i < jsonbody.size(); i++) {net.sf.json.JSONObject json1 = (net.sf.json.JSONObject) jsonbody.get(i);String column = json1.getString("column");String row = json1.getString("row");String word = json1.getString("word");OcrDTO ocr = new OcrDTO(Integer.parseInt(row.substring(1, row.length() - 1)), Integer.parseInt(column.substring(1, column.length() - 1)), word);list.add(ocr);}int num = 0;int colum = 0;for (int i = 0; i < jsonbody.size(); i++) {net.sf.json.JSONObject json1 = (net.sf.json.JSONObject) jsonbody.get(i);String column = json1.getString("column");int col = Integer.parseInt(column.substring(1, column.length() - 1));if (col == 0) {num += 1;}colum = col;if (col > colum) {colum = col;}}String[] arr = fileName.split("-");for (int i = 0; i < num; i++) {if (i == 0) {OcrDTO ocr = new OcrDTO(i, colum + 2, "日期");list.add(ocr);continue;}String word = arr[0];OcrDTO ocr = new OcrDTO(i, colum + 2, word);list.add(ocr);}for (int i = 0; i < num; i++) {if (i == 0) {OcrDTO ocr = new OcrDTO(i, colum + 3, "類目");list.add(ocr);continue;}String word = arr[1].substring(0, arr[1].length() - 4);OcrDTO ocr = new OcrDTO(i, colum + 3, 
word);list.add(ocr);}Collections.sort(list, new Comparator<OcrDTO>() {@Overridepublic int compare(OcrDTO o1, OcrDTO o2) {if (o1.getColumn() == o2.getColumn()) {return o1.getRow() - (o2.getRow());}return o1.getColumn() - o2.getColumn();}});//logger.info("=====================list:" + list + "========================");return list;}/*** 開始請求ocr接口,獲取返回* * @param requestId* @return* @throws JSONException*/private static String toRequestOCR(String requestId) throws JSONException {String[] arr = requestId.split(" ");AipOcr client = new AipOcr(APP_ID, API_KEY, SECRET_KEY);if (arr[0].equals("1")) {client = new AipOcr(APP_ID1, API_KEY1, SECRET_KEY1);}if (arr[0].equals("2")) {client = new AipOcr(APP_ID2, API_KEY2, SECRET_KEY2);}if (arr[0].equals("3")) {client = new AipOcr(APP_ID3, API_KEY3, SECRET_KEY3);}if (arr[0].equals("4")) {client = new AipOcr(APP_ID4, API_KEY4, SECRET_KEY4);}requestId = arr[1];JSONObject res = sample3(client, requestId);String err = res.toString(2);if (!res.has("result")) {res = isb(client, requestId);}if (res.toString().contains("error_msg")) {logger.info("=====================err" + err + "========================");return null;}if (!res.has("result")) {return null;}String str = cycleRequest(requestId, client, err, res);return str;}/*** 假如報錯,循環請求* * @param client* @param requestId* @return*/private static JSONObject isb(AipOcr client, String requestId) {try {Thread.sleep(5000);} catch (InterruptedException e) {e.printStackTrace();}JSONObject res1 = sample3(client, requestId);if (res1.toString().contains("error_msg")) {boolean f = true;int i = 0;while (f) {try {Thread.sleep(5000);} catch (InterruptedException e) {e.printStackTrace();}i += 1;res1 = sample3(client, requestId);logger.info("假如報錯,循環請求" + res1.toString());if (!res1.toString().contains("error_msg")) {f = false;}if (i > 5) {f = false;}}}return res1;}/*** 假如未完成,繼續請求* * @param requestId* @param client* @param err* @param json* @return* @throws JSONException*//*** @param requestId* @param 
client* @param err* @param res* @return* @throws JSONException*/private static String cycleRequest(String requestId, AipOcr client, String err, JSONObject res) throws JSONException {JSONObject json = res.getJSONObject("result");logger.info("=====================假如未完成,繼續請求err" + err + "========================");int num = 0;while (true) {num += 1;String str = json.getString("ret_msg");if ("已完成".equals(str)) {break;}try {Thread.sleep(10000);} catch (InterruptedException e) {e.printStackTrace();}res = sample3(client, requestId);// 假如錯誤的if (res.toString().contains("error_msg")) {err = res.toString();break;}if (res.has("result"))json = res.getJSONObject("result");// 假如正確的if (json.has("ret_msg")) {str = json.getString("ret_msg");if ("未開始".equals(str) || "進行中".equals(str)) {logger.info("===============進行中未開始err=" + str + "=========================");try {Thread.sleep(10000);} catch (InterruptedException e) {e.printStackTrace();}} else if ("已完成".equals(str)) {err = res.toString();// logger.info("===============已完成err=" + err +// "=========================");break;}}if (num > 8) {err = res.toString();break;}logger.info("=====================while最后一行假如未完成,繼續請求ret_msg" + str + "========================");}if (err.contains("error_msg")) {logger.info("=====================err" + err + "========================");return null;}String str = null;if (json.has("result_data")) {str = json.getString("result_data");// logger.info("=====================json.has(result_data)" +// json.toString() + "========================");return str;}// logger.info("=====================假如未完成,繼續請求strisEmpty" + str +// "========================");return str;} }


?

?

?

總結

以上是生活随笔為你收集整理的百度云文字识别OCR【表格】的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。