通用表格识别
接口描述
用户通过发送HTTP Post请求方式提交数据至公有云服务器,服务器识别并返回json格式的识别结果。
接口说明
请求URL:http://api.exocr.com/ocr/v1/table_api
请求方式:HTTP Post
返回格式:json
Header
参数 | 值 |
---|---|
Content-Type | 使用二进制方式时,为multipart/form-data;使用url方式时,为application/x-www-form-urlencoded |
请求参数
参数 | 必选 | 类型 | 说明 |
---|---|---|---|
image_base64 | 否 | string | 图像base64串。image_base64、image_url、image_binary三个参数中至少选择一个,读取优先级从前到后依次降低。如:同时选择了以上三个参数,则以image_base64图像为准。 |
image_url | 否 | string | 图像url地址。image_base64、image_url、image_binary三个参数中至少选择一个,读取优先级从前到后依次降低。如:同时选择了以上三个参数,则以image_base64图像为准。 |
image_binary | 否 | data | 图像二进制。image_base64、image_url、image_binary三个参数中至少选择一个,读取优先级从前到后依次降低。如:同时选择了以上三个参数,则以image_base64图像为准。 |
auto_orientation | 否 | string | 是否转正,'1'代表转正,'0'代表不转正。 默认值为'1' |
return_excel | 否 | string | 是否返回Base64编码的excel文件,'0'代表不返回,'1'代表返回。默认值为'0' |
app_key | 是 | string | 请在控制台->我的应用中获取 |
app_secret | 是 | string | 请在控制台->我的应用中获取 |
调用示例
java(相关依赖库下载)
package com.exocr.httpclient;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import com.exocr.common.utils.HttpClientUtils;
/**
 * Calling example for the general table recognition API.
 *
 * <p>Shows the three ways of supplying the image: as a base64 string, as an
 * image URL, and as a binary stream. Each mode is sent as its own request with
 * its own parameter map, and the raw response string is printed.
 */
public class Test {

    /** Endpoint of the general table recognition API. */
    private static final String TABLE_API_URL = "http://api.exocr.com/ocr/v1/table_api";

    /**
     * Sends one recognition request per input mode and prints each response.
     *
     * @param args unused
     * @throws FileNotFoundException if the local sample image cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        /*
         * base64
         */
        Map<String, String> base64Params = newCredentialParams();
        // Image to recognize, as a base64-encoded string.
        base64Params.put("image_base64", "base64ImageStr");
        String result = HttpClientUtils.doPost(TABLE_API_URL, base64Params);
        System.out.println(result);

        /*
         * url
         *
         * A fresh map is used on purpose: the API reads image_base64 before
         * image_url, so a leftover image_base64 entry would silently win.
         */
        Map<String, String> urlParams = newCredentialParams();
        urlParams.put("image_url", "imageUrl");
        result = HttpClientUtils.doPost(TABLE_API_URL, urlParams);
        System.out.println(result);

        /*
         * binary
         *
         * Again a fresh map, so that neither image_base64 nor image_url
         * takes precedence over the uploaded stream.
         */
        Map<String, String> binaryParams = newCredentialParams();
        // Open the local image as a stream.
        File file = new File("/Users/mac/logs/table.png");
        InputStream in = new FileInputStream(file);
        try {
            Map<String, InputStream> streamMap = new HashMap<String, InputStream>();
            streamMap.put("image_binary", in);
            result = HttpClientUtils.doPostStream(TABLE_API_URL, binaryParams, streamMap);
        } finally {
            // Close the stream even when the request fails.
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println(result);
    }

    /**
     * Builds a new parameter map holding only the required credentials.
     *
     * @return a mutable map containing {@code app_key} and {@code app_secret}
     */
    private static Map<String, String> newCredentialParams() {
        Map<String, String> params = new HashMap<String, String>();
        params.put("app_key", "#####");
        params.put("app_secret", "#####");
        return params;
    }
}
返回示例
{
"error_code":0,
"description":"识别成功",
"tables":[
{
"cells":[
{
"rows_span":1,
"col_index":0,
"row_index":0,
"bbox":[
103,
266,
719,
318
],
"words":"项 目",
"coordinates":[
[
103,
268
],
[
719,
266
],
[
719,
316
],
[
104,
318
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":1,
"row_index":0,
"bbox":[
719,
265,
857,
316
],
"words":"注释六",
"coordinates":[
[
719,
266
],
[
856,
265
],
[
857,
315
],
[
719,
316
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":3,
"row_index":28,
"bbox":[
1166,
1702,
1469,
1755
],
"words":"9,350.02",
"coordinates":[
[
1166,
1704
],
[
1469,
1702
],
[
1469,
1755
],
[
1166,
1755
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":0,
"row_index":29,
"bbox":[
108,
1757,
724,
1811
],
"words":"开发支出",
"coordinates":[
[
108,
1759
],
[
724,
1757
],
[
724,
1808
],
[
108,
1811
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":2,
"row_index":32,
"bbox":[
864,
1910,
1167,
1963
],
"words":"",
"coordinates":[
[
864,
1911
],
[
1167,
1910
],
[
1167,
1962
],
[
864,
1963
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":3,
"row_index":32,
"bbox":[
1167,
1909,
1470,
1962
],
"words":"425.73",
"coordinates":[
[
1167,
1910
],
[
1470,
1909
],
[
1470,
1960
],
[
1167,
1962
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":0,
"row_index":33,
"bbox":[
108,
1963,
725,
2017
],
"words":"其他非流动资产",
"coordinates":[
[
108,
1966
],
[
725,
1963
],
[
725,
2015
],
[
108,
2017
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":1,
"row_index":33,
"bbox":[
725,
1963,
864,
2015
],
"words":"",
"coordinates":[
[
725,
1963
],
[
864,
1963
],
[
864,
2014
],
[
725,
2015
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":2,
"row_index":33,
"bbox":[
864,
1962,
1167,
2014
],
"words":"",
"coordinates":[
[
864,
1963
],
[
1167,
1962
],
[
1167,
2013
],
[
864,
2014
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":3,
"row_index":33,
"bbox":[
1167,
1960,
1470,
2013
],
"words":"",
"coordinates":[
[
1167,
1962
],
[
1470,
1960
],
[
1470,
2013
],
[
1167,
2013
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":0,
"row_index":34,
"bbox":[
108,
2015,
725,
2069
],
"words":"非流动资产合计",
"coordinates":[
[
108,
2017
],
[
725,
2015
],
[
725,
2067
],
[
108,
2069
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":1,
"row_index":34,
"bbox":[
725,
2014,
864,
2067
],
"words":"",
"coordinates":[
[
725,
2015
],
[
864,
2014
],
[
864,
2066
],
[
725,
2067
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":2,
"row_index":34,
"bbox":[
864,
2013,
1167,
2066
],
"words":"8,320,391.67",
"coordinates":[
[
864,
2014
],
[
1167,
2013
],
[
1167,
2065
],
[
864,
2066
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":3,
"row_index":34,
"bbox":[
1167,
2013,
1470,
2065
],
"words":"7,783,784.01",
"coordinates":[
[
1167,
2013
],
[
1470,
2013
],
[
1470,
2065
],
[
1167,
2065
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":0,
"row_index":35,
"bbox":[
108,
2067,
726,
2121
],
"words":"资产总计",
"coordinates":[
[
108,
2069
],
[
725,
2067
],
[
726,
2118
],
[
109,
2121
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":1,
"row_index":35,
"bbox":[
725,
2066,
865,
2118
],
"words":"",
"coordinates":[
[
725,
2067
],
[
864,
2066
],
[
865,
2118
],
[
726,
2118
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":2,
"row_index":35,
"bbox":[
864,
2065,
1168,
2118
],
"words":"17,605,842.68",
"coordinates":[
[
864,
2066
],
[
1167,
2065
],
[
1168,
2117
],
[
865,
2118
]
],
"cols_span":1
},
{
"rows_span":1,
"col_index":3,
"row_index":35,
"bbox":[
1167,
2065,
1471,
2117
],
"words":"30,192,943.11",
"coordinates":[
[
1167,
2065
],
[
1470,
2065
],
[
1471,
2115
],
[
1168,
2117
]
],
"cols_span":1
}
]
}
],
"text":[
{
"quad":"674,101,894,101,894,143,674,143",
"score":0.99974078,
"words":"资产负债表",
"position":{
"width":220,
"top":101,
"height":42,
"left":674
}
},
{
"quad":"676,172,887,170,887,200,677,202",
"score":0.99991322,
"words":"2016年12月31日",
"position":{
"width":211,
"top":170,
"height":32,
"left":676
}
},
{
"quad":"114,226,222,226,222,253,114,253",
"score":0.99822152,
"words":"编制单位:",
"position":{
"width":108,
"top":226,
"height":27,
"left":114
}
},
{
"quad":"235,224,624,224,624,250,235,250",
"score":0.9989894,
"words":"深圳花儿绽放网络科技股份有限公司",
"position":{
"width":389,
"top":224,
"height":26,
"left":235
}
},
{
"quad":"1230,224,1337,224,1337,248,1230,248",
"score":0.87328136,
"words":"金额单位:",
"position":{
"width":107,
"top":224,
"height":24,
"left":1230
}
},
{
"quad":"1351,224,1451,224,1451,248,1351,248",
"score":0.99988824,
"words":"人民币元",
"position":{
"width":100,
"top":224,
"height":24,
"left":1351
}
}
],
"request_id":"4C701D3885604C85B763BA73CBC49333",
"recognize_time":1335,
"available_count":8483,
"version":null
}
返回说明
通用参数
参数 | 类型 | 说明 |
---|---|---|
error_code | int | 错误码,返回0为正确,其他为错误 |
description | string | 识别结果描述 |
request_id | string | 请求唯一标识符 |
recognize_time | int | 识别所用时间,单位为毫秒 |
available_count | int | 识别剩余可用次数,次数不足时无法继续识别,可在控制台充值 |
tables | array | 表格内容信息数组,包括所有识别信息 |
text | array | 表格外部信息数组 |
version | string | 版本 |
table特有参数
参数 | 类型 | 说明 |
---|---|---|
rows_span | int | 行占用单元格数量 |
col_index | int | 列号 |
row_index | int | 行号 |
bbox | array | 单元格外接矩形坐标,依次为左、上、右、下四个值 |
words | string | 单元格内容 |
coordinates | array | 依次为左上,右上,右下,左下顺时针四个点的x,y值 |
cols_span | int | 列占用单元格数量 |
cells | List | 单元格 |
score | Float | 照片可信度 |
text_items | List | 单元格文本 |
Text特有参数
参数 | 类型 | 说明 |
---|---|---|
quad | string | 文本区域四个顶点的坐标,以逗号分隔,依次为左上,右上,右下,左下顺时针四个点的x,y值 |
score | double | 得分 |
words | string | 识别出的文本内容 |
position | object | 文本的位置,包含left、top、width、height四个字段 |
type | string | 类型 |