Office document identification
Interface description
Online debugging
Request Description
|
|
---|---|
|
|
|
|
---|---|
|
|
|
|
|
|
|
---|---|---|---|---|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Office document identification
# Replace the two bracketed placeholders before running. The image value must be
# Base64-encoded and then URL-encoded.
# NOTE(review): -k disables TLS certificate verification; remove it when the
# local CA bundle can validate the server certificate.
curl -i -k 'https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=[Call the token obtained from the authentication interface]' --data 'image=[Picture Base64 encoding, UrlEncode required]' -H 'Content-Type:application/x-www-form-urlencoded'
# encoding:utf-8
"""Office document identification.

Reads a local picture, Base64-encodes it and posts it as the ``image`` form
field to the doc_analysis_office endpoint, then prints the JSON reply.
"""
import base64

import requests

request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office"

# Open the picture file in binary mode; the with-block closes it after reading.
with open('[Local file]', 'rb') as f:
    img = base64.b64encode(f.read())

params = {"image": img}
access_token = '[Token obtained by calling the authentication interface]'
request_url = request_url + "?access_token=" + access_token
headers = {'content-type': 'application/x-www-form-urlencoded'}
response = requests.post(request_url, data=params, headers=headers)
# Only print the body when the response object evaluates as truthy.
if response:
    print(response.json())
package com . baidu . ai . aip ;
import com . baidu . ai . aip . utils . Base64Util ;
import com . baidu . ai . aip . utils . FileUtil ;
import com . baidu . ai . aip . utils . HttpUtil ;
import java . net . URLEncoder ;
/** *Office document identification */
public class AnalysisOffice {
/** *Tool class required in important tip code *FileUtil, Base64Util, HttpUtil, GsonUtils * https://ai.baidu.com/file/658A35ABAB2D404FBF903F64D47C1F72 * https://ai.baidu.com/file/C8D81F3301E24D2892968F09AE1AD6E2 * https://ai.baidu.com/file/544D677F5D4E4F17B4122FBD60DB82B3 * https://ai.baidu.com/file/470B3ACCA3FE43788B5A963BF0B625F3 *Download */
public static String analysisOffice ( ) {
//Request url
String url = " https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office " ;
try {
//Local file path
String filePath = [Local file path] ;
byte [ ] imgData = FileUtil . readFileByBytes ( filePath ) ;
String imgStr = Base64Util . encode ( imgData ) ;
String imgParam = URLEncoder . encode ( imgStr , "UTF-8" ) ;
String param = "image=" + imgParam ;
//Note that the purpose here is to simplify the encoding and obtain access_token for each request. The online environment access_token has an expiration time, and the client can cache it and retrieve it after expiration.
String accessToken = "[Token obtained by calling the authentication interface]" ;
String result = HttpUtil . post ( url , accessToken , param ) ;
System . out . println ( result ) ;
return result ;
} catch ( Exception e ) { e . printStackTrace ( ) ;
}
return null ;
}
public static void main ( String [ ] args ) {
AnalysisOffice . analysisOffice ( ) ;
}
}
# include <iostream>
# include <curl/curl.h>
//Download link of libcurl library: https://curl.haxx.se/download.html
//Download link of jsoncpp library: https://github.com/open-source-parsers/jsoncpp/
// Endpoint of the office document identification API. The literal must not
// contain surrounding whitespace because "?access_token=..." is appended
// directly to it when the request URL is built.
const static std::string request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office";
// File-level buffer intended to hold the response body received by the libcurl
// write callback.
static std::string analysisOffice_result;
/** *The curl sends the callback function called by the http request. The returned body in json format is parsed in the callback function, and the parsing result is stored in the global static variable *See the libcurl document for @ param parameter definitions *@ return See the libcurl document for the definition of the return value */
static size_t callback ( void * ptr , size_t size , size_t nmemb , void * stream ) {
//The obtained body is stored in ptr and converted to string format first analysisOffice_result = std :: string ( ( char * ) ptr , size * nmemb ) ;
return size * nmemb ;
}
/** *Office document identification *@ return If the call is successful, 0 will be returned. If an error occurs, other error codes will be returned */
int analysisOffice ( std :: string & json_result , const std :: string & access_token ) { std :: string url = request_url + "?access_token=" + access_token ; CURL * curl = NULL ; CURLcode result_code ;
int is_success ; curl = curl_easy_init ( ) ;
if ( curl ) {
curl_easy_setopt ( curl , CURLOPT_URL , url . data ( ) ) ;
curl_easy_setopt ( curl , CURLOPT_POST , one ) ; curl_httppost * post = NULL ; curl_httppost * last = NULL ;
curl_formadd ( & post , & last , CURLFORM_COPYNAME , "image" , CURLFORM_COPYCONTENTS , "【base64_img】" , CURLFORM_END ) ;
curl_easy_setopt ( curl , CURLOPT_HTTPPOST , post ) ;
curl_easy_setopt ( curl , CURLOPT_WRITEFUNCTION , callback ) ; result_code = curl_easy_perform ( curl ) ;
if ( result_code != CURLE_OK ) {
fprintf ( stderr , "curl_easy_perform() failed: %s\n" ,
curl_easy_strerror ( result_code ) ) ; is_success = one ;
return is_success ;
} json_result = analysisOffice_result ;
curl_easy_cleanup ( curl ) ; is_success = zero ;
} else {
fprintf ( stderr , "curl_easy_init() failed." ) ; is_success = one ;
}
return is_success ;
}
<?php
/**
 * Initiate an HTTP POST request (REST API) and return the result.
 *
 * @param string $url   request URL
 * @param mixed  $param request body handed to CURLOPT_POSTFIELDS
 * @return mixed the value returned by curl_exec(); false when $url or $param
 *               is empty
 */
function request_post($url = '', $param = '')
{
    if (empty($url) || empty($param)) {
        return false;
    }

    // Initialize curl
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    // Do not include the response headers in the output
    curl_setopt($curl, CURLOPT_HEADER, 0);
    // Have curl_exec() return the response as a string instead of printing it
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
    // NOTE(review): peer certificate verification is switched off here;
    // consider enabling it for production use.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    // Post submission method
    curl_setopt($curl, CURLOPT_POST, 1);
    curl_setopt($curl, CURLOPT_POSTFIELDS, $param);

    // Run curl
    $data = curl_exec($curl);
    curl_close($curl);

    return $data;
}
// Replace the placeholders with a real token and a real file path.
$token = '[Token obtained by calling the authentication interface]';
$url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=' . $token;

// Read the local file and Base64-encode its bytes for the "image" field
$img = file_get_contents('[Local file path]');
$img = base64_encode($img);
$bodys = array(
    'image' => $img
);

$res = request_post($url, $bodys);
var_dump($res);
using System ;
using System . IO ;
using System . Net ;
using System . Text ;
using System . Web ;
namespace com.baidu.ai
{
    public class AnalysisOffice
    {
        /// <summary>
        /// Office document identification: posts a Base64-encoded local picture
        /// as the "image" form field and returns the response text.
        /// </summary>
        /// <returns>The response body, which is also written to the console.</returns>
        public static string analysisOffice()
        {
            string token = "[Token obtained by calling the authentication interface]";
            string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/doc_analysis_office?access_token=" + token;
            Encoding encoding = Encoding.Default;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
            request.Method = "post";
            request.KeepAlive = true;
            // The body below is a URL-encoded form field, so declare it as such.
            request.ContentType = "application/x-www-form-urlencoded";

            // Base64 encoding of pictures (replace the placeholder path)
            string base64 = getFileBase64("[Local picture file]");
            String str = "image=" + HttpUtility.UrlEncode(base64);
            byte[] buffer = encoding.GetBytes(str);
            request.ContentLength = buffer.Length;

            // Dispose the request stream once the body has been written.
            using (Stream body = request.GetRequestStream())
            {
                body.Write(buffer, 0, buffer.Length);
            }

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
            {
                string result = reader.ReadToEnd();
                Console.WriteLine("Office document identification:");
                Console.WriteLine(result);
                return result;
            }
        }

        /// <summary>
        /// Reads a file and returns its content as a Base64 string.
        /// </summary>
        /// <param name="fileName">Path of the file to read.</param>
        /// <returns>Base64 text of the file bytes.</returns>
        public static String getFileBase64(String fileName)
        {
            // ReadAllBytes reads the complete file and closes it, whereas a
            // single Stream.Read call may return fewer bytes than requested.
            byte[] arr = File.ReadAllBytes(fileName);
            return Convert.ToBase64String(arr);
        }
    }
}
Return description
|
|
|
|
---|---|---|---|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
"results_num" : 5 ,
"log_id" : "1410491260247950412" ,
"results" : [
{
"words_type" : "print" ,
"words" : {
"words_location" : {
"top" : 88 ,
"left" : 442 ,
"width" : 142 ,
"height" : 49
} ,
"word" : "Trip sheet"
}
} ,
{
"words_type" : "print" ,
"words" : {
"words_location" : {
"top" : 241 ,
"left" : 439 ,
"width" : 393 ,
"height" : 37
} ,
"word" : "8 days and 7 nights for famous schools on the east coast of the United States"
}
} ,
{
"words_type" : "print" ,
"words" : {
"words_location" : {
"top" : 318 ,
"left" : 436 ,
"width" : 774 ,
"height" : 31
} ,
"word" : "The Capitol is located on the Capitol Hill, 25 meters high in Washington. It is the heart of the United States."
}
} ,
{
"words_type" : "print" ,
"words" : {
"words_location" : {
"top" : 374 ,
"left" : 434 ,
"width" : 805 ,
"height" : 31
} ,
"word" : "On the big dome of the central attic stands a bronze statue of the Statue of Liberty 6 meters high."
}
} ,
{
"words_type" : "print" ,
"words" : {
"words_location" : {
"top" : 431 ,
"left" : 436 ,
"width" : 556 ,
"height" : 31
} ,
"word" : "The eastern lawn is where all previous presidents held their inaugurations."
}
}
]
}