python 3调用百度OCR API实现剪贴板文字识别

castingAT 2018-09-04

本程序调用百度OCR API对剪贴板的图片文字识别,配合CaptureScreen软件,可快速识别文字。

#!python3
import urllib.request, urllib.parse
import os, io, sys, json, socket
import base64
from PIL import ImageGrab
 
socket.setdefaulttimeout(30)
 
def get_auth():
  apikey = 'your apikey'
  secret_key = 'your secret key'
  host = 'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s' % (apikey, secret_key)
  req = urllib.request.Request(host)
  req.add_header('Content-Type', 'application/json; charset=UTF-8')
  res = urllib.request.urlopen(req)
  content = res.read()
  if (content):
    o = json.loads(content.decode())
    return o['access_token']
  return None
 
def ocr_clipboard():
  im = ImageGrab.grabclipboard()
  if im is None:
    print('No image in clipboard')
    return
  print('image size: %sx%s\n>>>\n' % (im.size[0], im.size[1]))
  mf = io.BytesIO()
  im.save(mf, 'JPEG')
  mf.seek(0)
  buf = mf.read()
  b64 = base64.encodebytes(buf)
  access_token = get_auth()
  if access_token is not None:
    url = 'https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token=%s' % access_token
    data = urllib.parse.urlencode({'image' : b64}).encode()
    req = urllib.request.Request(url, method='POST')
    req.add_header('Content-Type', 'application/x-www-form-urlencoded')
    with urllib.request.urlopen(req, data) as p:
      res = p.read().decode('utf-8')
      o = json.loads(res)
      if o['words_result'] is not None:
        for w in o['words_result']:
          print(w['words'])
      print('\n<<<')
  else:
    print('access_token is none')
 
if __name__ == '__main__':
 
  x = input('ocr form clipboard image: z to ocr, q to quit-->')
  while(x != 'q'):
    if x=='z':
      ocr_clipboard()
    x = input('ocr from clipboard image: r to ocr, q to quit-->')
  print('bye')

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持安科网。

相关推荐