# bdocr.py (1.5 KB)
# -*- coding: utf-8 -*-
import json
import os
import re

from aip import AipOcr
  4. APP_ID = '11225532'
  5. API_KEY = 'iGhZP96p570OG62oHPRExG09'
  6. SECRET_KEY = 'XGTCHsTf7sWkVmBNKNoGIcWfGXrGHYHI'
  7. client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
  8. def get_file_content(filePath):
  9. with open(filePath, 'rb') as fp:
  10. return fp.read()
  11. def rec_basicgeneral(imgpath):
  12. """
  13. 通用文字识别
  14. """
  15. words = []
  16. image = get_file_content(imgpath)
  17. #res_image = client.basicGeneral(image)
  18. res_image = client.general(image)
  19. for item in res_image["words_result"]:
  20. print(item)
  21. if item["words"]:
  22. words.append(item)
  23. return words
  24. def rec_general_num(imgpath):
  25. """
  26. 识别出数字(题号)
  27. """
  28. nums = []
  29. nums_dct = {}
  30. choices = set()
  31. words = rec_basicgeneral(imgpath)
  32. for word in words:
  33. location = word["location"]
  34. word = word["words"]
  35. res = re.match(r"^(\d+).*",word)
  36. if res:
  37. nums.append(int(res.group(1)))
  38. nums_dct[int(res.group(1))] = location
  39. res = re.findall(r"(A|B|C|D|E|F|G)",word)
  40. if res:
  41. choices.update(set(res))
  42. print(nums)
  43. if (1 in nums) and (2 in nums) and (nums.index(2)-nums.index(1) > 1):
  44. rank_order = 1
  45. else:
  46. rank_order = 2
  47. nums.sort()
  48. return nums,nums_dct,list(choices),rank_order
  49. if __name__ == "__main__":
  50. imgpath = "/tmp/src4.png"
  51. imgpath = "/tmp/test_0_crop.png"
  52. nums,nums_dct,choices,rank_order = rec_general_num(imgpath)
  53. print(nums,nums_dct,choices,rank_order,222222222222222222)