tesseract无法识别白名单后的单个文本段

# Draw a box around every word Tesseract detects in `threshold_img`, then
# show the annotated image and save it to "before.png".
# `threshold_img`, `pytesseract`, `Output` and `cv2` are expected to be in
# scope already (they are defined/imported outside this snippet).

# Tesseract options: OCR engine mode 3 and page segmentation mode 6.
# Optional character whitelist, currently disabled:
# whitelist = "-c tessedit_char_whitelist=1234567890CBDE"
custom_config = r'--oem 3 --psm 6 '

# Minimum confidence a detection needs for its box to be drawn.
# Tesseract's TSV output uses -1 for rows that are not words (page, block,
# paragraph, line), so a threshold of 0 keeps every word and drops those rows.
CONFIDENCE = 0

# Feed the image to Tesseract and get per-box data as a dict of parallel lists.
details = pytesseract.image_to_data(
    threshold_img,
    output_type=Output.DICT,
    config=custom_config,
    lang='eng',
)
print(details.keys())

boxes = zip(
    details['conf'],
    details['left'],
    details['top'],
    details['width'],
    details['height'],
)
for conf, x, y, w, h in boxes:
    # Depending on the pytesseract/Tesseract versions, `conf` can be an int,
    # a float, or a float-formatted string such as '96.06'; int() alone
    # raises ValueError on the latter, so go through float() first.
    if int(float(conf)) >= CONFIDENCE:
        threshold_img = cv2.rectangle(
            threshold_img, (x, y), (x + w, y + h), (0, 255, 0), 2
        )

# Display the annotated image and save a copy.
cv2.imshow('captured text', threshold_img)
cv2.imwrite("before.png", threshold_img)
# Keep the output window open until the user presses a key.
cv2.waitKey(0)
# Close all OpenCV windows.
cv2.destroyAllWindows()

1条回答

网友

1楼 · 发布于 2024-09-27 07:26:14

一种解决办法是：

1. 分别取每个元组并向上采样2倍
2. 应用阈值（threshold）
3. 通过将页面分段模式设置为6进行识别

^{tb1}$

其思想是分别获取每个元组，对其进行上采样，然后应用反二值化阈值（inverse binary threshold）。由于字体的原因，Tesseract误解了几个元组。例如，如果您查看字符D，它看起来像O。如果你想要100%的准确率，那么我建议你训练Tesseract（train the tesseract）。另外，请务必尝试使用其他页面分段模式（page-segmentation-modes）。

以下是数组输出：

[['1C', '55', '55', 'E9', 'BO'], ['1C', '1C', '55', 'BO', '1C'], ['1C', '55', 'BO', '55', 'IC'], ['1C', 'BD', '50', '1C', '1C'], ['1C', '1C', '55', 'BD', 'BD']]

代码：

import cv2
import pytesseract

# Split the image into a GRID x GRID table of equal cells, OCR each cell
# separately, and collect the upper-cased text into `res` (a list of rows).
GRID = 5   # number of rows and of columns the image is divided into
SCALE = 2  # upsampling factor applied to each cell before thresholding

img = cv2.imread("IVemF.png")
if img is None:
    # cv2.imread reports a missing/unreadable file by returning None instead
    # of raising; fail here with a clear message rather than inside cvtColor.
    raise FileNotFoundError("Could not read image: IVemF.png")

gry = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
(h, w) = gry.shape[:2]
cell_h = h // GRID  # leftover pixels at the bottom edge are ignored
cell_w = w // GRID  # leftover pixels at the right edge are ignored

# Page segmentation mode 6. The leading dashes are required: without them
# Tesseract does not treat "psm 6" as the page-segmentation option.
cfg = "--psm 6"
res = []

for r in range(GRID):
    row = []
    for c in range(GRID):
        # Crop the cell at grid position (r, c).
        crp = gry[r * cell_h:(r + 1) * cell_h,
                  c * cell_w:(c + 1) * cell_w]
        (h_crp, w_crp) = crp.shape[:2]
        # Upsample so the glyphs are larger before OCR.
        crp = cv2.resize(crp, (w_crp * SCALE, h_crp * SCALE))
        # Inverse binary threshold; Otsu picks the threshold value itself.
        thr = cv2.threshold(crp, 0, 255,
                            cv2.THRESH_BINARY_INV |
                            cv2.THRESH_OTSU)[1]
        txt = pytesseract.image_to_string(thr, config=cfg)
        # Drop the newline + form-feed terminator Tesseract appends.
        txt = txt.replace("\n\x0c", "").upper()
        row.append(txt)
        print(txt)
        # Show the thresholded cell; blocks until a key is pressed.
        cv2.imshow("thr", thr)
        cv2.waitKey(0)
    res.append(row)

print(res)

相关问题更多 >

编程相关推荐

热门问题

热门文章