我正在尝试绕过某个网站的验证码图像,需要接近 85% 的准确率。过去一周我一直在尝试,但现在看来这似乎不可能做到。目前我的准确率只有 40%。请帮忙。我在下面分享我的代码和一些图像示例:
图像中始终只包含数字,但问题在于其中的干扰线条。我无法去除这些线条,因为它们的梯度与数字的梯度相同。
# Read a captcha image, clean it up with OpenCV, run Tesseract restricted to
# digits, and print the recognised digit string.

# enable the following if loading tesseract from local server
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'

# reading image using opencv
image_path = 'C:\\images\\ipcaptcha\\asys.png'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError('Could not read image: ' + image_path)

# Smooth noise before thresholding. GaussianBlur expects the kernel size as a
# tuple of odd integers, not NumPy arrays.
image = cv2.GaussianBlur(image, (3, 3), 0)

# Apply Threshold (Otsu picks the cut-off automatically) and keep working on
# the binarised result rather than discarding it.
ret, thresh_img = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Dilate and Erode
image = cv2.dilate(thresh_img, np.ones((2, 2), np.uint8))
image = cv2.erode(image, np.ones((3, 3), np.uint8))

# Enlarge the image to 300% for OCR. INTER_CUBIC is the interpolation OpenCV
# recommends for upscaling; INTER_AREA is meant for shrinking.
scale_percent = 300
width = int(image.shape[1] * scale_percent / 100)
height = int(image.shape[0] * scale_percent / 100)
dim = (width, height)
image = cv2.resize(image, dim, interpolation=cv2.INTER_CUBIC)

# configuring parameters for tesseract
custom_config = r'--psm 8 --oem 3 -c tessedit_char_whitelist=0123456789'

# now feeding image to tesseract
details = pytesseract.image_to_data(image, output_type=Output.DICT, config=custom_config, lang='eng')

# Draw the detected boxes on a colour copy so that the rectangles are really
# green and the single-channel OCR input is left unmodified.
annotated = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
total_boxes = len(details['text'])
for sequence_number in range(total_boxes):
    # conf may be an int, a float or a numeric string depending on the
    # pytesseract version; float() accepts all of them.
    if float(details['conf'][sequence_number]) > 30:
        x = details['left'][sequence_number]
        y = details['top'][sequence_number]
        box_w = details['width'][sequence_number]
        box_h = details['height'][sequence_number]
        cv2.rectangle(annotated, (x, y), (x + box_w, y + box_h), (0, 255, 0), 2)

# get the details read by tesseract
show_text = details['text']

# Keep only digit characters from tokens longer than one character.
digits = []
for token in show_text:
    if len(token) > 1:
        digits.extend(ch for ch in token if ch.isdigit())
final1 = ''.join(digits)

# If more than six digits were read, drop the leading one.
final = final1[1:] if len(final1) > 6 else final1

cv2.imshow('captured text', annotated)
# Maintain output window until user presses a key
cv2.waitKey(0)
# Destroying present windows on screen
cv2.destroyAllWindows()
print(final)
print("The End")
目前没有回答
相关问题 更多 >
编程相关推荐