#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# test_pil.py
#
# Copyright 2015 John Coppens <john@jcoppens.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
import pygtk
import gtk
import glob
import os.path as osp
from os import rename
import re
import subprocess as sp
# Scratch PNG handed to Tesseract; it is overwritten on every recognition run.
temp_image = "/tmp/test_pil.png"
# Suffixes accepted as image files (case-sensitive match on the file name end).
# Raw string: the backslash must reach the regex engine untouched instead of
# relying on "\." being an unrecognised (and therefore preserved) str escape.
image_re = r"\.(?:jpe?g|png|gif)$"
class RecognizeDigits(object):
    """OCR the digits inside a selected region of an image with Tesseract and
    let the user confirm or correct the result in a dialog.
    """

    def __init__(self):
        pass

    @staticmethod
    def _clip_area(x0, y0, x1, y1, width, height):
        """Return (x, y, w, h) of the selection clipped to a width x height
        image, or None when no part of the selection lies inside the image.

        The two corners may be given in any order, so a drag from the
        bottom-right towards the top-left is handled as well.
        """
        left, right = sorted((x0, x1))
        top, bottom = sorted((y0, y1))
        left, top = max(left, 0), max(top, 0)
        right, bottom = min(right, width), min(bottom, height)
        if right <= left or bottom <= top:
            return None
        return left, top, right - left, bottom - top

    def process(self, img, x0, y0, x1, y1):
        """ Receive the gtk.Image, and the limits of the selected area (in
            window coordinates!)
            Call Tesseract on the area, and give the possibility to edit the
            result.

            Returns None if NO is pressed, if no image is loaded, or if the
            selection is empty / lies outside the image; otherwise returns
            the OCR'd (and possibly edited) text.

            Raises whatever subprocess.check_output raises when the
            tesseract executable is missing or exits with an error.
        """
        pixbuf = img.get_pixbuf()
        if pixbuf is None:
            # Nothing has been loaded into the gtk.Image yet.
            return None

        area = self._clip_area(x0, y0, x1, y1,
                               pixbuf.get_width(), pixbuf.get_height())
        if area is None:
            # A plain click (zero-sized drag) or a selection off the image.
            return None

        pixbuf.subpixbuf(*area).save(temp_image, "png")

        # "-psm" and its value must be separate argv entries, otherwise
        # Tesseract does not recognise the option; mode 7 treats the image
        # as a single text line.  "digits" is the digits-only config.
        out = sp.check_output(("tesseract", temp_image, "stdout",
                               "-psm", "7", "digits"))
        out = out.replace(" ", "").strip()

        dlg = gtk.MessageDialog(type=gtk.MESSAGE_QUESTION,
                                flags=gtk.DIALOG_MODAL,
                                buttons=gtk.BUTTONS_YES_NO,
                                message_format="The number read is:")
        # Editable field inside the dialog so the user can fix OCR mistakes.
        entry = gtk.Entry()
        entry.set_text(out)
        dlg.get_message_area().pack_start(entry)
        entry.show()

        response = dlg.run()
        # Read the text before the dialog (and the entry in it) is destroyed.
        nr = entry.get_text()
        dlg.destroy()

        if response == gtk.RESPONSE_YES:
            return nr
        return None
class FileSelector(gtk.VBox):
    """ Provides a folder selector (at the top) and a list of files in the
        selected folder. On selecting a file, the FileSelector calls the
        function provided to the constructor (image_viewer) with the full
        path of that file.
    """

    def __init__(self, image_viewer):
        gtk.VBox.__init__(self)
        self.image_viewer = image_viewer
        # Folder currently listed; None until the user has picked one.
        self.imgdir = None

        fc = gtk.FileChooserButton('Select a folder')
        fc.set_action(gtk.FILE_CHOOSER_ACTION_SELECT_FOLDER)
        fc.connect("selection-changed", self.on_file_set)
        self.pack_start(fc, expand=False, fill=True)

        # Single-column list holding the base names of the image files.
        self.tstore = gtk.ListStore(str)
        self.tview = gtk.TreeView(self.tstore)
        self.tsel = self.tview.get_selection()
        self.tsel.connect("changed", self.on_selection_changed)

        renderer = gtk.CellRendererText()
        col = gtk.TreeViewColumn(None, renderer, text=0)
        self.tview.append_column(col)

        scrw = gtk.ScrolledWindow()
        scrw.add(self.tview)
        self.pack_start(scrw, expand=True, fill=True)

    def on_file_set(self, fcb):
        """Refill the list with the image files of the folder chosen in fcb."""
        self.tstore.clear()
        self.imgdir = fcb.get_filename()
        if self.imgdir is None:
            # The chooser has no (local) folder selected: leave the list empty.
            return

        # Sorted so the listing order does not depend on the file system.
        for path in sorted(glob.glob(osp.join(self.imgdir, "*"))):
            if re.search(image_re, path):
                self.tstore.append([osp.basename(path)])

    def on_selection_changed(self, sel):
        """Pass the full path of the newly selected file to image_viewer."""
        model, itr = sel.get_selected()
        # itr is None when the selection was cleared (e.g. list refilled).
        if itr is None or self.imgdir is None:
            return
        base = model.get(itr, 0)
        self.image_viewer(osp.join(self.imgdir, base[0]))
class Status(gtk.Table):
    """ Small status panel showing the X/Y coordinates of the top-left and
        bottom-right corners of the area selected in the image.
    """

    def __init__(self):
        gtk.Table.__init__(self)

        # Column headers and row captions: (text, left, right, top, bottom).
        captions = (("X",             1, 2, 0, 1),
                    ("Y",             2, 3, 0, 1),
                    ("Top left:",     0, 1, 1, 2),
                    ("Bottom right:", 0, 1, 2, 3))
        for text, left, right, top, bottom in captions:
            self.attach(gtk.Label(text), left, right, top, bottom,
                        yoptions=gtk.FILL)

        # One narrow entry per coordinate, keyed by its name.
        cells = (("x0", 1, 2, 1, 2), ("y0", 2, 3, 1, 2),
                 ("x1", 1, 2, 2, 3), ("y1", 2, 3, 2, 3))
        self.entries = {}
        for key, left, right, top, bottom in cells:
            field = gtk.Entry()
            self.entries[key] = field
            field.set_width_chars(6)
            self.attach(field, left, right, top, bottom, yoptions=gtk.FILL)

    def set_top_left(self, x0, y0):
        """Remember the top-left corner and display it in the x0/y0 entries."""
        self.x0, self.y0 = x0, y0
        for key, value in (("x0", x0), ("y0", y0)):
            self.entries[key].set_text(str(value))

    def set_bottom_right(self, x1, y1):
        """Remember the bottom-right corner and display it in the x1/y1 entries."""
        self.x1, self.y1 = x1, y1
        for key, value in (("x1", x1), ("y1", y1)):
            self.entries[key].set_text(str(value))
class ImageViewer(gtk.ScrolledWindow):
    """ Provides a scrollwindow to move the image around. It also detects
        button press and release events (left button), will call status
        to update the coordinates, and will call task on button release.

        task is a class (or factory) whose instances offer
        process(img, x0, y0, x1, y1) returning the new base name for the
        image file, or None to leave the file alone.
    """

    def __init__(self, status, task=None):
        gtk.ScrolledWindow.__init__(self)
        self.task = task
        self.status = status
        self.drawing = False        # True between left press and release
        self.prev_rect = None
        self.imagename = None       # path of the image on display, if any

        self.viewport = gtk.Viewport()
        self.viewport.connect("button-press-event", self.on_button_pressed)
        self.viewport.connect("button-release-event", self.on_button_released)
        self.viewport.set_events(gtk.gdk.BUTTON_PRESS_MASK |
                                 gtk.gdk.BUTTON_RELEASE_MASK)

        self.img = gtk.Image()
        self.viewport.add(self.img)
        self.add(self.viewport)

    def set_image(self, fname):
        """Display the image file fname and remember its path."""
        self.imagename = fname
        self.img.set_from_file(fname)

    def on_button_pressed(self, viewport, event):
        """Left button pressed: record the start corner of the selection."""
        if event.button == 1:
            # NOTE(review): event coordinates are used as-is and are not
            # translated into image pixels - confirm this is adequate when
            # the image is scrolled or smaller than the viewport.
            self.x0, self.y0 = int(event.x), int(event.y)
            self.status.set_top_left(self.x0, self.y0)
            self.drawing = True

    def on_button_released(self, viewport, event):
        """Left button released: record the end corner, run the task on the
        selection and rename the image file after the task's result.
        """
        # Ignore other buttons and releases without a preceding press
        # (x0/y0 would not be set).
        if event.button != 1 or not self.drawing:
            return
        self.drawing = False

        self.x1, self.y1 = int(event.x), int(event.y)
        self.status.set_bottom_right(self.x1, self.y1)

        if self.task is None or self.imagename is None:
            return

        res = self.task().process(self.img, self.x0, self.y0,
                                  self.x1, self.y1)
        # None: the user declined. Empty text would yield a bare ".ext" name.
        if not res:
            return
        # The result becomes a file name; do not let it point elsewhere.
        if osp.basename(res) != res:
            print("Not renaming %s: invalid name %r" % (self.imagename, res))
            return

        folder, oldname = osp.split(self.imagename)
        # Keep the original extension so the suffix still matches the data.
        newname = osp.join(folder, res + osp.splitext(oldname)[1])
        if newname == self.imagename:
            return
        if osp.exists(newname):
            # os.rename would silently replace the other image on POSIX.
            print("Not renaming %s: %s already exists"
                  % (self.imagename, newname))
            return

        rename(self.imagename, newname)
        print("Renaming %s %s" % (self.imagename, newname))
        # Track the new path so a second selection renames the right file.
        self.imagename = newname
class MainWindow(gtk.Window):
    """Top-level window: file selector and status panel in the left column,
    image viewer filling the right column.
    """

    def __init__(self):
        gtk.Window.__init__(self)
        self.connect("delete-event", self.on_delete_event)
        self.set_size_request(600, 300)

        grid = gtk.Table()

        # Image selector
        files = FileSelector(self.update_image)
        grid.attach(files, 0, 1, 0, 1,
                    yoptions=gtk.FILL | gtk.EXPAND, xoptions=gtk.FILL)

        # Some status information
        self.status = Status()
        grid.attach(self.status, 0, 1, 1, 2,
                    yoptions=gtk.FILL, xoptions=gtk.FILL)

        # The image viewer; it instantiates RecognizeDigits per selection
        self.viewer = ImageViewer(self.status, RecognizeDigits)
        grid.attach(self.viewer, 1, 2, 0, 2)

        self.add(grid)
        self.show_all()

    def update_image(self, fname):
        """Callback for the file selector: show fname in the viewer."""
        self.viewer.set_image(fname)

    def on_delete_event(self, wdg, data):
        """Window closed by the user: leave the GTK main loop."""
        gtk.main_quit()
        # False lets GTK carry on with the default handling (destroy).
        return False

    def run(self):
        """Enter the GTK main loop; returns once gtk.main_quit() is called."""
        # gtk.main() replaces the deprecated gtk.mainloop().
        gtk.main()
def main():
    """Build the main window, run it until it is closed, and return 0."""
    MainWindow().run()
    return 0


if __name__ == "__main__":
    main()
#!/bin/bash
# Slide horizontal strips of several heights down a pre-processed image, run
# each strip through Tesseract (digits only) and print "<ndigits>:<digits>"
# for every strip that yields 1-5 digits, sorted by number of digits.
#
# Usage: script [image]      (image defaults to c1.jpg)
image=${1-c1.jpg}
tmp="tmp_$$.png"

# Remove the scratch files on exit; the slice_*.png files are kept so the
# individual strips can be inspected afterwards.
trap 'rm -f "$tmp" temp.txt' EXIT

# Make everything that is nearly black go fully black, everything else goes white. Median for noise
# convert -delay 500 c1.jpg c2.jpg c3.jpg -normalize -fuzz 25% -fill black -opaque black -fuzz 0 -fill white +opaque black -median 9 out.gif
convert "${image}" -normalize \
        -fuzz 25% -fill black -opaque black \
        -fuzz 0   -fill white +opaque black \
        -median 9 "$tmp" || exit 1

# Get height of image - h
h=$(identify -format "%h" "${image}") || exit 1

# Generate strips that are 40%, 30%, 20% and 10% of image height
for pc in 40 30 20 10; do
    # Calculate height of this strip in pixels - sh
    ((sh=(h*pc)/100))
    # Calculate offset from top of picture to top of bottom strip - omax
    ((omax=h-sh))
    # Calculate step size, there will be 20 steps
    ((step=omax/20))
    # On very small images the integer division gives 0, which would make
    # the loop below spin forever.
    ((step < 1)) && step=1

    # Cut strips sh pixels high from the picture starting at top and working down in 20 steps
    for ((off=0; off<omax; off+=step)); do
        t=$(printf "%05d" "$off")
        slice="slice_${pc}_${t}.png"

        # Extract strip and resize to 80 pixels tall for tesseract
        convert "$tmp" -crop "x${sh}+0+${off}" \
                -resize x80 -median 3 -median 3 -median 3 \
                -threshold 90% +repage "$slice"

        # Run slice through tesseract, seeking only digits
        tesseract "$slice" temp digits quiet

        # Now try and assess quality of output :-) ... by counting number of digits.
        # The set is '0-9' without brackets: tr would treat "[" and "]" as
        # ordinary members of the set and keep them as well.
        digits=$(tr -cd '0-9' < temp.txt)
        ndigits=${#digits}
        [ "$ndigits" -gt 0 ] && [ "$ndigits" -lt 6 ] && echo "$ndigits:$digits"
    done
done | sort -n
奶牛618的输出(第一个数字是找到的位数)
(此处原有的输出示例在抓取时丢失)
奶牛2755的输出(第一个数字是找到的位数)
2:51
3:071
3:191
3:517
4:2155 <--- pretty close
4:2755 <--- nailed that puppy :-)
4:2755 <--- nailed that puppy :-)
4:5212
5:12755 <--- pretty close
使用PIL(Python图像库),您可以轻松地加载和处理图像。使用灰度转换(grayscale conversion),可以将RGB转换为灰度,这样更容易检测亮度级别。如果你想对图像做阈值处理(以检测白板),可以用point()函数来映射颜色。
另一方面,你也可以写一个简单的程序来辅助处理,
这会大大简化这个过程!使用TkInter、PyGTK、PyQt或其他窗口工具包,编写这样的程序应该比较容易。
编辑:我需要一个类似的程序来分类这里的图像-虽然不是OCR他们。所以我最终决定这是一个好时机,并做了第一次尝试(使用OCR!)。在试用之前备份一下你的图片! 快速手册:
以下是pre-alpha(早期预览)版程序:
我一直在重新审视这个问题,并在这一过程中获得了一些灵感……
Tesseract可以接受自定义字典,如果您再多挖掘一点,那么从v3.0开始,它接受命令行参数
digits
使其只识别数字,这似乎是一个很好的想法,可以满足您的需要。可能没有必要找到带数字的电路板-可能更容易用不同的图像切片多次运行Tesseract,让它自己尝试一下,因为这是它应该做的。
所以,我决定对图像进行预处理,把25%以内的黑色改为纯黑色,其他的都改为纯白色。这样可以得到这样的预处理图像:
接下来,我生成一系列图像并将其传递给Tesseract。我决定假设数字的高度可能占图像高度的40%到10%,所以我对40%、30%、20%和10%高度的条带做了一个循环。然后,我将条带从上到下分20步滑动,把每个条带传递给Tesseract,直到条带基本到达图像的底部。
以下是40%条带-动画的每一帧都传递给Tesseract:
以下是20%条带-动画的每一帧都传递给Tesseract:
得到这些条带后,我适当调整了它们的大小,使其落在Tesseract识别效果最好的尺寸范围内,并去除了噪点。然后,我将它们传递给Tesseract,并通过统计识别出的数字位数来粗略评估识别质量。最后,我按位数对输出进行排序——估计位数越多越好……
还有一些粗糙之处和细节可以自己再调整,但这是一个开始!
奶牛618的输出(第一个数字是找到的位数)
(此处原有的输出示例在抓取时丢失)
奶牛2755的输出(第一个数字是找到的位数)
奶牛3174的输出(第一个数字是找到的位数)
很酷的问题——谢谢!
我在OpenCV的帮助下想出了一个非常简单的解决方案。
调整图像大小,以便通过轮廓删除异常值(更容易测量区域)
在1/3的顶部裁剪区域
提取前景
提取轮廓;如果轮廓精度较低,可以使用ERFilter提取文本
创建字符分类器并在文本候选中循环
结果:
完整的源代码放在github
相关问题 更多 >
编程相关推荐