使用opencv从表中检测列

2024-09-28 03:22:51 发布

您现在位置:Python中文网/ 问答频道 /正文

我编写了一段从表格图像中检测列的代码,步骤如下:

  1. 从表格图像中删除水平线
  2. 反转图像
  3. 使用(25,4)的核进行膨胀,迭代2次
  4. 在膨胀后的图像上使用(2,6)的核进行腐蚀,迭代3次
  5. 计算结果图像的按列像素和,然后在各个窗口内查找波谷
  6. 窗口大小取图像宽度的35%
  7. 计算水平阈值,用于判定窗口区域内哪些波谷算作有效波谷(最大值与最小值之差的阈值为0.1,高斯sigma=15)
  8. 使用高斯sigma=15平滑窗口区域,并将height设置为上面计算的水平阈值,以查找低于该阈值的波谷
  9. 这样就得到了一个包含所有波谷(列坐标)的列表,其中可能含有误报

从上述列表中去除误报:

  1. 去除穿过文本区域的列(阈值:穿过两个以上文本区域)
  2. 将同一大段列间空白中的多个列合并为一列

问题是:

  1. 如果表格中单词之间的间距大到足以被检测为列,就会在该处多画出一列:

extra column is drawn in description

  2. 如果某一列为空,或者没有足够的值来形成可检测的波谷,则该列不会被画出:

column is not drawn between check number and description

我需要这两个问题的解决方案。有没有其他方法可以更准确地完成这项任务?

import cv2
from google.colab.patches import cv2_imshow
import os
from google.colab import files

import numpy as np 
import matplotlib.pyplot as plt
from itertools import groupby
from operator import itemgetter

from scipy.ndimage.filters import gaussian_filter1d

from scipy.signal import find_peaks
from cv2 import imwrite
import pandas as pd

# --- Step 1: remove horizontal ruling lines from the table image ------------
# Reads "table_with_rows.png", whitens every detected horizontal line and
# writes the result to "row_removed_1.jpg" for the next stage.
image = cv2.imread("table_with_rows.png") #handle corner cases with jpeg images
if image is None:
    # cv2.imread reports an unreadable or missing file by returning None;
    # fail here with a clear message instead of inside cvtColor.
    raise FileNotFoundError("table_with_rows.png")

# NOTE: `img` is an alias of `image`, not a copy. The contour overlay drawn
# below and the whitening therefore act on the same pixel buffer.
img = image

# Convert to grayscale, then to an inverted binary image (ink becomes 255).
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# NOTE(review): with THRESH_OTSU set, OpenCV picks the threshold itself and
# the literal 220 should be ignored - confirm against the OpenCV docs.
thresh = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
print(thresh.shape)

# A morphological opening with a wide, 1-pixel-high kernel keeps only
# structures that are long horizontally, i.e. the horizontal lines.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 1))
detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, horizontal_kernel, iterations=2)

cnts = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
# findContours returns 2 or 3 values depending on the OpenCV version;
# pick the contour list in either case.
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    cv2.drawContours(image, [c], -1, (0, 255, 0), 2)
    x, y, w, h = cv2.boundingRect(c)
    # Whiten the line plus a margin of two pixels above/below and one pixel
    # left/right. The start indices are clamped at 0: a negative slice start
    # would wrap around to the far edge and select an empty region, leaving
    # lines that touch the top or left border in place.
    top = max(y - 2, 0)
    left = max(x - 1, 0)
    img[top:y + h + 2, left:x + w + 1] = 255
cv2.imwrite("row_removed_1.jpg", img)
cv2_imshow(image)

# --- Steps 2-4: invert, dilate vertically, erode horizontally ---------------
# Re-read the line-free image as a single-channel grayscale array (flag 0).
img = cv2.imread("row_removed_1.jpg", 0)
h, w = img.shape
# Width of the sliding window used later: 35% of the image width.
window_size = int(0.35 * w)

# Invert so that ink becomes bright and background dark.
inv_img = cv2.bitwise_not(img)

# 25 rows x 2 columns of ones: smears ink vertically so that the text of one
# table column merges into a tall blob.
dilation_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 25))
dilation = cv2.dilate(inv_img, dilation_kernel, iterations=2)
inv_dilation = 255 - dilation

# 2 rows x 8 columns of ones: shrinks the blobs horizontally.
erosion_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 2))
erosion = cv2.erode(dilation, erosion_kernel, iterations=3)
cv2_imshow(erosion)


# --- Step 5: normalised column-wise intensity profile -----------------------
# Sum the pixel values of every image column, then scale the profile so that
# its largest value is 1.0. The result stays a plain Python list because the
# windowing code slices it and takes its len().
col_totals = np.sum(erosion, axis=0, dtype=np.float64)

# Divide by the maximum computed ONCE, before any element is changed.
# Re-evaluating max() while overwriting the list in place would divide the
# entries after the global maximum by a smaller value and distort the profile.
peak_total = col_totals.max() if col_totals.size else 0.0
if peak_total > 0:
    # An all-zero profile is left as zeros instead of dividing by zero.
    col_totals = col_totals / peak_total
img_col_sum = col_totals.tolist()

print('window_size :::',window_size)

print('*********** Image col sum graph after normalization ***********')
# Plot the normalised column-sum profile.
plt.plot(img_col_sum)
plt.show()

# --- Steps 6-9: sliding-window valley search on the column-sum profile ------
# Every window of the profile is smoothed, its local minima/maxima are
# located, and minima that lie low enough are recorded in `peaks` as
# candidate column separators (x coordinates in image space).

# How far each window (except the first) is shifted back: min(300, window_size).
deviation = 300 if window_size > 300 else window_size
# Collected candidate column x-coordinates over all windows.
peaks=[]

for i in range(0,len(img_col_sum),window_size):
  # The loop variable is re-bound: from here on `i` is the window START,
  # i.e. the nominal start moved back by `deviation` (0 stays 0).
  i = 0 if i==0 else (i-deviation) ## version2
  print('i===', i)
  # The window spans window_size+300 samples from the shifted start
  # (fewer at the right edge of the profile).
  # NOTE(review): consecutive windows overlap, so the same valley can
  # presumably be appended to `peaks` more than once - confirm that the later
  # false-positive removal de-duplicates.
  window_val = img_col_sum[i:i+window_size+300]
  #print(window_val)


  # Gaussian smoothing (sigma=15) of the window.
  ysmoothed_15 = gaussian_filter1d(window_val,sigma=15)
  #Plotting the smoothed graph (to pick the dips in graph)
  plt.plot(ysmoothed_15)
  #plt.savefig("img_col_sum_flattened_" + str(filename.split('.')[0]))
  plt.title('with sigma 15')
  plt.show()

  #Getting minimas of the smoothened graph and plotting
  # Local minima are found as peaks of the negated signal.
  min_peaks_15,_=find_peaks(-1*ysmoothed_15)
  # `a` is assigned but not read anywhere in the visible code.
  a = np.array(ysmoothed_15)
  #print(a[peaks])
  plt.plot(ysmoothed_15)
  plt.plot(min_peaks_15,ysmoothed_15[min_peaks_15],"x", label='min - x')

  #max_minima_val = 0
  print('..... minimas for window .....')
  print(ysmoothed_15[min_peaks_15])
  # Despite its name this is the LOWEST local minimum (np.min); it falls back
  # to 0 when the window has no local minimum.
  max_minima_val = np.min(ysmoothed_15[min_peaks_15] if len(min_peaks_15)!=0 else 0 )
  print(max_minima_val)

  #Getting maximas of the smoothened graph and plotting
  max_peaks_15,_=find_peaks(ysmoothed_15)
  a = np.array(ysmoothed_15)
  #print(a[peaks])
  plt.plot(ysmoothed_15)
  plt.plot(max_peaks_15,ysmoothed_15[max_peaks_15],"o", label='max - o')

  plt.show()

  print('..... maximas for window .....')
  print(ysmoothed_15[max_peaks_15])
  # Highest local maximum; falls back to 1 when the window has none.
  max_maxima_val = np.max(ysmoothed_15[max_peaks_15] if len(max_peaks_15)!=0 else 1)
  print(max_maxima_val)

  # Spread between the highest maximum and the lowest minimum of the window.
  diff = max_maxima_val - max_minima_val
  print('difference between minima and maxima point for the range of {0} - {1} ===> {2}'.format(i,(i+window_size+300),diff))

  #################### . calculate plateau ###########################

  ######## first check if plateau region is below threshold then only draw column 
  print('checking plateau............................!!!!!!')
  print(i)
  #if window_val[0] < height:
  # Sign of the first differences of the UNSMOOTHED window values.
  diff1 = np.diff(window_val)
  #array([ 0,  0,  0,  2,  1, -2,  0,  0,  0])
  gradient = np.sign(diff1)
  # If the first two samples of the window are equal (flat start), the window
  # start itself is recorded as a column position.
  # NOTE(review): only flatness is tested here; the level check in the
  # commented-out line above is not applied.
  if gradient[0] == 0:
    peaks.append(i)
    print('column will be drawn at :::: ', i)


  # if difference between minima and maxima is greater than certain threshold then only there is variation which can be considered as column

  if diff > 0.1 :  
    # Level threshold: lowest minimum plus two thirds of the spread.
    height = max_minima_val + diff/1.5
    print('Threshold for the range of {0} - {1} ===> {2}'.format(i,(i+window_size+300),height))

    # Same input and same sigma=15 as ysmoothed_15 above, so this is the same
    # smoothed curve under a different name.
    ysmoothed_20 = gaussian_filter1d(window_val,sigma=15)
    #Plotting the smoothed graph (to pick the dips in graph)
    #plt.plot(ysmoothed_20)
    #plt.savefig("img_col_sum_flattened_" + str(filename.split('.')[0]))
    #plt.show()

    #Getting minimas of the smoothened graph and plotting
    # On the negated signal, height=-height keeps only the minima whose
    # smoothed value is at or below `height`.
    win_peaks_20,_=find_peaks(-1*ysmoothed_20, height = -1*height)
    a = np.array(ysmoothed_20)
    #print(a[peaks])
    plt.plot(ysmoothed_20)
    plt.plot(win_peaks_20,ysmoothed_20[win_peaks_20],"x")
    plt.title('with sigma 15')
    plt.show()

    # Convert window-relative indices to image column coordinates.
    x_cord_win_peaks = [x + i for x in win_peaks_20] 

    peaks.extend(x_cord_win_peaks)
    #print(peaks)

# for loop of window is completed

print('troughs by taking winodws',peaks)


########## after this false positive is removed through ocr response

Tags: theimportimgifnppltcolval

热门问题