如何基于imageHash对字典中的相似图像进行聚类,而不重复字典中的相同图像

2024-09-27 19:09:22 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个包含图像的文件夹,正试图在 Python 字典中为相似的图像建立聚类:以图像哈希(imagehash)作为键,以相似图像的列表作为值。如果某张图像已经存在于其他键的列表中,我该如何防止它再生成一个新的键?以下是我目前写的代码:


from PIL import Image
import imagehash
import cv2
import numpy as np
import dhash
import distance

# Module-level cache: image path -> grayscale image produced by _get_image.
norm_cache: dict = {}
def _get_image(image_path: str) -> "Image.Image | None":
    """Load an image file as a 512x512 8-bit grayscale PIL image.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A mode ``"L"`` PIL image, or ``None`` when OpenCV cannot read or
        decode the file.
    """
    img_arr = cv2.imread(image_path)
    # cv2.imread reports an unreadable file by returning None instead of
    # raising, so the failure has to be checked for explicitly.
    if img_arr is None:
        return None

    img_arr = cv2.resize(img_arr, (512, 512), interpolation=cv2.INTER_AREA)

    # Convert image into 3 channels if it contains 4
    if img_arr.ndim > 2 and img_arr.shape[2] == 4:
        img_arr = cv2.cvtColor(img_arr, cv2.COLOR_BGRA2BGR)

    # Luminance = 0.299 R + 0.587 G + 0.114 B. OpenCV stores the channels in
    # B, G, R order, so the weights are listed in that order.
    data = np.inner(img_arr, [114, 587, 299]) / 1000.0

    return Image.fromarray(np.uint8(data), "L")


def find_similar_images(userpath, max_distance=22):
    """Group the images in a folder into clusters of similar-looking files.

    Every image is hashed with dhash. An image joins the existing cluster
    whose key hash is closest to its own, provided that distance is at most
    ``max_distance``; a new key is created only when no existing cluster is
    close enough. Each image therefore ends up in exactly one list.

    Args:
        userpath: Folder to scan (non-recursively) for image files.
        max_distance: Largest ``distance.hamming`` value between two hex
            hashes for the images to be treated as similar.

    Returns:
        Dict mapping the hex hash of the first image of each cluster to the
        list of image paths in that cluster. The clusters and their count
        are also printed.
    """
    import os

    image_suffixes = (".png", ".jpg", ".jpeg", ".bmp", ".gif")
    # Sorted so that cluster keys do not depend on directory listing order.
    image_filenames = [
        os.path.join(userpath, name)
        for name in sorted(os.listdir(userpath))
        if name.lower().endswith(image_suffixes)
    ]

    images = {}
    for img in image_filenames:
        gray = norm_cache.get(img)
        if gray is None:
            gray = _get_image(img)
            if gray is None:
                print(f"Skipping unreadable image: {img}")
                continue
            norm_cache[img] = gray  # Update cache...

        row_hash, col_hash = dhash.dhash_row_col(gray)
        img_hash = dhash.format_hex(row_hash, col_hash)

        # Look for the nearest existing cluster before creating a new key;
        # this is what keeps an image from appearing under two keys.
        closest_key = None
        closest_dist = max_distance + 1
        for key in images:
            # NOTE(review): hamming() over hex strings counts differing hex
            # digits, not differing bits - confirm that is the intended metric.
            dist = distance.hamming(img_hash, key)
            if dist < closest_dist:
                closest_key, closest_dist = key, dist

        if closest_key is None:
            images[img_hash] = [img]
        else:
            images[closest_key].append(img)

    for img_list in images.values():
        print(", ".join(img_list))
    print(len(images))
    return images
   


if __name__ == '__main__':
    import os
    import sys

    # The folder to scan is taken from the first command-line argument.
    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <image-folder>")
    find_similar_images(userpath=sys.argv[1])
   

Tags: path, in, image, import, norm, cache, img, if

热门问题