python二维卷积优化

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from PIL import Image
import numpy as np

# Source image and the output buffer. Results are written to the copy so that
# every pixel is computed from unmodified neighbours.
img = mpimg.imread('benfrank.png')
imgCopy = img.copy()

# Take the dimensions from the loaded array rather than hard-coding them, so
# the bounds check below is correct for whatever image was actually read.
Height, Width = img.shape[:2]

# 3x3 kernel, indexed [row, col], and the sum of its weights used to
# normalise each output value.
convul = np.array([[0, 0, -5],
                   [0, 1, 0],
                   [-5, 0, 0]])
cHeight, cWidth = convul.shape
summ = convul.sum()


def convulute3x3(x, y):
    """Apply the 3x3 kernel ``convul`` to the pixel at column x, row y.

    Reads the 3x3 neighbourhood from ``img`` and writes the normalised
    weighted sum into ``imgCopy`` for each of the first three channels.
    The kernel is applied without flipping. Pixels on the one-pixel border
    are skipped, so they keep the value copied from the source image.

    x: column index of the pixel.
    y: row index of the pixel.
    """
    # The neighbourhood must lie fully inside the image.
    if x < 1 or x > Width - 2 or y < 1 or y > Height - 2:
        return
    for c in range(3):
        # Kernel rows line up with image rows (the original indexed the
        # kernel as [col, row], which only worked for symmetric kernels).
        window = img[y - 1:y + 2, x - 1:x + 2, c]
        imgCopy[y, x, c] = (window * convul).sum() / summ


# Visit every pixel: y1 is the row index, x1 the column index.
for y1, row in enumerate(img):
    for x1 in range(len(row)):
        convulute3x3(x1, y1)

plt.imshow(imgCopy)
plt.show()

1条回答

网友

1楼 · 发布于 2024-10-03 13:21:12

正如@Reti43在评论中提到的那样，已经有现成的库可以做这件事，但我猜您只是想尝试自己动手实现

我也对如何在Python中手动实现卷积感兴趣。Python循环非常慢，如果您关心速度，那么应该远离纯粹的Python循环，而是坚持使用更矢量化的方法

到目前为止，我最好的方法是使用numpy.lib.stride_tricks.as_strided，它允许您获得高度自定义的numpy数组视图。我使用as_strided获得图像的滑动窗口视图，然后使用np.tensordot对内核执行“更一般的矩阵乘法”（参见文档）。此外，NumPy 1.20（如果我没记错的话）新增了numpy.lib.stride_tricks.sliding_window_view；就目前而言，它不如我下面的代码通用，因为它不支持自定义步长（stride）

import numpy as np 
from numpy.lib.stride_tricks import as_strided


def get_sliding_window(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """Return a strided view holding every kernel-sized window of the image x.

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, kernel of shape (C x P x Q); only P and Q are read here
    rowstride: int, "vertical" step between consecutive windows
    colstride: int, "horizontal" step between consecutive windows

    Returns an array of shape (outRows, outCols, C, P, Q) where element
    [i, j] is x[:, i*rowstride : i*rowstride+P, j*colstride : j*colstride+Q].
    The result is a view created with as_strided: it shares memory with x,
    overlapping windows alias the same elements, and writing through it
    modifies x.
    """
    imgChannels, imgRows, imgCols = x.shape
    _, kernelRows, kernelCols = k.shape
    # as_strided expects strides in bytes. Read them from x itself instead of
    # deriving them from itemsize, which is only valid for C-contiguous input
    # and yields wrong windows for e.g. a transposed (H, W, C) image.
    chanStep, rowStep, colStep = x.strides
    return as_strided(
        x,
        shape=(
            (imgRows - kernelRows) // rowstride + 1,
            (imgCols - kernelCols) // colstride + 1,
            imgChannels,
            kernelRows,
            kernelCols,
        ),
        strides=(rowStep * rowstride, colStep * colstride, chanStep, rowStep, colStep),
    )


def conv2d(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int):
    """
    Slide a multi-channel kernel over a multi-channel image with the given
    step sizes and, at each position, sum the element-wise products of the
    window and the kernel (the kernel is applied as given, without flipping).

    x: np.ndarray, image array of shape (C x N x M), where C is number of channels
    k: np.ndarray, convolution kernel of shape (C x P x Q), where C is number of channels
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size

    Returns a 2-D array with one value per window position.
    """
    windows = get_sliding_window(x, k, rowstride, colstride)
    # Contract each window's (channel, row, col) axes against the whole kernel.
    return np.tensordot(windows, k, axes=([2, 3, 4], [0, 1, 2]))


# Example input: a 2-channel 4x4 image in channel-first (C x N x M) layout.
x = np.array([
    # channel 0
    [[1, 1, 1, 1],
     [1, 1, 1, 1],
     [2, 2, 2, 2],
     [2, 2, 2, 2]],
    # channel 1
    [[1, 1, 2, 2],
     [1, 1, 2, 2],
     [4, 4, 8, 8],
     [4, 4, 8, 8]],
])

# Averaging kernel: 2 channels x 2 x 2 with every weight equal to 1/8, so the
# eight weights sum to 1.
k = np.full((2, 2, 2), 1 / 8)

# Step of 1 in both directions -> 3x3 result.
print(conv2d(x, k, 1, 1))
#[[1.    1.25  1.5  ]
# [2.    2.625 3.25 ]
# [3.    4.    5.   ]]

# Step of 2 in both directions -> 2x2 result.
print(conv2d(x, k, 2, 2))
#[[1.  1.5]
# [3.  5. ]]

附加内容（Bonus）

我实现了一个ascii可视化功能来检查滑动窗口是否正确：

import time
def conv2d_asciiviz(x: np.ndarray, k: np.ndarray, rowstride: int, colstride: int, delay: float = 0.69):
    """Animate conv2d in the terminal, one window position per frame.

    For each window position the elements currently under the kernel are
    wrapped in ANSI yellow escape codes, the channels and the partially
    filled result are printed, and the cursor is moved back up so the next
    frame overwrites the previous one.

    x: np.ndarray, image array of shape (C x N x M); not modified (a copy is used)
    k: np.ndarray, kernel of shape (C x P x Q)
    rowstride: int, "vertical" step size
    colstride: int, "horizontal" step size
    delay: float, seconds to pause after each frame (default 0.69)

    Returns the 2-D float array of results, one value per window position.
    """
    # Work on an object-dtype copy so cells can temporarily hold the
    # highlighted strings; astype already returns a new array.
    x = x.astype(object)
    sliding_window_view = get_sliding_window(x, k, rowstride, colstride)
    highlighter = np.vectorize(lambda cell: f"\x1b[33m{cell}\x1b[0m")
    # Positions not computed yet stay NaN.
    r = np.full(sliding_window_view.shape[:2], np.nan)
    # Number of lines the cursor is moved up after each frame (and down at the end).
    frame_height = x.shape[0] * x.shape[1] + len(r) + 4
    with np.printoptions(nanstr="", formatter={"all": str}):
        for i, row in enumerate(sliding_window_view):
            for j, window in enumerate(row):
                # The window is a view into x: keep the real values so they
                # can be restored after the highlighted frame is printed.
                temp = window.copy()
                r[i, j] = np.tensordot(window, k, axes=3)
                window[...] = highlighter(window)
                print(f"\x1b[JChannels:\n{x}\n\nResult:\n{str(r)}\x1b[{frame_height}A")
                window[...] = temp
                time.sleep(delay)
    # Move the cursor back below the last frame.
    print(f"\x1b[{frame_height}B")
    return r

# Print the conv2d result for the example x and k with a step of 1 in both directions.
# NOTE(review): this calls conv2d, not conv2d_asciiviz defined just above;
# presumably the visualizer was meant to be demonstrated here — confirm.
print("Output:\n",conv2d(x,k,1,1))

奖金

相关问题更多 >

编程相关推荐

热门问题

热门文章