为pyopengl和numpy构建交错（interleaved）缓冲区

#!/usr/bin/python
# Benchmark of several ways to pack per-quad vertex data into one flat
# float buffer: Python list -> numpy array, indexed writes, slice writes,
# numpy.concatenate, ndarray.put, and a ctypes float array.
import timeit
import numpy
import ctypes
import random

USE_RANDOM = True
USE_STATIC_BUFFER = True

# Each render() call yields 4 vertices of 5 floats (2 texture coordinates
# followed by 3 position components), i.e. 20 floats per quad.
NUM_QUADS = 4096
FLOATS_PER_QUAD = 20
BUFFER_LENGTH = NUM_QUADS * FLOATS_PER_QUAD

STATIC_BUFFER = numpy.empty(BUFFER_LENGTH, dtype=numpy.float32)


def render(i):
    """Return the 20 floats describing one quad as a flat tuple.

    `i` is accepted for interface compatibility but is not used.  With
    USE_RANDOM set, the eight edge values are drawn from random.random();
    otherwise fixed values are used so that results are reproducible.
    """
    # pretend these are different each time
    if USE_RANDOM:
        tex_left, tex_right, tex_top, tex_bottom = (
            random.random(), random.random(),
            random.random(), random.random())
        left, right, top, bottom = (
            random.random(), random.random(),
            random.random(), random.random())
    else:
        tex_left, tex_right, tex_top, tex_bottom = 0.0, 1.0, 1.0, 0.0
        left, right, top, bottom = -1.0, 1.0, 1.0, -1.0

    ibuffer = (
        tex_left, tex_bottom, left, bottom, 0.0,    # lower left corner
        tex_right, tex_bottom, right, bottom, 0.0,  # lower right corner
        tex_right, tex_top, right, top, 0.0,        # upper right corner
        tex_left, tex_top, left, top, 0.0,          # upper left corner
    )
    return ibuffer


def _get_buffer():
    """Return the destination array: the shared STATIC_BUFFER when
    USE_STATIC_BUFFER is set, otherwise a new uninitialised float32 array."""
    if USE_STATIC_BUFFER:
        return STATIC_BUFFER
    return numpy.empty(BUFFER_LENGTH, dtype=numpy.float32)


def create_array_1():
    """Collect everything in a Python list, convert to numpy at the end."""
    ibuffer = []
    for x in range(NUM_QUADS):
        ibuffer.extend(render(x))
    return numpy.array(ibuffer, dtype=numpy.float32)


def create_array_2():
    """Write into a numpy array one element at a time, by index."""
    ibuffer = _get_buffer()
    index = 0
    for x in range(NUM_QUADS):
        for v in render(x):
            ibuffer[index] = v
            index += 1
    return ibuffer


def create_array_3():
    """Write into a numpy array one quad at a time, by slice assignment."""
    ibuffer = _get_buffer()
    index = 0
    for x in range(NUM_QUADS):
        ibuffer[index:index + FLOATS_PER_QUAD] = render(x)
        index += FLOATS_PER_QUAD
    return ibuffer


def create_array_4():
    """Build one small float32 array per quad, then numpy.concatenate."""
    ibuffer_concat = []
    for x in range(NUM_QUADS):
        # converting to an array before concatenating makes a difference
        # to the timing
        ibuffer_concat.append(numpy.array(render(x), dtype=numpy.float32))
    return numpy.concatenate(ibuffer_concat)


def create_array_5():
    """Write into a numpy array one quad at a time, using ndarray.put."""
    ibuffer = _get_buffer()
    index = 0
    for x in range(NUM_QUADS):
        ibuffer.put(range(index, index + FLOATS_PER_QUAD), render(x))
        index += FLOATS_PER_QUAD
    return ibuffer


# ctypes array type holding BUFFER_LENGTH C floats
CTYPES_ARRAY = ctypes.c_float * BUFFER_LENGTH


def create_array_6():
    """Collect everything in a Python list, then build a ctypes array."""
    ibuffer = []
    for x in range(NUM_QUADS):
        ibuffer.extend(render(x))
    return CTYPES_ARRAY(*ibuffer)


def equals(a, b):
    """Return True if `a` and `b` have the same length and equal items.

    The length check prevents a longer `b` from comparing equal and a
    shorter `b` from raising IndexError.
    """
    if len(a) != len(b):
        return False
    for i, v in enumerate(a):
        if b[i] != v:
            return False
    return True


if __name__ == "__main__":
    number = 100

    benchmarks = (
        ('from list:', create_array_1),
        ('array: indexed:', create_array_2),
        ('array: slicing:', create_array_3),
        ('array: concat:', create_array_4),
        ('array: put:', create_array_5),
        ('ctypes float array:', create_array_6),
    )

    # if random, don't try and compare arrays
    if not USE_RANDOM and not USE_STATIC_BUFFER:
        a = create_array_1()
        for _label, func in benchmarks[1:]:
            assert equals(a, func())

    for label, func in benchmarks:
        # Pass the callable itself so the benchmark does not depend on the
        # name of the file this script is saved under.
        t = timeit.Timer(func)
        print('%s %s ms' % (label, t.timeit(number) / number * 1000.0))

3条回答

网友

1楼 · 编辑于 2024-09-28 23:14:36

我知道这看起来很奇怪，但你试过了吗？

网友

2楼 · 编辑于 2024-09-28 23:14:36

numpy的好处不是通过简单地将数据存储在数组中实现的，而是通过跨数组中的多个元素执行操作来实现的，而不是逐个执行。您的示例可以简化并优化为这个简单的解决方案，并具有数量级的加速：

numpy.random.standard_normal(4096*20)

……这不是很有帮助，但它确实暗示了开销在哪里。

下面是一个渐进式的改进：它消除了对4096个元素的逐个迭代，因此优于list append的方案（但只是稍微好一点）。

^{pr2}$

……但这还不是我们想要的加速。

真正的节省要靠重写渲染例程来实现，这样您就不必为最终放入缓冲区的每个值都创建一个python对象。tex_left、tex_right等值是从哪里来的？它们是计算出来的，还是读取进来的？

网友

3楼 · 编辑于 2024-09-28 23:14:36

create_array_1的速度快得多的原因，似乎是（python）列表中的项都指向同一个对象。如果您测试：

print (ibuffer[0] is ibuffer[1])

在各个函数内部执行该测试：在create_array_1中，结果为True（在创建numpy数组之前），而在create_array_2中，结果总是为False。我想这意味着数组转换中的数据转换步骤在create_array_1中只需发生一次，而在create_array_2中要执行4096次。

如果这是原因，我想如果你让render生成随机数据，计时结果会有所不同。create_array_5最慢，因为每次向末尾添加数据时，它都会生成一个新数组。

相关问题更多 >

编程相关推荐

热门问题

热门文章