我希望加快打包列数据的速度
以下是我使用struct.pack()
和struct.pack_into()
尝试的4种方法:
from struct import pack, pack_into
from multiprocessing import RawArray, Pool
from time import time
from mmap import mmap
def init():
    # Pool initializer intended to make the shared buffers visible inside
    # worker processes. As written it is a no-op: a bare `global` statement
    # only declares the names, it does not assign them — so workers see
    # shared_array / shared_mmap only where they inherit the parent's globals
    # via fork (Linux). NOTE(review): on spawn-based platforms (Windows,
    # macOS default) this initializer would need to receive the buffers as
    # arguments and assign them to the globals — confirm intended platform.
    global shared_array, shared_mmap
def packer(ints_to_pack):
    """Serialize a sequence of ints to bytes using native-endian 'i' format.

    Requires a sized sequence (len() is used to build the format string).
    """
    fmt = '%di' % len(ints_to_pack)
    return pack(fmt, *ints_to_pack)
def pack_into_array(idx_nums_tup):
    """Write one half of the ints directly into the shared RawArray.

    Takes an (index, ints) pair; index selects which half of the buffer
    to fill. Relies on module globals `shared_array` and `total`.
    """
    idx, ints_to_pack = idx_nums_tup
    fmt = '%di' % len(ints_to_pack)
    # Half the buffer is total//2 ints at 4 bytes each; for even `total`
    # this equals idx * (total//2) * 4 bytes.
    offset = idx * 4 * total // 2
    pack_into(fmt, shared_array, offset, *ints_to_pack)
def pack_into_mmap(idx_nums_tup):
    """Write one half of the ints directly into the shared anonymous mmap.

    Takes an (index, ints) pair; index selects which half of the mapping
    to fill. Relies on module globals `shared_mmap` and `total`.
    """
    idx, ints_to_pack = idx_nums_tup
    fmt = '%di' % len(ints_to_pack)
    # Same layout as the RawArray variant: 4-byte ints, two halves.
    offset = idx * 4 * total // 2
    pack_into(fmt, shared_mmap, offset, *ints_to_pack)
if __name__ == '__main__':
    # Benchmark four ways of packing 50M ints to binary:
    #   1. serial struct.pack()
    #   2. pool.map of pack() over two halves (results pickled back)
    #   3. pack_into() a shared multiprocessing.RawArray (in-place)
    #   4. pack_into() a shared anonymous mmap (in-place)
    total = 5 * 10**7
    shared_array = RawArray('i', total)   # 'i' is 4 bytes -> total*4 bytes
    shared_mmap = mmap(-1, total * 4)     # anonymous mapping of equal size
    ints_to_pack = range(total)

    # Both halves, reused by the three parallel benchmarks below.
    halves = (ints_to_pack[:total//2], ints_to_pack[total//2:])

    # NOTE: workers see shared_array / shared_mmap / total only because the
    # pool is created (forked) after they are defined; on spawn-based
    # platforms an initializer that assigns the globals would be required.
    # The context manager fixes the original leak: the pool is now
    # terminated and its workers reaped on exit.
    with Pool() as pool:
        # Serial packing (baseline)
        start = time()
        res = packer(ints_to_pack)
        print("total serial packing:", time() - start)

        # Parallel map packing: each worker packs a half, then the bytes
        # are pickled and shipped back to the parent.
        start = time()
        res = pool.map(packer, halves)
        print("total pool packing:", time() - start)

        # Shared Array packing: workers write in place, nothing shipped back.
        start = time()
        pool.map(pack_into_array, enumerate(halves))
        print("total shared packing:", time() - start)

        # Shared mmap packing: same in-place scheme, mmap-backed buffer.
        start = time()
        pool.map(pack_into_mmap, enumerate(halves))
        print("total mmap packing:", time() - start)

        # Sanity check that both shared buffers hold identical bytes:
        # print(bytearray(shared_array) == shared_mmap[:])
样本结果:
total serial packing: 4.2776854038238525
total pool packing: 3.5881083011627197
total shared packing: 2.55037784576416
total mmap packing: 2.3132405281066895
但结果并不一致。在大多数情况下,打包到 mmap 中似乎与打包到 RawArray 中相当,甚至还略慢一些。
这让我相信,我要么是误用了 multiprocessing,要么是误解了 mmap 的工作方式。
第四种方法不应该是最快的吗?
目前没有回答
相关问题 更多 >
编程相关推荐