Theano：如何高效地撤销/逆转最大池化（max-pooling）

import numpy as np
import theano
import theano.tensor as T

# Sizes of the toy problem; upsamplecode() reads these at graph-build time.
minibatchsize = 2
numfilters = 3
numsamples = 4
upsampfactor = 5


# HERE is the function that I hope could be improved
def upsamplecode(encoded, auxpos):
    """Scatter pooled values back to their original positions (un-pool).

    Builds a zero tensor whose last axis is ``upsampfactor`` times longer
    than that of ``encoded`` and writes each ``encoded[i, f, :]`` into the
    positions listed in ``auxpos[i, f, :]``.

    The two Python loops are unrolled while the graph is built, so one
    ``set_subtensor`` node is created per (item, filter) pair, and the loop
    bounds come from the module-level ``minibatchsize`` / ``numfilters``
    rather than from the symbolic shape of ``encoded``.

    Args:
        encoded: 3-d symbolic tensor, indexed as (item, filter, sample).
        auxpos: integer tensor with the same layout as ``encoded``; each
            entry is used as an index along the last axis of the result.

    Returns:
        The symbolic un-pooled tensor.
    """
    shp = encoded.shape
    upsampled = T.zeros((shp[0], shp[1], shp[2] * upsampfactor))
    for whichitem in range(minibatchsize):
        for whichfilt in range(numfilters):
            upsampled = T.set_subtensor(
                upsampled[whichitem, whichfilt, auxpos[whichitem, whichfilt, :]],
                encoded[whichitem, whichfilt, :])
    return upsampled


totalitems = minibatchsize * numfilters * numsamples

code = theano.shared(
    np.arange(totalitems).reshape((minibatchsize, numfilters, numsamples)))

# arbitrary positions within a bin
auxpos = np.arange(totalitems).reshape(
    (minibatchsize, numfilters, numsamples)) % upsampfactor
# shifted to the actual temporal bin location
auxpos += (np.arange(numsamples) * upsampfactor).reshape((1, 1, -1))
# Builtin int replaces np.int (a plain alias of it, removed in NumPy 1.24).
auxpos = theano.shared(auxpos.astype(int))

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("code:")
print(code.get_value())
print("locations:")
print(auxpos.get_value())

get_upsampled = theano.function([], upsamplecode(code, auxpos))
print("the un-pooled data:")
print(get_upsampled())

def upsamplecode2(encoded, auxpos):
    """Un-pool ``encoded`` with a single flat ``set_subtensor``.

    Every (item, filter) row of the flattened output starts at
    ``(filter + item * numfilters) * numsamples * upsampfactor``; adding that
    offset to the per-row positions in ``auxpos`` gives indices into the
    flattened output, so all values can be written in one operation.

    Relies on the module-level ``minibatchsize``, ``numfilters``,
    ``numsamples`` and ``upsampfactor`` to precompute the row offsets.

    Args:
        encoded: 3-d symbolic tensor, indexed as (item, filter, sample).
        auxpos: integer tensor with the same layout as ``encoded``; each
            entry is a position along the last axis of the result.

    Returns:
        The symbolic un-pooled tensor, reshaped back to 3-d.
    """
    shp = encoded.shape
    upsampled = T.zeros((shp[0], shp[1], shp[2] * upsampfactor))

    # Start offset of each (item, filter) row in the flattened output,
    # repeated once per sample. Kept as integers: storing indices as floatX
    # (float32 on GPU) stops being exact above 2**24.
    row_length = numsamples * upsampfactor
    row_starts = np.arange(minibatchsize * numfilters, dtype='int64') * row_length
    add_to_flattened_indices = theano.shared(
        np.repeat(row_starts, numsamples),
        name="add_to_flattened_indices")

    flat_indices = T.cast(auxpos.flatten() + add_to_flattened_indices, 'int32')
    # Give the written values the destination dtype. The traceback reported
    # with this snippet comes from the GPU optimizer trying to move an int64
    # value tensor to the device, which only supports float32.
    flat_values = T.cast(encoded.flatten(), upsampled.dtype)

    flat_result = T.set_subtensor(upsampled.flatten()[flat_indices], flat_values)
    return flat_result.reshape(upsampled.shape)


get_upsampled2 = theano.function([], upsamplecode2(code, auxpos))
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("the un-pooled data v2:")
ups2 = get_upsampled2()
print(ups2)

ERROR (theano.gof.opt): Optimization failure due to: local_gpu_advanced_incsubtensor1 ERROR (theano.gof.opt): TRACEBACK: ERROR (theano.gof.opt): Traceback (most recent call last): File "/usr/local/lib/python2.7/dist-packages/theano/gof/opt.py", line 1493, in process_node replacements = lopt.transform(node) File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/opt.py", line 952, in local_gpu_advanced_incsubtensor1 gpu_y = gpu_from_host(y) File "/usr/local/lib/python2.7/dist-packages/theano/gof/op.py", line 507, in __call__ node = self.make_node(*inputs, **kwargs) File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/basic_ops.py", line 133, in make_node dtype=x.dtype)()]) File "/usr/local/lib/python2.7/dist-packages/theano/sandbox/cuda/type.py", line 69, in __init__ (self.__class__.__name__, dtype, name)) TypeError: CudaNdarrayType only supports dtype float32 for now. Tried using dtype int64 for variable None

1条回答

网友

1楼 · 发布于 2024-10-02 02:42:31

我不确定这是否更快，但可能会更简洁一些。看看这对你的情况是否有用。

import numpy as np
import theano
import theano.tensor as T

# Sizes of the toy problem.
minibatchsize = 2
numfilters = 3
numsamples = 4
upsampfactor = 5

totalitems = minibatchsize * numfilters * numsamples

# Pooled values, laid out as (item, filter, sample).
code = np.arange(totalitems).reshape((minibatchsize, numfilters, numsamples))

# Arbitrary offset of each value inside its bin ...
auxpos = np.arange(totalitems).reshape((minibatchsize, numfilters, numsamples)) % upsampfactor
# ... shifted by the start of the bin (sample k starts at k * upsampfactor).
auxpos += (np.arange(numsamples) * upsampfactor).reshape((1, 1, -1))

# first in numpy
shp = code.shape
upsampled_np = np.zeros((shp[0], shp[1], shp[2] * upsampfactor))
# Index arrays shaped (n, 1, 1) and (1, m, 1) broadcast against auxpos, so
# every code[i, f, k] lands at upsampled_np[i, f, auxpos[i, f, k]].
batch_idx = np.arange(shp[0]).reshape(-1, 1, 1)
filter_idx = np.arange(shp[1]).reshape(1, -1, 1)
upsampled_np[batch_idx, filter_idx, auxpos] = code

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("numpy output:")
print(upsampled_np)

# now the same idea in theano
encoded = T.tensor3()
positions = T.tensor3(dtype='int64')
shp = encoded.shape
upsampled = T.zeros((shp[0], shp[1], shp[2] * upsampfactor))
# Symbolic index tensors shaped (n, 1, 1) and (1, m, 1); they broadcast
# against `positions` so the whole scatter is a single set_subtensor.
batch_axis = T.arange(shp[0]).reshape((-1, 1, 1))
filter_axis = T.arange(shp[1]).reshape((1, -1, 1))
upsampled = T.set_subtensor(upsampled[batch_axis, filter_axis, positions], encoded)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("theano output:")
# Feed arrays in the dtypes the symbolic inputs declare: `encoded` is floatX
# while `code` is an integer array.
print(upsampled.eval({
    encoded: code.astype(encoded.dtype),
    positions: auxpos.astype(positions.dtype),
}))

相关问题更多 >

编程相关推荐

热门问题

热门文章