如何在python中快速求和大型numpy数组?

2024-09-27 02:26:24 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在编写一些python代码来解决一个特定的能量最小化问题,但遇到了一个瓶颈:需要把许多非常大的四维数组相加。

对于这个问题,我有一个拉格朗日函数,由它可以定义一个更新函数,该函数依赖于待更新量的导数。我用有限差分求这些导数,这一步比较快。然后我把它们代入一个函数,用来更新n(向量场)。下面的每个变量都是维数为200x200x200x3的numpy数组,但dx、dy、dz、k11、k22、k33和q0除外,它们是浮点数。

我试过用numba和cython来加速,但只带来了很小的提速(大约10%)。不过我对它们并不熟悉,所以可能没有用对。此外,我也在MatLab中实现了这个问题,它执行同样运算的速度是Python的两倍。理想情况下,我希望继续使用python。有什么方法可以加快这些数组运算吗?谢谢你抽出时间。

import numpy as np

def F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0):
    """Evaluate the three update fields used for the minimization step.

    All spatial derivatives of the components of ``n`` are approximated with
    central finite differences built from ``np.roll``.  Because ``np.roll``
    wraps around, the grid is treated as periodic along every axis.

    Parameters
    ----------
    n : ndarray
        Field indexed as ``n[:, :, :, c]`` with ``c`` in ``{0, 1, 2}`` for
        the x, y and z components.
    xE, yE, zE, xB, yB, zB : ndarray or float
        Terms added unchanged to the x, y and z updates respectively; they
        must broadcast against ``n[:, :, :, 0]``.
    dx, dy, dz : float
        Grid spacing along array axes 0, 1 and 2.
    k11, k22, k33 : float
        Coefficients multiplying the three groups of terms in each update.
    q0 : float
        Constant offset appearing in the ``k22`` terms.

    Returns
    -------
    tuple
        ``(x_update, y_update, z_update)``.
    """
    spacing = (dx, dy, dz)

    def first(f, axis):
        # np.roll(f, -1)[i] == f[i+1] and np.roll(f, 1)[i] == f[i-1], so the
        # central difference is roll(-1) - roll(+1).  The operands used to be
        # the other way round, which negated every first derivative.
        return (np.roll(f, -1, axis) - np.roll(f, 1, axis))/(2*spacing[axis])

    def second(f, axis):
        # The divisor is the spacing of the axis being differentiated (it
        # used to be picked by component, which is wrong unless dx==dy==dz).
        return (np.roll(f, 1, axis) + np.roll(f, -1, axis) - 2*f)/spacing[axis]**2

    def mixed(f, a, b):
        # Four-point stencil for the cross derivative along axes a and b;
        # the two shifts along `a` are computed once and reused.
        back = np.roll(f, 1, a)
        fwd = np.roll(f, -1, a)
        return (np.roll(back, 1, b) - np.roll(back, -1, b)
                - np.roll(fwd, 1, b) + np.roll(fwd, -1, b))/(4*spacing[a]*spacing[b])

    xn=n[:,:,:,0]
    yn=n[:,:,:,1]
    zn=n[:,:,:,2]

    # Naming: <component>n<axes>, e.g. ynxz is the x-z cross derivative of yn.
    xnx, xny, xnz = first(xn, 0), first(xn, 1), first(xn, 2)
    ynx, yny, ynz = first(yn, 0), first(yn, 1), first(yn, 2)
    znx, zny, znz = first(zn, 0), first(zn, 1), first(zn, 2)

    xnxx, xnyy, xnzz = second(xn, 0), second(xn, 1), second(xn, 2)
    ynxx, ynyy, ynzz = second(yn, 0), second(yn, 1), second(yn, 2)
    znxx, znyy, znzz = second(zn, 0), second(zn, 1), second(zn, 2)

    xnxy, xnxz, xnyz = mixed(xn, 0, 1), mixed(xn, 0, 2), mixed(xn, 1, 2)
    ynxy, ynxz, ynyz = mixed(yn, 0, 1), mixed(yn, 0, 2), mixed(yn, 1, 2)
    znxy, znxz, znyz = mixed(zn, 0, 1), mixed(zn, 0, 2), mixed(zn, 1, 2)

    #code to find the minimization steps
    x_update = -1.0*k11*(xnxx + ynxy + znxz) - 1.0*k22*(ynz - zny)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynz + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*zny - 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*zn + 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*yn + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xny + ((xny - ynx)*yn + (xnz - znx)*zn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*xn + ((xny - ynx)*yny + (xnz - znx)*zny + (xnyy - ynxy)*yn + (xnyz - znxy)*zn)*yn) - 1.0*k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znz + ((xnz - znx)*xn + (ynz - zny)*yn)*xnz + ((xny - ynx)*ynz + (xnz - znx)*znz + (xnyz - ynxz)*yn + (xnzz - znxz)*zn)*zn + ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*xn) + xB + xE

    y_update = -1.0*k11*(xnxy + ynyy + znyz) + k22*(xnz - znx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xnz - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*znx + 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*zn - 1.0*k22*((xny - ynx)*znz - (xnz - znx)*ynz + (ynz - zny)*xnz + (xnyz - ynxz)*zn - (xnzz - znxz)*yn + (ynzz - znyz)*xn)*xn + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*(xny - ynx) + ((xnz - znx)*xn + (ynz - zny)*yn)*(ynz - zny)) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*xnx + ((xny - ynx)*yn + (xnz - znx)*zn)*ynx + ((xny - ynx)*xnx - (ynz - zny)*znx + (xnxy - ynxx)*xn - (ynxz - znxy)*zn)*xn + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*yn) + k33*(((xny - ynx)*xn - (ynz - zny)*zn)*znz - ((xnz - znx)*xn + (ynz - zny)*yn)*ynz + ((xny - ynx)*xnz - (ynz - zny)*znz + (xnyz - ynxz)*xn - (ynzz - znyz)*zn)*zn - ((xnz - znx)*xnz + (ynz - zny)*ynz + (xnzz - znxz)*xn + (ynzz - znyz)*yn)*yn) + yB + yE

    z_update = -1.0*k11*(xnxz + ynyz + znzz) - 1.0*k22*(xny - ynx)*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn) - 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*xny + 1.0*k22*(q0 - (xny - ynx)*zn + (xnz - znx)*yn - (ynz - zny)*xn)*ynx - 1.0*k22*((xny - ynx)*znx - (xnz - znx)*ynx + (ynz - zny)*xnx + (xnxy - ynxx)*zn - (xnxz - znxx)*yn + (ynxz - znxy)*xn)*yn + 1.0*k22*((xny - ynx)*zny - (xnz - znx)*yny + (ynz - zny)*xny + (xnyy - ynxy)*zn - (xnyz - znxy)*yn + (ynyz - znyy)*xn)*xn - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*(ynz - zny) - ((xny - ynx)*yn + (xnz - znx)*zn)*(xnz - znx)) - 1.0*k33*(((xny - ynx)*xn - (ynz - zny)*zn)*zny - ((xnz - znx)*xn + (ynz - zny)*yn)*yny + ((xny - ynx)*xny - (ynz - zny)*zny + (xnyy - ynxy)*xn - (ynyz - znyy)*zn)*zn - ((xnz - znx)*xny + (ynz - zny)*yny + (xnyz - znxy)*xn + (ynyz - znyy)*yn)*yn) + k33*(((xny - ynx)*yn + (xnz - znx)*zn)*znx + ((xnz - znx)*xn + (ynz - zny)*yn)*xnx + ((xny - ynx)*ynx + (xnz - znx)*znx + (xnxy - ynxx)*yn + (xnxz - znxx)*zn)*zn + ((xnz - znx)*xnx + (ynz - zny)*ynx + (xnxz - znxx)*xn + (ynxz - znxy)*yn)*xn) + zB + zE

    return x_update,y_update,z_update


#COMPUTATIONAL PARAMETERS
#define computational size of cell
blocks_x=50
blocks_y=50
blocks_z=50
#define experimental size of cell in micro-meters
dimx=float(1e-6)
dimy=float(1e-6)
dimz=float(1e-6)
#define step size
dx=dimx/blocks_x
dy=dimy/blocks_y
dz=dimz/blocks_z




#EXPERIMENTAL PARAMETERS
#director profile
n = np.zeros((blocks_x,blocks_y,blocks_z,3))
n[:,:,:,0]=1

#elastic constants
k11=float(1e-12)        #splay
k22=float(1e-12)        #twist 
k33=float(1e-12)        #bend
k24=float(1e-12)        #saddle splay

#twistedness
pitch = float(1e-6)/1.5
q0 =2*np.pi/pitch  #chiral wavenumber

#applied magnetic field
E=np.zeros((blocks_x,blocks_y,blocks_z,3))
E[:,:,:,1]=float(1e2)

#applied magnetic field
B=np.zeros((blocks_x,blocks_y,blocks_z,3))
B[:,:,:,2]=float(1e2)
#viscocity


xE=E[:,:,:,0]
yE=E[:,:,:,1]
zE=E[:,:,:,2]
xB=B[:,:,:,0]
yB=B[:,:,:,1]
zB=B[:,:,:,2]

%timeit -n 100 F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0)


编辑:

更新了代码,现在可以运行了。magic命令%timeit每循环输出107ms,输入n为(50,50,50,3)。可以通过重新定义blocks_x、blocks_y和blocks_z来修改输入的大小


Tags: nprollyndyblocksdxxnzn
2条回答

这些方程中有许多重复出现的子表达式。把它们提取为单独的变量,每个只计算一次。

def F(n,xE,yE,zE,xB,yB,zB,dx,dy,dz,k11,k22,k33,q0):
    """Evaluate the three update fields, sharing repeated sub-expressions.

    Sub-expressions that occur many times in the update formulas are computed
    once and reused.  Derivatives of the components of ``n`` are central
    finite differences built from ``np.roll``, so the grid is treated as
    periodic along every axis.

    Parameters
    ----------
    n : ndarray
        Field indexed as ``n[:, :, :, c]`` with ``c`` in ``{0, 1, 2}`` for
        the x, y and z components.
    xE, yE, zE, xB, yB, zB : ndarray or float
        Terms added unchanged to the x, y and z updates respectively; they
        must broadcast against ``n[:, :, :, 0]``.
    dx, dy, dz : float
        Grid spacing along array axes 0, 1 and 2.
    k11, k22, k33 : float
        Coefficients multiplying the three groups of terms in each update.
    q0 : float
        Constant offset appearing in the ``k22`` terms.

    Returns
    -------
    list
        ``[x_update, y_update, z_update]``.
    """
    spacing = (dx, dy, dz)

    def first(f, axis):
        # np.roll(f, -1)[i] == f[i+1] and np.roll(f, 1)[i] == f[i-1]
        return (np.roll(f, -1, axis) - np.roll(f, 1, axis))/(2*spacing[axis])

    def second(f, axis):
        # divide by the spacing of the axis being differentiated
        return (np.roll(f, 1, axis) + np.roll(f, -1, axis) - 2*f)/spacing[axis]**2

    def mixed(f, a, b):
        # four-point cross-derivative stencil; shifts along `a` are reused
        back = np.roll(f, 1, a)
        fwd = np.roll(f, -1, a)
        return (np.roll(back, 1, b) - np.roll(back, -1, b)
                - np.roll(fwd, 1, b) + np.roll(fwd, -1, b))/(4*spacing[a]*spacing[b])

    xn=n[:,:,:,0]
    yn=n[:,:,:,1]
    zn=n[:,:,:,2]

    # Naming: <component>n<axes>, e.g. ynxz is the x-z cross derivative of yn.
    xnx, xny, xnz = first(xn, 0), first(xn, 1), first(xn, 2)
    ynx, yny, ynz = first(yn, 0), first(yn, 1), first(yn, 2)
    znx, zny, znz = first(zn, 0), first(zn, 1), first(zn, 2)

    xnxx, xnyy, xnzz = second(xn, 0), second(xn, 1), second(xn, 2)
    ynxx, ynyy, ynzz = second(yn, 0), second(yn, 1), second(yn, 2)
    znxx, znyy, znzz = second(zn, 0), second(zn, 1), second(zn, 2)

    xnxy, xnxz, xnyz = mixed(xn, 0, 1), mixed(xn, 0, 2), mixed(xn, 1, 2)
    ynxy, ynxz, ynyz = mixed(yn, 0, 1), mixed(yn, 0, 2), mixed(yn, 1, 2)
    znxy, znxz, znyz = mixed(zn, 0, 1), mixed(zn, 0, 2), mixed(zn, 1, 2)

    # common terms for minimization steps
    alpha = (xny - ynx)
    beta = (xnz - znx)
    gamma = (ynz - zny)

    kappa = (q0 - alpha*zn + beta*yn - gamma*xn)

    theta = (xnyz - znxy)
    zeta = (xnyz - ynxz)
    rho = (xnyy - ynxy)
    tau = (xnxy - ynxx)
    upsilon = (xnxz - znxx)
    phi = (ynyz - znyy)
    chi = (ynxz - znxy)
    psi = (xnzz - znxz)
    omega = (ynzz - znyz)

    #code to find the minimization steps
    x_update = -1.0*k11*(xnxx + ynxy + znxz) - 1.0*k22*gamma*kappa - 1.0*k22*kappa*ynz + 1.0*k22*kappa*zny - 1.0*k22*(alpha*zny - beta*yny + gamma*xny + rho*zn - theta*yn + phi*xn)*zn + 1.0*k22*(alpha*znz - beta*ynz + gamma*xnz + zeta*zn - psi*yn + omega*xn)*yn + k33*((alpha*xn - gamma*zn)*alpha + (beta*xn + gamma*yn)*beta) - 1.0*k33*((alpha*xn - gamma*zn)*xny + (alpha*yn + beta*zn)*yny + (alpha*xny - gamma*zny + rho*xn - phi*zn)*xn + (alpha*yny + beta*zny + rho*yn + theta*zn)*yn) - 1.0*k33*((alpha*yn + beta*zn)*znz + (beta*xn + gamma*yn)*xnz + (alpha*ynz + beta*znz + zeta*yn + psi*zn)*zn + (beta*xnz + gamma*ynz + psi*xn + omega*yn)*xn) + xB + xE

    y_update = -1.0*k11*(xnxy + ynyy + znyz) + k22*beta*kappa + 1.0*k22*kappa*xnz - 1.0*k22*kappa*znx + 1.0*k22*(alpha*znx - beta*ynx + gamma*xnx + tau*zn - upsilon*yn + chi*xn)*zn - 1.0*k22*(alpha*znz - beta*ynz + gamma*xnz + zeta*zn - psi*yn + omega*xn)*xn + k33*((alpha*yn + beta*zn)*alpha + (beta*xn + gamma*yn)*gamma) + k33*((alpha*xn - gamma*zn)*xnx + (alpha*yn + beta*zn)*ynx + (alpha*xnx - gamma*znx + tau*xn - chi*zn)*xn + (alpha*ynx + beta*znx + tau*yn + upsilon*zn)*yn) + k33*((alpha*xn - gamma*zn)*znz - (beta*xn + gamma*yn)*ynz + (alpha*xnz - gamma*znz + zeta*xn - omega*zn)*zn - (beta*xnz + gamma*ynz + psi*xn + omega*yn)*yn) + yB + yE

    z_update = -1.0*k11*(xnxz + ynyz + znzz) - 1.0*k22*alpha*kappa - 1.0*k22*kappa*xny + 1.0*k22*kappa*ynx - 1.0*k22*(alpha*znx - beta*ynx + gamma*xnx + tau*zn - upsilon*yn + chi*xn)*yn + 1.0*k22*(alpha*zny - beta*yny + gamma*xny + rho*zn - theta*yn + phi*xn)*xn - 1.0*k33*((alpha*xn - gamma*zn)*gamma - (alpha*yn + beta*zn)*beta) - 1.0*k33*((alpha*xn - gamma*zn)*zny - (beta*xn + gamma*yn)*yny + (alpha*xny - gamma*zny + rho*xn - phi*zn)*zn - (beta*xny + gamma*yny + theta*xn + phi*yn)*yn) + k33*((alpha*yn + beta*zn)*znx + (beta*xn + gamma*yn)*xnx + (alpha*ynx + beta*znx + tau*yn + upsilon*zn)*zn + (beta*xnx + gamma*ynx + upsilon*xn + chi*yn)*xn) + zB + zE

    return [x_update,y_update,z_update]

前几项(alpha、beta、gamma)出现的次数最多。这是通过大量的搜索和替换完成的,尚未经过测试。

我先在括号里找出一个重复出现的项,然后搜索并替换它的所有出现。还可以继续寻找由已替换项组成的更大模式(这里只有kappa一个)。alpha1、beta1和gamma1不是必需的,它们可以直接并入kappa的赋值中。

也许有人有解析器,可以通过编程方式(programmatically,拼写不确定)自动完成这一步——我是用蛮力完成的。


所以我忍不住想把搜索和替换自动化。这就是我想到的——它应该确保最少的重复计算次数。同样地,结果也没有被测试为与原始语句等价。运行此命令后,新语句在new_statements:只需执行print(new_statements.read())以获得一个可以复制/粘贴到函数中的字符串:

^{pr2}$

您是否尝试过对数组使用numpy.sum(),而不是写一长串逐项相加的表达式?它还可以指定只沿某一个轴求和(例如axis=2)。

相关问题 更多 >

    热门问题