用Cython提高Python函数的性能

import numpy import pandas class HMM(): ''' args: O: observation sequence. A list of 'H's or 'T's X: state sequence. 'S','M' or 'L's A: transition matrix, N by N B: Emission matrix, M by N M: Number of possibilities in emission matrix pi: initial transition matrix N: Number of states T: length of the observation sequence Q: possible hidden states (Xs) V: possible observations (Os) ''' def __init__(self,A,B,pi,O,X): self.A=A self.N=self.A.shape[0] self.B=B self.M=self.B.shape[1] self.pi=pi self.O=O self.T=len(O) self.Q=list(self.A.index) self.V=list(self.B.keys()) self.X=X def evaluate(self): ''' Solve the evaluation problem for HMMs by implementing the forward algorithm ''' c0=0 ct=numpy.zeros(self.T) alpha= numpy.zeros((self.T,self.N)) ## compute alpha[0] for i in range(self.N): pi0=self.pi[self.Q[i]] bi0=self.B.loc[self.Q[i]][self.O[0]] alpha[0,i]=pi0*bi0 c0+=alpha[0,i] ct[0]=alpha[0,i] ct[0]=1/ct[0]#[0] alpha=alpha*ct[0] for t in range(1,self.T): for i in range(self.N): for j in range(self.N): aji= self.A[self.Q[j]][self.Q[i]] alpha[t,j]= alpha[t-1,j]*aji ct[t]=ct[t]+alpha[t,i] ct[t]=1/ct[t] alpha=alpha*ct[t] return (alpha,ct)

import numpy cimport numpy cpdef evaluate_compiled(A,B,pi,O,X): ''' Solve the evaluation problem for HMMs by implementing the forward algorithm ''' T=len(O) N=len(list(set(X))) Q=list(set(X)) V=list(set(O)) c0=0 ct=numpy.zeros(T) alpha= numpy.zeros((T,N)) ## compute alpha[0] for i in range(N): pi0=pi[Q[i]] bi0=B.loc[Q[i]][O[0]] alpha[0,i]=pi0*bi0 c0+=alpha[0,i] ct[0]=alpha[0,i] ct[0]=1/ct[0]#[0] alpha=alpha*ct[0] for t in range(1,T): for i in range(N): for j in range(N): aji= A[Q[j]][Q[i]] alpha[t,j]= alpha[t-1,j]*aji ct[t]=ct[t]+alpha[t,i] ct[t]=1/ct[t] alpha=alpha*ct[t] return (alpha,ct)

try: from setuptools import setup from setuptools import Extension except ImportError: from distutils.core import setup from distutils.extension import Extension from Cython.Distutils import build_ext import numpy setup(cmdclass={'build_ext':build_ext}, ext_modules=[Extension('hmm', sources=['HMMCluster/hmm.pyx'], #File is in a directory called HMMCluster include_dirs=[numpy.get_include()])] )

cpdef evaluate_compiled_with_type_declaration(A,B,pi,O,X): cdef int t,i,j cdef int T = len(O) cdef int N = len(list(set(X))) cdef list Q = list(set(X)) cdef list V = list(set(O)) cdef float c0 = 0 # cdef numpy.ndarray ct = numpy.zeros(T,dtype=double) ## this caused compilation to fail ct=numpy.zeros(T) alpha= numpy.zeros((T,N)) for i in range(N): pi0=pi[Q[i]] bi0=B.loc[Q[i]][O[0]] alpha[0,i]=pi0*bi0 c0+=alpha[0,i] ct[0]=alpha[0,i] ct[0]=1/ct[0]#[0] alpha=alpha*ct[0] for t in range(1,T): for i in range(N): for j in range(N): aji= A[Q[j]][Q[i]] alpha[t,j]= alpha[t-1,j]*aji ct[t]=ct[t]+alpha[t,i] ct[t]=1/ct[t] alpha=alpha*ct[t] return (alpha,ct)

1条回答

网友

1楼 · 发布于 2024-09-24 00:30:12

在Cython中使用NumPy数组的“现代”方法是“类型化内存视图” http://docs.cython.org/en/latest/src/userguide/memoryviews.html

相应的参数必须声明为：

cpdef evaluate_compiled_with_type_declaration(double[:,:] A, double[:,:] B, double[:] pi, int[:] O, int[:] X):

（只是猜测类型和形状）。在

它们必须直接索引为A[i,j]，而不是A[i][j]

您可以声明结果数组并一次性填充它们。在

^{pr2}$

有关优化，请参见编译器指令http://cython.readthedocs.io/en/latest/src/reference/compilation.html?highlight=cdivision#compiler-directives（特别是cdivision和{}）

不应通过list(set(...))转换输入数据，因为集合失去了顺序。在

要检查您的代码是否已“编译”，请按照Warren Weckesser的建议在文件上使用cython -a

相关问题更多 >

编程相关推荐

热门问题

热门文章