基于GSL-MonteCarlo最小化的Cython结构指针

%%cython #!python #cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True from libc.stdlib cimport rand, RAND_MAX, calloc, malloc, realloc, free, abort from libc.math cimport log #Use the CythonGSL package to get the low-level routines from cython_gsl cimport * ######################### Define the Data Structure ############################ cdef struct Parameters: #Pointer for Y data array double* Y #size of the array int* Size ################ Support Functions for Monte-Carlo Function ################## #Create a function that allocates the memory and verifies integrity cdef void alloc_struct(Parameters* data, int N, unsigned int flag) nogil: #allocate the data array initially if flag==1: data.Y = <double*> malloc(N * sizeof(double)) #reallocate the data array else: data.Y = <double*> realloc(data.Y, N * sizeof(double)) #If the elements of the struct are not properly allocated, destory it and return null if N!=0 and data.Y==NULL: destroy_struct(data) data = NULL #Create the destructor of the struct to return memory to system cdef void destroy_struct(Parameters* data) nogil: free(data.Y) free(data) #This function fills in the Y observed variable with discreet 0/1 cdef void Y_fill(Parameters* data, double p_true, int* N) nogil: cdef: Py_ssize_t i double y for i in range(N[0]): y = rand()/<double>RAND_MAX if y <= p_true: data.Y[i] = 1 else: data.Y[i] = 0 #Definition of the function to be maximized: LLF of Bernoulli cdef double LLF(double p, void* data) nogil: cdef: #the sample structure (considered the parameter here) Parameters* sample #the total of the LLF double Sum = 0 #the loop iterator Py_ssize_t i, n sample = <Parameters*> data n = sample.Size[0] for i in range(n): Sum += sample.Y[i]*log(p) + (1-sample.Y[i])*log(1-p) return (-(Sum/n)) ########################## Monte-Carlo Function ############################## def Monte_Carlo(int[::1] Samples, double[:,::1] p_hat, Py_ssize_t Sims, double p_true): #Define variables and pointers cdef: #Data Structure Parameters* Data #iterators Py_ssize_t i, j int status, GSL_CONTINUE, Iter = 0, max_Iter = 100 #Variables int N = Samples.shape[0] double start_val, a, b, tol = 1e-6 #GSL objects and pointer const gsl_min_fminimizer_type* T gsl_min_fminimizer* s gsl_function F #Set the GSL function F.function = &LLF #Allocate the minimization routine T = gsl_min_fminimizer_brent s = gsl_min_fminimizer_alloc(T) #allocate the struct Data = <Parameters*> malloc(sizeof(Parameters)) #verify memory integrity if Data==NULL: abort() #set the starting value start_val = rand()/<double>RAND_MAX try: for i in range(N): if i==0: #allocate memory to the data array alloc_struct(Data, Samples[i], 1) else: #reallocate the data array in the struct if #we are past the first run of outer loop alloc_struct(Data, Samples[i], 2) #verify memory integrity if Data==NULL: abort() #pass the data size into the struct Data.Size = &Samples[i] for j in range(Sims): #fill in the struct Y_fill(Data, p_true, Data.Size) #set the parameters for the GSL function (the samples) F.params = <void*> Data a = tol b = 1 #set the minimizer gsl_min_fminimizer_set(s, &F, start_val, a, b) #initialize conditions GSL_CONTINUE = -2 status = -2 while (status == GSL_CONTINUE and Iter < max_Iter): Iter += 1 status = gsl_min_fminimizer_iterate(s) start_val = gsl_min_fminimizer_x_minimum(s) a = gsl_min_fminimizer_x_lower(s) b = gsl_min_fminimizer_x_upper(s) status = gsl_min_test_interval(a, b, tol, 0.0) if (status == GSL_SUCCESS): print ("Converged:\n") p_hat[i,j] = start_val finally: destroy_struct(Data) gsl_min_fminimizer_free(s)

import numpy as np #Sample Sizes N = np.array([5,50,500,5000], dtype='i') #Parameters for MC T = 1000 p_true = 0.2 #Array of the outputs from the MC p_hat = np.empty((N.size,T), dtype='d') p_hat.fill(np.nan) Monte_Carlo(N, p_hat, T, p_true)

1条回答

网友

1楼 · 发布于 2024-09-28 19:34:16

这是一个有点推测性的答案，因为我没有安装GSL，所以很难测试它（如果它是错误的，请道歉！）你知道吗

我认为问题是底线

Sum += sample.Y[i]*log(p) + (1-sample.Y[i])*log(1-p)

看起来Y[i]可以是0或1。当p位于范围0-1的任一端时，它给出0*-inf = nan。在只有所有的Y都相同的情况下，这一点是最小值（因此解算器将可靠地结束在无效点）。幸运的是，您应该能够重写该行以避免得到nan：

if sample.Y[i]:
   Sum += log(p)
else:
   Sum += log(1-p)

（将生成nan的情况是未执行的情况）。你知道吗

我发现了第二个小问题：在alloc_struct中，如果出现错误，您需要data = NULL。这只影响本地指针，因此对Monte_Carlo中NULL的测试没有意义。最好从alloc_struct返回一个true或false标志并进行检查。我怀疑你是否犯了这个错误。你知道吗

编辑：另一个更好的选择是在分析中找到最小值：A log(p) + (1-A) log (1-p)的导数是A/p - (1-A)/(1-p)。平均所有sample.Y以找到A。找到导数为0的地方就得到p=A。（你要仔细检查我的工作情况！）。有了它，您可以避免使用GSL最小化例程。你知道吗

相关问题更多 >

编程相关推荐

热门问题

热门文章