扩展多项式以拟合数据:创建任意数量的函数参数

2024-10-03 00:32:07 发布

您现在位置:Python中文网/ 问答频道 /正文

我想将一个函数拟合到给定的数据集,如果不满足条件,则扩展该函数并重复该过程-类似于增加泰勒级数的最高阶。我的问题是我不知道如何扩展这个函数。这个函数应该是这样的

def func(x, a0,a1,a2,...)
    return (a0 * H0 + a1 * H1 + a2 * H2 + ...) (x)

H0(x),H1(x),H2(x),。。。是已知的函数。我希望这个方法使用20-1000个函数H0,H1,。。。所以我必须找到一种方法,不去定义每个参数a0,a1,a2,。。。用手

我的想法是用最大数量的参数定义函数,然后在循环中操作它(以某种方式减少参数数量,然后随着每次迭代增加参数数量)

# choose N: an arbitrary number of parameters
# create N-many functions H0, H1, ... HN-1 -> put them into an numpy array HArray

def func(x, parameters): # somehow reduce the number of parameters to N
    # convert parameters into numpy array -> parameterArray
    result = parameterArray * HArray (x) # (a * H0 + b * H1 + c * H2 + ...) (x)
    return result

# fit the function to a given dataset

作为完整代码

import numpy as np
from scipy.optimize import curve_fit

x = np.linspace(0,2*np.pi,num=10000) # xdata
y = np.sin(x) # ydata
error = 0.0001 # fit condition
K = 1000

for N in range(K):
    def func(x, parameters): # somehow reduce the number of parameters to N
        parameterArray = np.array([p for p in parameters])
        HArray = np.array([x**i for i in range(N)]) # a polynomial as example
        return parameterArray * HArray (x)

    popt, pcov = curve_fit(func, x, y)

    stdDev = np.sqrt(np.diag(pcov))
    if stdDev < error: break

为了使曲线拟合工作,函数需要适当数量的参数。我还想在拟合时固定最后的K-N参数,但我也不知道怎么做


Tags: 函数参数数量defa1npa0h1
1条回答
网友
1楼 · 发布于 2024-10-03 00:32:07

也就是说,如果我做对了,线性拟合。因此,不需要curve_fit。最简单的方法是:

import numpy as np

def h0( x ):
    return 1

def h1( x ):
    return x

def h2( x ):
    return x**2

def h3( x ):
    return x**3

def h4( x ):
    return np.sin( x )


def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    A = np.dot( ST, S )
    AI = np.linalg.inv( A )
    K = np.dot( AI, ST )
    sol = np.dot( K, ydata )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov = s2 * np.dot( K, np.transpose( K ) )
    return sol, cov
    
"""
testing
"""

xl = np.linspace( -2, 8, 23 )
yl = np.fromiter( 
    ( 2.1 * h1( x ) + 0.21 * h3(x) + 1.56 *  h4(x) for x in xl),
    float
)

yn = yl + np.random.normal( size=len(xl), scale=0.2 )

opt, popt = my_linear_fit( xl, yn, ( h1, h3, h4 ) )

print( opt )
print( popt )

为了避免矩阵求逆等问题,可以使用矩阵分解等方法使其更复杂一些,但主要思想保持不变。如果输入函数需要附加参数,则可能需要使用lambda函数和/或相应地扩展代码

编辑1

达到非常高的阶数会导致矩阵求逆或浮点精度问题。我们可以通过使用mpmath来解决这个问题 请注意,numpy不再使用,现在需要以更手动的方式进行一些数据处理

import matplotlib.pyplot as plt
from mpmath import mpf
from mpmath import fabs
from mpmath import matrix
from mpmath import mp


def pol( x, n ):
    return x**n


def make_func_list( order ):
    out = list()
    for i in range( order + 1 ):
        out.append( lambda x, i=i: pol( x, i ) )
    return out


def my_linear_fit( xdata, ydata, funclist ):
    ymat = matrix( ydata )
    ST = matrix( [ [ f( x ) for x in xdata ] for f in funclist ] )
    S = ST.T
    A = ST* S 
    AI =  A**(-1)
    K = AI * ST
    sol =  K * ymat

    yth = S *  sol

    diff = ymat - yth
    s2l = [ x**2 for x in diff ]
    s2 = 0
    for x in s2l:
        s2 += x
    s2 /= ( len( ydata ) - len( funclist ) )
    cov = s2 * K *  K.T
    return sol, cov

"""
testing
"""
mp.prec = 50

xlth = [ mpf( -2 + (5 + 2) * i / 154 )for i in range( 155 ) ]
xl = [ mpf( -2 + (5 + 2) * i / 54 )for i in range( 55 ) ]
xmax = max( [ fabs( x ) for x in xl ] )
xls = [ x / xmax for x in xl ]
xlsth = [ x / xmax for x in xlth ]
yl = [
    2.1 * mp.sin( 3.83 * x ) for x in xl
]


fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )

ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )

for order in[ 2, 4, 12, 18 ]:
    fl  = make_func_list( order )
    opt, popt = my_linear_fit( xls, yl, fl )
    print( opt.T )
    fit = [ [ o * f( x ) for x in xlsth ] for o,f in zip( opt, fl ) ]
    fitsum = fit[0]
    for line in fit[ 1::]:
        fitsum = [ x + y for x, y in zip( fitsum, line )]
    bx.plot( xlsth, fitsum)
plt.show()

编辑2

事实上,numpy对此有一个内置机制。我不确定它是如何详细完成的,因为它最终会从库中调用函数,但我猜它使用SVD。QR分解仍然有助于获得协方差矩阵

from scipy.linalg import qr

def my_linear_fit( xdata, ydata, funclist ):
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    sol = np.linalg.lstsq( S, ydata )[0]
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov =  np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov

"""
testing
"""

xl = np.linspace( -2, 8, 55 )
xmax = max( np.abs( xl ) )
xls = xl / xmax
yl = np.fromiter(
    ( 2.1 * np.sin( 2.83 * x ) for x in xl),
    float
)


fig = plt.figure()
ax = fig.add_subplot( 2, 1, 1 )
bx = fig.add_subplot( 2, 1, 2 )

ax.plot( xl, yl, ls='', marker='o' )
bx.plot( xls, yl, ls='', marker='o' )

for order in[ 5, 10, 15, 20 ]:
    fl  = make_func_list( order )
    opt, popt = my_linear_fit( xls, yl, fl )
    fit = np.array( [ o * f( xls ) for o,f in zip( opt, fl ) ] )
    fit = np.sum( fit, axis=0 )
    fits = np.array( [ o/(xmax**n) * f( xl ) for n, o, f in zip( range(order + 1), opt, fl ) ] )
    fits = np.sum( fits, axis=0 )
    ax.plot( xl, fits)
    bx.plot( xls, fit)
plt.show()

编辑3

这将是仅使用QR分解的解决方案。在我的例子中,它可以轻松地工作到订单20(重新缩放)

from scipy.linalg import solve_triangular

def my_linear_fit( xdata, ydata, funclist ):
    n = len( funclist )
    ST = np.array( [ f( xdata ) for f in funclist ] )
    S = np.transpose( ST )
    q, r = qr( S )
    rred = r[ : n ] ### making it square...skip the zeros
    yred = np.dot( np.transpose( q ), ydata )[ : n ]
    sol = solve_triangular( rred, yred )
    yth = np.dot( S, sol )
    diff = ydata - yth
    s2 = np.sum( diff**2 ) / ( len( diff ) - len( funclist ) )
    cov =  np.linalg.inv( np.dot( np.transpose( r ), r ) ) * s2
    return sol, cov

相关问题 更多 >