带免赔额的模型难以用 scipy.optimize.minimize 收敛

# Joint maximum-likelihood fit of a claim-frequency model (negative binomial)
# and a claim-severity model (gamma, left-truncated at the policy deductible).
#
# The likelihood functions read the module-level frames ``freq_data`` and
# ``sev_data`` unless a frame is passed explicitly; those globals are created
# when the file is run as a script (see the ``__main__`` section at the end).

import numpy as np
import pandas as pd
import scipy as sp
from scipy import optimize, special, stats
from scipy.stats import gamma


def freq_mean(intercept_freq, beta_veh_age_log, beta_dr_section_b, df=None):
    """Return the log-link frequency mean for every row of *df*.

    Computes ``exp(intercept_freq + beta_veh_age_log * veh_age_log
    + beta_dr_section_b * dr_section_b)``.  When *df* is omitted the
    module-level ``freq_data`` frame is used, as in the original script.
    """
    df = freq_data if df is None else df
    return np.exp(
        intercept_freq
        + beta_veh_age_log * df['veh_age_log']
        + beta_dr_section_b * df['dr_section_b']
    )


def sev_mean(intercept_sev, alpha_veh_age_log, alpha_acv_log, df):
    """Return the log-link severity mean for every row of *df*.

    Computes ``exp(intercept_sev + alpha_veh_age_log * veh_age_log
    + alpha_acv_log * acv_log)``.
    """
    return np.exp(
        intercept_sev
        + alpha_veh_age_log * df['veh_age_log']
        + alpha_acv_log * df['acv_log']
    )


def _freq_logpmf(x, mu, phi):
    """Log of the negative-binomial pmf with mean *mu* and dispersion *phi*.

    Evaluated in log space: ``gammaln`` avoids the overflow of
    ``gamma(phi + x) / gamma(phi)``, and ``xlogy`` keeps the ``0 * log(0)``
    corner (x == 0 with mu == 0) equal to 0, matching ``0 ** 0 == 1``.
    """
    return (
        special.gammaln(phi + x)
        - special.gammaln(phi)
        - special.gammaln(x + 1.0)
        + special.xlogy(phi, phi / (phi + mu))
        + special.xlogy(x, mu / (phi + mu))
    )


def freq_dist(x, mu, phi):
    """Negative-binomial probability of *x* claims (mean *mu*, dispersion *phi*).

    Accepts scalars or array-likes; broadcasting follows NumPy rules.
    """
    return np.exp(_freq_logpmf(x, mu, phi))


def _sev_logpdf(x, mu, phi, ded):
    """Log density of a gamma(mean *mu*, shape *phi*) truncated below at *ded*.

    Returns ``-inf`` where the survival probability at the deductible
    underflows to exactly zero (the original returned a density of 0 there).
    """
    log_pdf = (
        special.xlogy(phi, phi / mu)
        - special.gammaln(phi)
        + special.xlogy(phi - 1.0, x)
        - phi * x / mu
    )
    # sf() is accurate in the upper tail, where 1 - cdf() cancels to 0.
    surv = gamma.sf(phi * ded / mu, a=phi, scale=1.0)
    with np.errstate(divide='ignore', invalid='ignore'):
        log_surv = np.log(surv)
        return np.where(surv == 0.0, -np.inf, log_pdf - log_surv)


def sev_dist(x, mu, phi, ded):
    """Density at *x* of a gamma severity left-truncated at deductible *ded*.

    *mu* is the untruncated mean and *phi* the gamma shape.  Returns 0 when
    the probability of exceeding the deductible is numerically zero.
    """
    return np.exp(_sev_logpdf(x, mu, phi, ded))


def sev_loglik(intercept_sev, alpha_veh_age_log, alpha_acv_log, phi_sev,
               data=None):
    """Return the severity log-likelihood summed over all claims.

    *data* defaults to the module-level ``sev_data`` and must contain the
    columns ``veh_age_log``, ``acv_log``, ``claim_amount`` and
    ``deductible``.  As in the original, the fitted mean is stored in the
    ``mu_sev`` column of the frame.
    """
    data = sev_data if data is None else data
    data['mu_sev'] = sev_mean(intercept_sev, alpha_veh_age_log,
                              alpha_acv_log, data)
    log_dens = _sev_logpdf(data['claim_amount'], data['mu_sev'], phi_sev,
                           data['deductible'])
    # np.sum propagates NaN instead of silently skipping bad rows.
    return float(np.sum(log_dens))


def freq_loglik(intercept_freq, beta_veh_age_log, beta_dr_section_b, phi_freq,
                intercept_sev, alpha_veh_age_log, alpha_acv_log, phi_sev,
                data=None):
    """Return the frequency log-likelihood summed over all policies.

    The expected claim count of a policy is
    ``exposure * P(loss > deductible) * mu_freq``; the exceedance probability
    comes from the severity model, which is why the severity parameters
    appear here.  *data* defaults to the module-level ``freq_data`` and must
    contain ``veh_age_log``, ``dr_section_b``, ``acv_log``, ``deductible``,
    ``exposure`` and ``claim_count``.  The intermediate columns ``mu_sev``,
    ``claim_prob`` and ``mu_freq`` are stored on the frame as before.
    """
    data = freq_data if data is None else data
    data['mu_sev'] = sev_mean(intercept_sev, alpha_veh_age_log,
                              alpha_acv_log, data)
    data['claim_prob'] = gamma.sf(
        phi_sev * data['deductible'] / data['mu_sev'], a=phi_sev, scale=1.0
    )
    # The original passed intercept_sev here, so intercept_freq never
    # influenced the likelihood and could not be estimated.
    data['mu_freq'] = freq_mean(intercept_freq, beta_veh_age_log,
                                beta_dr_section_b, data)
    expected = data['exposure'] * data['claim_prob'] * data['mu_freq']
    log_prob = _freq_logpmf(data['claim_count'], expected,
                            data['exposure'] * phi_freq)
    return float(np.sum(log_prob))


def obj_func(param):
    """Return the negative joint log-likelihood for the 8-element *param*.

    Order: intercept_freq, beta_veh_age_log, beta_dr_section_b, phi_freq,
    intercept_sev, alpha_veh_age_log, alpha_acv_log, phi_sev.
    """
    (intercept_freq, beta_veh_age_log, beta_dr_section_b, phi_freq,
     intercept_sev, alpha_veh_age_log, alpha_acv_log, phi_sev) = param
    # Nelder-Mead is unconstrained; reject non-positive dispersions with an
    # infinite cost instead of letting NaN poison the simplex ordering.
    if phi_freq <= 0.0 or phi_sev <= 0.0:
        return np.inf
    ll_sev = sev_loglik(intercept_sev, alpha_veh_age_log, alpha_acv_log,
                        phi_sev)
    ll_freq = freq_loglik(intercept_freq, beta_veh_age_log, beta_dr_section_b,
                          phi_freq, intercept_sev, alpha_veh_age_log,
                          alpha_acv_log, phi_sev)
    nll = -(ll_freq + ll_sev)
    return np.inf if np.isnan(nll) else nll


def mle(nfev=100):
    """Minimise ``obj_func`` with Nelder-Mead from fixed starting values.

    *nfev* caps the number of objective evaluations (``maxfev``).  Returns
    the ``scipy.optimize.OptimizeResult``.
    """
    seeds = np.array([
        -1.87515443, -0.5675389200, -0.0216802900, 23.78568667,
        10.42040743, 0.00465891, 0.00072216, 0.69497075,
    ])
    return optimize.minimize(fun=obj_func, x0=seeds, method='Nelder-Mead',
                             options={'maxfev': nfev})


def build_model_frames(policies, claims):
    """Build the frequency and severity modelling frames.

    Returns ``(freq_data, sev_data)``: one row per policy with its claim
    count (0 when the policy has no claims), and one row per claim joined
    with the policy attributes other than ``dr_section_b`` and ``exposure``.
    """
    counts = claims.groupby('ID').agg(claim_count=('claim_amount', 'count'))
    freq = pd.merge(left=policies, right=counts, how='left', on='ID')
    # Plain assignment: in-place fillna on a column selection does not
    # update the frame under pandas copy-on-write.
    freq['claim_count'] = freq['claim_count'].fillna(0)
    sev = pd.merge(left=claims[['ID', 'claim_amount']],
                   right=policies.drop(['dr_section_b', 'exposure'], axis=1),
                   how='left', on='ID')
    return freq, sev


if __name__ == "__main__":
    policies = pd.read_csv('C:/policies.txt', sep='\t')
    claims = pd.read_csv('C:/claims.txt', sep='\t')
    freq_data, sev_data = build_model_frames(policies, claims)
    opt = mle(4000)

1条回答

网友

1楼 · 发布于 2024-09-30 04:26:23

SciPy 包含许多优化算法，如果没有更好的建议，为什么不逐一试试呢？无法用解析方法计算梯度并不要紧，SciPy 会用数值方法（有限差分）来近似梯度。

具体来说，您可以先尝试 BFGS；如果它也失败了，那么要么您的代码中存在错误（我们很难发现，因为代码目前无法运行），要么您的目标函数本身更难优化，需要更强大（但更慢）的全局优化器。SciPy 自带了不少全局优化器（差分进化 differential_evolution、盆地跳跃 basinhopping、SHGO、双重退火 dual_annealing），网上也有大量其他可选的 Python 全局优化例程。

相关问题更多 >

编程相关推荐

热门问题

热门文章