我有一个 XGBoost 分类器(XGBClassifier),用来把理赔案件分为简单案件和复杂案件。我想针对一个成本函数来优化它的分类阈值,为此先按损失金额把案件划分为若干金额区间。由于分类器会把部分案件分错,我希望用 hyperopt 为每个金额区间分别优化阈值:损失金额越高,案件就应当越难被归为简单案件。但不知为何,hyperopt 在各个金额区间上给出的结果都不理想。
# Constant threshold assigned to every size class in the baseline space.
a = 0.5

# Baseline: every size-class label maps to the same fixed threshold `a`.
space1 = dict.fromkeys(
    ('100', '200', '300', '400', '500', '600', '700', '800', '900', '901'),
    a,
)

# Search space: one quantised-uniform threshold on [0, 1] (step 0.01) per
# size-class label, using the label itself as the hyperopt parameter name.
space = {label: hp.quniform(label, 0, 1, 0.01) for label in space1}
# NOTE(review): rstate is created here but the visible code never passes it
# to fmin, so it does not influence the search.
rstate = np.random.RandomState(7)
CM = []

# Fixed XGBoost hyper-parameters used for the single model fitted below.
paramst = dict(
    colsample_bytree=0.9697677752045393,
    gamma=2,
    learning_rate=0.08597574025592336,
    max_depth=14,
    n_estimators=29,
    subsample=0.900027377690923,
)
model = XGBClassifier(random_state=7, **paramst)
model.fit(X_train, y_train)

# Column 0 of predict_proba is the score compared against the per-class
# thresholds in treshold().
# NOTE(review): presumably the probability of the first class -- confirm.
probab = model.predict_proba(X_valid)
prob = np.array(probab[:, 0])
y_pred_pre = model.predict(X_valid)

# Per-row cost of the validation set, as a plain array.
cost = np.array(X_valid['cost'])

# Size-class bounds compared against `cost`; their string forms are the keys
# of `space` / `space1`.
ClassK = [100 * step for step in range(1, 10)] + [901]
# Factor applied to the summed cost of CM code 1 rows in the loss.
PercentK = 0.1
def treshold(params, *, costs=None, probs=None, truth=None, bounds=None,
             percent=None):
    """Hyperopt objective: score one set of per-size-class probability cut-offs.

    Every row is assigned to a size class by its cost and is predicted
    ``True`` unless that class's cut-off is strictly greater than the row's
    probability.  The loss is ``percent`` times the summed cost of the rows
    predicted ``True`` whose label is falsy, minus 10 for every row predicted
    ``True``.

    Size classes are half-open intervals.  With bounds ``[b0, ..., bn]``:
    costs below ``b0`` use ``params[str(b0)]``, costs in ``[b(k-1), bk)`` use
    ``params[str(bk)]``, and costs at or above ``b(n-1)`` use
    ``params[str(bn)]``.  Half-open intervals guarantee that every cost,
    including one lying exactly on a bound, is matched to a cut-off.

    Args:
        params: Mapping from size-class label (``str(bound)``) to cut-off.
        costs: Per-row costs; defaults to the module-level ``cost``.
        probs: Per-row probabilities; defaults to the module-level ``prob``.
        truth: Per-row labels, read by truthiness; defaults to the
            module-level ``y_valid``.
        bounds: Ascending size-class bounds; defaults to ``ClassK``.
        percent: Factor applied to the summed cost; defaults to ``PercentK``.

    Returns:
        A dict with ``'loss'`` (float), ``'status'`` (``'ok'``), ``'class'``
        (list of bool predictions, one per row) and ``'Parameter'`` (the
        ``params`` that were evaluated).
    """
    costs = np.asarray(cost if costs is None else costs)
    probs = np.asarray(prob if probs is None else probs)
    # Positional array access: indexing a pandas Series with 0..n-1 would be
    # label-based and can misalign or fail after a shuffled split.
    truth = np.asarray(y_valid if truth is None else truth).astype(bool)
    bounds = list(ClassK if bounds is None else bounds)
    percent = PercentK if percent is None else percent

    # Cut-off of each size class, in the same order as `bounds`.
    cutoffs = np.array([params[str(bound)] for bound in bounds], dtype=float)

    # All bounds except the last act as upper edges; side='right' puts a cost
    # equal to an edge into the class above it, and anything at or beyond the
    # final edge into the last (catch-all) class.
    band = np.searchsorted(bounds[:-1], costs, side='right')

    # A row is predicted False only when its class cut-off exceeds its
    # probability; written as a negation so NaN probabilities stay True.
    predicted = ~(cutoffs[band] > probs)

    # Rows predicted True although their label is falsy (CM code 1).
    costly = predicted & ~truth
    # nansum ignores missing costs and yields 0 when no row qualifies, so no
    # exception handling is needed for an empty group.
    penalty = np.nansum(costs[costly]) * percent

    # Every row predicted True (CM codes 1 and 3) is credited 10.
    # NOTE(review): 10 is presumably a per-case saving -- confirm its unit.
    reward = 10 * int(np.count_nonzero(predicted))

    return {
        'loss': float(penalty - reward),
        'status': 'ok',
        'class': predicted.tolist(),
        'Parameter': params,
    }
def optimize(random_state=7):
    """Tune each size-class threshold on its own with hyperopt.

    For every label in ``space`` a search space is built from the constant
    baseline ``space1`` with only that label replaced by its hyperopt
    distribution, and ``fmin`` is run on ``treshold`` for 100 evaluations
    with a fresh ``Trials`` object.

    Args:
        random_state: Accepted but not used by this function.
            NOTE(review): no seed is passed to ``fmin``, so runs are not
            reproducible -- confirm whether that is intended.

    Returns:
        A tuple ``(resultlist, trialslist)``: the ``fmin`` result and the
        ``Trials`` object of each per-label search, in ``space`` order.
    """
    # The most recent Trials object is also published as a module global.
    global trials
    resultlist, trialslist = [], []
    for label, distribution in space.items():
        # Only `label` is searched; every other class keeps its baseline value.
        space_copy = {**space1, label: distribution}
        trials = Trials()
        best = fmin(
            treshold,
            space_copy,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials,
        )
        resultlist.append(best)
        trialslist.append(trials)
    return (resultlist, trialslist)
# Collected fmin results, one entry per optimize() run.
result_hyper = []
# optimize() returns (fmin results, Trials objects); the second value is the
# list of Trials objects despite being bound to the name `thresholds`.
result, thresholds = optimize()
result_hyper += [result]
输出:
100%|█████████████████████████████████████████████████| 100/100 [00:12<00:00, 7.24it/s, best loss: 117349.50499999977]
100%|██████████████████████████████████████████████████| 100/100 [00:14<00:00, 7.85it/s, best loss: 112980.0749999994]
100%|██████████████████████████████████████████████████| 100/100 [00:12<00:00, 8.36it/s, best loss: 98664.08699999997]
100%|██████████████████████████████████████████████████| 100/100 [00:13<00:00, 7.32it/s, best loss: 96265.73999999941]
100%|█████████████████████████████████████████████████| 100/100 [00:13<00:00, 7.36it/s, best loss: 101592.99399999916]
100%|██████████████████████████████████████████████████| 100/100 [00:13<00:00, 6.67it/s, best loss: 105445.9679999993]
100%|█████████████████████████████████████████████████| 100/100 [00:16<00:00, 6.03it/s, best loss: 105469.19599999941]
100%|█████████████████████████████████████████████████| 100/100 [00:15<00:00, 6.87it/s, best loss: 109068.51899999939]
100%|██████████████████████████████████████████████████| 100/100 [00:13<00:00, 7.63it/s, best loss: 110204.6389999995]
100%|██████████████████████████████████████████████████| 100/100 [00:12<00:00, 7.75it/s, best loss: 99880.33999999936]
result_hyper 的内容如下:
[[{'100': 0.02},
{'200': 0.98},
{'300': 1.0},
{'400': 0.97},
{'500': 0.98},
{'600': 0.97},
{'700': 0.96},
{'800': 0.99},
{'900': 0.98},
{'901': 0.99}]]
我希望找到使负成本(即利润)最大的最优解。例如,如果把 a 设为 1,并把 '100' 这一类的阈值改为 0.8,我就能得到负的损失值。在我自己动手优化之前,有没有人能解释为什么 Hyperopt 得不到好的结果?谢谢大家的帮助。
目前没有回答
相关问题 更多 >
编程相关推荐