我使用下面的代码在 Python 中实现带正则化的逻辑回归(logistic regression),它的准确率为 80%。
我使用的最小化方法是 'TNC';改用 BFGS 时,准确率只有 50%。哪种优化方法最合适(相当于 Octave 中的 fminunc)?如何增加或减少迭代次数?默认的迭代次数是多少?还有其他提升性能的建议或方法吗?
在 Octave 中使用 fminunc 实现的同一算法,在训练集上的准确率为 83%。
import numpy as np
import scipy.optimize as op
from sklearn import preprocessing
import matplotlib.pyplot as plt
from matplotlib import style
from pylab import scatter, show, legend, xlabel, ylabel
from numpy import loadtxt, where
from sklearn.preprocessing import PolynomialFeatures
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The value is evaluated as exp(-log(1 + exp(-z))) through
    ``np.logaddexp`` so that large negative inputs do not overflow inside
    ``np.exp`` (the naive form emits a RuntimeWarning there).

    Args:
        z: Scalar or NumPy array of real values.

    Returns:
        The sigmoid of ``z``; same shape as ``z``, values in [0, 1].
    """
    # logaddexp(0, -z) == log(1 + exp(-z)) without ever forming exp(-z).
    return np.exp(-np.logaddexp(0, -z))
def Gradient(theta, X, y, l):
    """Return the gradient of the regularized logistic-regression cost.

    Args:
        theta: Parameter vector with one entry per column of ``X``; it is
            reshaped to a column internally, so flat or column input works.
        X: 2-D design matrix of shape (m, n).
        y: Labels with m entries; reshaped to an (m, 1) column internally.
        l: Regularization strength.

    Returns:
        Flat array of length n holding the partial derivatives.
    """
    num_samples, num_features = X.shape
    weights = theta.reshape((num_features, 1))
    targets = y.reshape((num_samples, 1))

    # Prediction error per sample, broadcast across the feature columns.
    residual = sigmoid(X.dot(weights)) - targets
    data_term = (np.sum(residual * X, axis=0) / num_samples).reshape(num_features, 1)

    # The first parameter is left out of the penalty: a zero is inserted in
    # its slot so the penalty lines up with the full parameter vector.
    penalized = weights[1:num_features, :]
    penalty = np.insert((l / num_samples) * penalized, 0, 0, axis=0)

    return (data_term + penalty).flatten()
def CostFunc(theta, X, y, l):
    """Return the L2-regularized logistic-regression cost.

    Args:
        theta: Parameter vector with one entry per column of ``X``; flat
            (n,) and column (n, 1) inputs are both accepted.
        X: 2-D design matrix of shape (m, n).
        y: 0/1 labels with m entries; flat or (m, 1).
        l: Regularization strength. The first parameter is not penalized.

    Returns:
        The scalar cost: mean cross-entropy plus
        ``l / (2 * m) * sum(theta[1:] ** 2)``.
    """
    m, n = X.shape
    # The optimizer hands theta over as a flat (n,) vector. Without forcing
    # column shapes, a flat prediction vector multiplied by (m, 1) labels
    # broadcasts to an (m, m) matrix and the summed cost is wrong.
    theta = np.asarray(theta).reshape((n, 1))
    y = np.asarray(y).reshape((m, 1))

    z = X.dot(theta)
    # With h = sigmoid(z), -y*log(h) - (1-y)*log(1-h) simplifies to
    # log(1 + exp(z)) - y*z. logaddexp evaluates that without log(0) or
    # overflow when predictions saturate at 0 or 1.
    cost = np.sum(np.logaddexp(0, z) - y * z) / m
    reg = (l / (2 * m)) * np.sum(np.square(theta[1:n, :]))
    return cost + reg
def predict(theta, X):
    """Return rounded sigmoid outputs for every row of ``X``.

    Args:
        theta: Parameter vector with one entry per column of ``X``.
        X: 2-D design matrix of shape (m, n).

    Returns:
        (m, 1) array holding the sigmoid of ``X . theta`` rounded to the
        nearest integer.
    """
    _, num_features = X.shape
    column_theta = theta.reshape(num_features, 1)
    probabilities = sigmoid(X.dot(column_theta))
    return np.round(probabilities)
# Load the data set: every column but the last is a feature, the last column
# is the label. Passing the path lets loadtxt open and close the file itself.
# NOTE(review): skiprows=1 discards the first line of the file -- confirm that
# ex2data2.txt really starts with a header row, otherwise one sample is lost.
data = np.loadtxt("ex2data2.txt", delimiter=",", skiprows=1)
nr, nc = data.shape
X = data[:, 0:nc - 1]
y = data[:, [nc - 1]]

# Boolean row masks. where() on the (m, 1) label column returns a
# (rows, cols) tuple, and indexing X with that tuple also selects row 0.
pos = y[:, 0] == 1
neg = y[:, 0] == 0
scatter(X[pos, 0], X[pos, 1], marker='o', c='b')
scatter(X[neg, 0], X[neg, 1], marker='x', c='r')
xlabel('Microchip Test 1')
ylabel('Microchip Test 2')
legend(['Passed', 'Failed'])
show()

storeX = X  # raw features, kept from before the polynomial expansion
poly = PolynomialFeatures(6)
X = poly.fit_transform(X)
m, n = X.shape

# minimize() expects a one-dimensional starting point.
initial_theta = np.zeros(n)
l = 1

print("Calling optimization")
# Iteration / evaluation limits, when needed, are passed through the
# `options` argument of minimize().
Result = op.minimize(
    fun=CostFunc,
    x0=initial_theta,
    args=(X, y, l),
    method='TNC',
    jac=Gradient,
)
if not Result.success:
    # Surface a failed or truncated optimization instead of silently
    # reporting accuracy for a non-converged theta.
    print("Optimizer did not converge:", Result.message)

optimal_theta = Result.x
print(Result.x.shape)
print("optimal theta")
print(optimal_theta)

# Accuracy on the same data the model was fitted on.
p = predict(optimal_theta, X)
accuracy = np.mean(np.double(p == y))
print("accuracy")
print(accuracy)
enter code here
目前没有回答
相关问题 更多 >
编程相关推荐