Why doesn't my custom linear regression model match sklearn?

Posted on 2024-06-28 19:52:33


I'm trying to create a simple linear regression model in Python without using any libraries (other than numpy). This is what I have:

import numpy as np
import pandas

np.random.seed(1)

alpha = 0.1  # learning rate

# hypothesis: dot product of the weights and one feature row
def h(x, w):
  return np.dot(w.T, x)

# cost: half the sum of squared errors over the 47 samples
def cost(X, W, Y):
  totalCost = 0
  for i in range(47):
    diff = h(X[i], W) - Y[i]
    squared = diff * diff
    totalCost += squared
  return totalCost / 2

# load the data: columns are area, bedrooms, price
housing_data = np.loadtxt('Housing.csv', delimiter=',')

x1 = housing_data[:, 0]
x2 = housing_data[:, 1]
y = housing_data[:, 2]

# standardize each feature to zero mean and unit variance
avgX1 = np.mean(x1)
stdX1 = np.std(x1)
normX1 = (x1 - avgX1) / stdX1
print('avgX1', avgX1)
print('stdX1', stdX1)

avgX2 = np.mean(x2)
stdX2 = np.std(x2)
normX2 = (x2 - avgX2) / stdX2
print('avgX2', avgX2)
print('stdX2', stdX2)

# design matrix: a bias column of ones plus the two normalized features
normalizedX = np.ones((47, 3))
normalizedX[:, 1] = normX1
normalizedX[:, 2] = normX2

np.savetxt('normalizedX.csv', normalizedX)

weights = np.ones((3,))

# stochastic gradient descent: update the weights after each sample
for boom in range(100):
  currentCost = cost(normalizedX, weights, y)
  if boom % 1 == 0:  # always true, so this prints every iteration
    print(boom, 'iteration', weights[0], weights[1], weights[2])
    print('Cost', currentCost)

  for i in range(47):
    errorDiff = h(normalizedX[i], weights) - y[i]
    weights[0] = weights[0] - alpha * errorDiff * normalizedX[i][0]
    weights[1] = weights[1] - alpha * errorDiff * normalizedX[i][1]
    weights[2] = weights[2] - alpha * errorDiff * normalizedX[i][2]

print(weights)

# predict the price of a 2100 sq ft, 3 bedroom house
predictedX = [1, (2100 - avgX1) / stdX1, (3 - avgX2) / stdX2]
firstPrediction = np.array(predictedX)
print('firstPrediction', firstPrediction)
firstPrediction = h(firstPrediction, weights)
print(firstPrediction)

First, it converges extremely quickly, after only 14 iterations. Second, it gives me a different result than sklearn's LinearRegression. For reference, my sklearn code is:

(sklearn code block missing from the original post)
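A minimal sketch of what that snippet presumably looked like, assuming LinearRegression was fit on the two raw feature columns of Housing.csv and asked to predict the 2100 sq ft, 3 bedroom house:

import numpy as np
from sklearn.linear_model import LinearRegression

# columns: area, bedrooms, price
housing_data = np.loadtxt('Housing.csv', delimiter=',')
X = housing_data[:, 0:2]
y = housing_data[:, 2]

model = LinearRegression()
model.fit(X, y)

# predict the price of a 2100 sq ft, 3 bedroom house
print(model.predict([[2100, 3]]))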

My custom model predicts a y value of about 337,000, while sklearn predicts about 355,000. My data is 47 rows that look like this:

2104,3,3.999e+05
1600,3,3.299e+05
2400,3,3.69e+05
1416,2,2.32e+05
3000,4,5.399e+05
1985,4,2.999e+05
1534,3,3.149e+05

The full data is available at https://github.com/shamoons/linear-logistic-regression/blob/master/Housing.csv

I assume that either (a) my gradient descent regression is wrong, or (b) I'm not using sklearn correctly.

Is there any other reason the two would not predict the same output for a given input?


1 Answer

I think you're missing the 1/m term in your gradient descent update (where m is the size of y). After including the 1/m term, I get a prediction similar to your sklearn code.
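Written out, the update applied below, for each sample i and weight j, is

    w_j <- w_j - alpha * (1/m) * (h(x_i, w) - y_i) * x_i[j]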

See below:

....
weights = np.ones((3,))

m = y.size
for boom in range(100):
  currentCost = cost(normalizedX, weights, y)
  if boom % 1 == 0:
    print(boom, 'iteration', weights[0], weights[1], weights[2])
    print('Cost', currentCost)

  for i in range(47):
    errorDiff = h(normalizedX[i], weights) - y[i]
    # scale each update by 1/m
    weights[0] = weights[0] - alpha * (1/m) * errorDiff * normalizedX[i][0]
    weights[1] = weights[1] - alpha * (1/m) * errorDiff * normalizedX[i][1]
    weights[2] = weights[2] - alpha * (1/m) * errorDiff * normalizedX[i][2]

...

The first prediction is 355242.

This agrees well with the LinearRegression model, even though LinearRegression does not use gradient descent.
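For comparison, LinearRegression solves the least-squares problem in closed form rather than iterating; a minimal, self-contained sketch using numpy's lstsq on the same normalized design matrix (rebuilt here from Housing.csv) shows the same kind of fit:

import numpy as np

housing_data = np.loadtxt('Housing.csv', delimiter=',')
x1, x2, y = housing_data[:, 0], housing_data[:, 1], housing_data[:, 2]

# rebuild the normalized design matrix from the question
X = np.ones((y.size, 3))
X[:, 1] = (x1 - x1.mean()) / x1.std()
X[:, 2] = (x2 - x2.mean()) / x2.std()

# closed-form least squares: minimizes ||Xw - y||^2 directly
w, *_ = np.linalg.lstsq(X, y, rcond=None)

query = [1, (2100 - x1.mean()) / x1.std(), (3 - x2.mean()) / x2.std()]
print('closed-form prediction', np.dot(query, w))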

I also tried sklearn's SGDRegressor (which uses stochastic gradient descent), and it too arrives at a value close to both the LinearRegression model and your model. See the code below.

(SGDRegressor code block missing from the original post)
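A sketch of what that snippet may have looked like; the hyperparameters (max_iter, tol) are guesses, and the features are standardized the same way as in the question:

import numpy as np
from sklearn.linear_model import SGDRegressor

# columns: area, bedrooms, price
housing_data = np.loadtxt('Housing.csv', delimiter=',')
X_raw = housing_data[:, 0:2]
y = housing_data[:, 2]

# standardize features, as in the question
mean, std = X_raw.mean(axis=0), X_raw.std(axis=0)
X = (X_raw - mean) / std

sgd = SGDRegressor(max_iter=1000, tol=1e-6)  # hyperparameters are a guess
sgd.fit(X, y)

# predict the 2100 sq ft, 3 bedroom house
query = (np.array([2100, 3]) - mean) / std
print('predict', sgd.predict(query.reshape(1, -1))[0])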

The result:

predict 355533.10119985335
