为什么这个线性分类器算法是错误的？

# Question script (Python 2: it uses `print` statements).
# It draws n random points, labels each one by which side of a random line it
# falls on, then tries to learn a weight vector w with perceptron-style updates.
from matplotlib import pyplot as plt
import numpy as np
import random

n = 4
x_values = [round(random.uniform(-1,1),3) for _ in range(n)]
y_values = [round(random.uniform(-1,1),3) for _ in range(n)]
# pts10 is iterated three times below; that relies on Python 2's zip()
# returning a list (a Python 3 zip object would be exhausted after first use).
pts10 = zip(x_values, y_values)
label_dict = {}

# Four random numbers. polyfit(b, d, 1) and plot(b, d) both treat b as the
# x-coordinates and d as the y-coordinates, so the target line runs through
# the points (x1, x2) and (y1, y2).
x1, y1, x2, y2 = (round(random.uniform(-1,1),3) for _ in range(4))
b = [x1, y1]
d = [x2, y2]
slope, intercept = np.polyfit(b, d, 1)

fig, ax = plt.subplots(figsize=(8,8))
ax.scatter(*zip(*pts10), color = 'black')
ax.plot(b,d,'b-')

label_plus = '+'
label_minus = '--'

# Label every point: "+1" if strictly above the target line, "-1" otherwise.
# label_dict maps 'point<i>' (1-based) to [(x, y), label string].
i = 1
for x,y in pts10:
    if(y > (slope*x + intercept)):
        ax.annotate(label_plus, xy=(x,y), xytext=(0, -10), textcoords='offset points', color = 'blue', ha='center', va='center')
        label_dict['point{}'.format(i)] = [(x,y), "+1"]
    else:
        ax.annotate(label_minus, xy=(x,y), xytext=(0, -10), textcoords='offset points', color = 'red', ha='center', va='center')
        label_dict['point{}'.format(i)] = [(x,y), "-1"]
    i += 1

# this is the algorithm
def check(ww,rr):
    # Keep subtracting rr from ww until their dot product becomes negative.
    while(np.dot(ww,rr) >= 0):
        print "being refined 1"
        ww = np.subtract(ww,rr)
    return ww

def check_two(ww,rr):
    # Keep adding rr to ww until their dot product becomes non-negative.
    while(np.dot(ww,rr) < 0):
        print "being refined 2"
        ww = np.add(ww,rr)
    return ww

# w has two components only, i.e. there is no bias/intercept term.
w = np.array([0,0])
ii = 1
# Single pass over the points; earlier points are not re-checked after w changes.
for x,y in pts10:
    r = np.array([x,y])
    print w
    # NOTE(review): `A != B < 0` is a chained comparison, which Python evaluates
    # as `(A != B) and (B < 0)`, not as `A != (B < 0)`. A bool never equals -1,
    # so this branch is taken for every point labelled "-1".
    if (np.dot(w,r) >= 0) != int(label_dict['point{}'.format(ii)][1]) < 0:
        print "Point " + str(ii) + " should have been below the line"
        w = np.subtract(w,r)
        w = check(w,r)
    # NOTE(review): chained as well: `(A != B) and (B >= 0)`. For a "+1" label
    # this branch is taken exactly when np.dot(w,r) >= 0.
    elif (np.dot(w,r) < 0) != int(label_dict['point{}'.format(ii)][1]) >= 0:
        print "Point " + str(ii) + " should have been above the line"
        w = np.add(w,r)
        w = check_two(w,r)
    else:
        print "Point " + str(ii) + " is in the correct position"
    ii += 1

# NOTE(review): with a single data argument, plot() uses the values of w as
# y-values against their indices; this does not draw the line w describes.
ax.plot(w,'g--')

ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_title('Labelling 10 points')
ax.set_xticks(np.arange(-1, 1.1, 0.2))
ax.set_yticks(np.arange(-1, 1.1, 0.2))
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
ax.legend()

2条回答

网友

1楼 · 编辑于 2024-10-01 11:34:39

这就是我想到的答案。我意识到的一些注意事项：
w = w + y(r) 更新算法只适用于归一化向量。其中 'w' 是权向量，'r' 是当前所考察点的 [x, y]，'y' 是标签的符号。
将系数写成 ax+by+c=0 的形式并解出 “y”，即可从得到的向量 “w” 中求出斜率和截距。

# Perceptron training over the labelled points.
#
# Reads from the enclosing script:
#   pts10      - iterable of (x, y) points
#   label_dict - maps 'point<i>' (1-based) to [(x, y), "+1" or "-1"]
# Produces:
#   w                    - weights [a, b, c] of the boundary a*x + b*y + c = 0
#   slope_w, intercept_w - the same boundary written as y = slope*x + intercept
#
# Each point is augmented to r = [x, y, 1] so the third weight acts as a bias.
# After every correction the scan restarts from the first point; training
# stops once a full pass finds no misclassified point.
w = np.array([0, 0, 0])
points = list(pts10)  # materialise once: the points are scanned repeatedly
restart = True
while restart:
    restart = False
    for ii, (x, y) in enumerate(points, 1):
        r = np.array([x, y, 1])
        label = int(label_dict['point{}'.format(ii)][1])
        predicted_above = np.dot(w, r) >= 0

        if predicted_above and label >= 0:
            print("Point " + str(ii) + " is correctly above the line  > no adjustments")
        elif not predicted_above and label < 0:
            print("Point " + str(ii) + " is correctly below the line  > no adjustments")
        elif predicted_above:
            # Predicted above, labelled below: move w away from r.
            print("Point " + str(ii) + " should have been below the line")
            w = np.subtract(w, r)
            restart = True
            break
        else:
            # Predicted below, labelled above: move w towards r.
            print("Point " + str(ii) + " should have been above the line")
            w = np.add(w, r)
            restart = True
            break

print(w)
if w[1] == 0:
    # The boundary is vertical (or w is all zeros), so y cannot be written
    # as a function of x.
    slope_w = intercept_w = float('nan')
else:
    # float() forces true division even while w is still an integer array.
    slope_w = -w[0] / float(w[1])
    intercept_w = -w[2] / float(w[1])

网友

2楼 · 编辑于 2024-10-01 11:34:39

例如，您可以使用 scikit-learn（sklearn）中的 SGDClassifier。线性分类器按如下方式计算预测（参见源代码）：

def predict(self, X):
    """Predict a class label for each sample in X.

    Parameters
    ----------
    X : passed unchanged to ``self.decision_function``.

    Returns
    -------
    The entries of ``self.classes_`` selected by the decision scores.
    """
    scores = self.decision_function(X)
    if len(scores.shape) == 1:
        # One score per sample: index 1 when the score is strictly positive,
        # index 0 otherwise. The builtin int replaces the removed np.int alias.
        indices = (scores > 0).astype(int)
    else:
        # One score per class: pick the highest-scoring column for each row.
        indices = scores.argmax(axis=1)
    return self.classes_[indices]

其中 decision_function 由下式给出：

def decision_function(self, X):
    """Return the linear decision scores ``X . coef_.T + intercept_``.

    The result is flattened to one dimension when the score matrix has a
    single column; otherwise the two-dimensional score matrix is returned.
    """
    # [...] (part of the original method is omitted in this excerpt)

    scores = safe_sparse_dot(X, self.coef_.T,
                             dense_output=True) + self.intercept_
    return scores.ravel() if scores.shape[1] == 1 else scores

因此对于您的示例的二维情况，这意味着数据点被分类+1，如果

x*w1 + y*w2 + i > 0

其中

x, y = X
w1, w2 = self.coef_
i = self.intercept_

否则分类为 -1。因此决策取决于 x*w1 + y*w2 + i 大于还是小于（或等于）零。于是，令 x*w1 + y*w2 + i == 0 即可找到“边界”（border）。我们可以自由选择其中一个分量，另一个则由该方程确定。

下面的代码片段拟合 SGDClassifier，并绘制得到的“边界”。它假设数据点散布在原点（x, y = 0, 0）周围，即它们的平均值（大约）为零。实际上，为了得到好的结果，我们应该先从数据点中减去平均值，再进行拟合，最后把平均值加回到结果中。下面的代码片段只是直接生成散布在原点周围的点。

import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import SGDClassifier

# Demo: label random points by an "ideal" line through the origin, fit an
# SGDClassifier to those labels, and draw the fitted boundary next to the
# ideal one.
n = 100
x = np.random.uniform(-1, 1, size=(n, 2))

# We assume points are scattered around zero, so the ideal line runs from the
# origin (b) through a random point d and has zero intercept.
b = np.zeros(2)
d = np.random.uniform(-1, 1, size=2)
slope, intercept = (d[1] / d[0]), 0.

fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(x[:, 0], x[:, 1], color='black')
ax.plot([b[0], d[0]], [b[1], d[1]], 'b-', label='Ideal')

# +1 for points strictly above the ideal line, -1 otherwise.
labels = np.where(x[:, 1] > slope * x[:, 0] + intercept, 1, -1)
for point, label in zip(x, labels):
    if label == 1:
        marker, colour = '+', 'blue'
    else:
        # A visible '-' marker (a blank string would draw nothing).
        marker, colour = '-', 'red'
    ax.annotate(marker, xy=point, xytext=(0, -10), textcoords='offset points',
                color=colour, ha='center', va='center')

classifier = SGDClassifier()
classifier.fit(x, labels)

# On the boundary x*w1 + y*w2 + i == 0: choose x1 freely and solve for x2.
# coef_ is indexed as [0, j] and intercept_ as [0] so that x2 is a scalar.
w1, w2 = classifier.coef_[0]
x1 = np.random.uniform(-1, 1)
x2 = (-classifier.intercept_[0] - x1 * w1) / w2

# The segment starts at the origin, which relies on the zero-mean assumption.
# 'g--' draws a dashed green line ('g ' selects the "no line" style).
ax.plot([0, x1], [0, x2], 'g--', label='Fit')

plt.legend()
plt.show()

此图显示n = 100数据点的结果：

下图显示了从包含1000个数据点的池中随机选择点的不同n的结果：

相关问题更多 >

编程相关推荐

热门问题

热门文章