Kmeans聚类算法不覆盖

2024-09-27 20:16:12 发布

您现在位置:Python中文网/ 问答频道 /正文

我想对0-9的图像执行k-means聚类算法(我想要10个聚类)。我正在做以下工作:1.随机选择一个图像,使其成为集群1 k1的中心,并从图像数组中删除该图像。2.接下来,我根据欧几里德距离从该图像中选择最远的图像,并将其删除3.我不断重复步骤2,直到得到10个唯一的聚类k1-k10。然后,我计算每个图像所属的集群并将其存储在一个数组中,我不断重复,直到该数组停止更改

我面临的问题是,阵列从未停止变化

函数返回一个数组,该数组确定每个图像的簇(数组的索引表示图像编号)。它们是2400幅图像。)

def calculateMembership(k_clusters,images_matrix_clustering):
    membership_matrix1=np.zeros(2400)
    for i in range(0,len(images_matrix_clustering)):
        number=i+1
        img = plt.imread("Images/"+str(number)+".jpg").ravel()
        dist1 =np.linalg.norm(k_clusters[0]-img)
        dist2 =np.linalg.norm(k_clusters[1]-img)
        dist3 =np.linalg.norm(k_clusters[2]-img)
        dist4 =np.linalg.norm(k_clusters[3]-img)
        dist5 =np.linalg.norm(k_clusters[4]-img)
        dist6 =np.linalg.norm(k_clusters[5]-img)
        dist7 =np.linalg.norm(k_clusters[6]-img)
        dist8 =np.linalg.norm(k_clusters[7]-img)
        dist9 =np.linalg.norm(k_clusters[8]-img)
        dist10 =np.linalg.norm(k_clusters[9]-img)
        minimum_dist=[dist1,dist2,dist3,dist4,dist5,dist6,dist7,dist8,dist9,dist10]
        membership_matrix1[i]=np.argmin(minimum_dist)+1 #get index of min euclidean distance (cluster it belongs to)
    return membership_matrix1    

以下是我确定初始10个集群的代码:

images_matrix = np.zeros((2400,784)) #array which will hold the images. We choose randomly from it, and remove chosen.
images_matrix_clustering = np.zeros((2400,784))#Array of all the images, which nothing will be removed from
k_clusters = np.zeros((10,784))   #an array which will hold the k (10) updated centroid
for i in range (0,2400): #convert the images to binary images
    number=i+1
    img = plt.imread("Images/"+str(number)+".jpg").ravel() #read image
    a= [int(pixel > 140) for pixel in img] #convert anything less than 140 to 0 and anything greater than 140 to 1
    a= np.asarray(a)
    images_matrix[i]=a  #replace old image with new binary one
    images_matrix_clustering[i]=a #replace old image with the new binary one
randImg = np.random.choice(range(2400), 1, replace=False) #choose 1 random image
k1= images_matrix[randImg[0]]#store the first image as the first mean k1
images_matrix=np.delete(images_matrix, randImg[0],0)#delete the chosen cluster from the images array
maxdist= -1 #hold the index of the most further image in the images_matrix
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k1
            euclidean_distance= np.linalg.norm(k1-images_matrix[i])#calculate euclidean
            if(euclidean_distance>=initDist):#compare euclidean with current most max
                maxdist=i #get the index of the most far away image till now
                initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k2= images_matrix[maxdist]
#remove  from images_matrix k2
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k2
        euclidean_distance= np.linalg.norm(k2-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with 0
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k3= images_matrix[maxdist] #get the furtherst image and make it the third mean k3
#remove k3 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k3
        euclidean_distance= np.linalg.norm(k3-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k4= images_matrix[maxdist]
#remove k4 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)): #get the image which is most further away from k4
        euclidean_distance= np.linalg.norm(k4-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist): #compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k5= images_matrix[maxdist]
#remove k5 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k5
        euclidean_distance= np.linalg.norm(k5-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k6= images_matrix[maxdist]
#remove k6 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k6
        euclidean_distance= np.linalg.norm(k6-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k7 =images_matrix[maxdist]
#remove k7 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k7
        euclidean_distance= np.linalg.norm(k7-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k8 =images_matrix[maxdist]
#remove k8 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k8
        euclidean_distance= np.linalg.norm(k8-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k9=images_matrix[maxdist]
#remove k9 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k9
        euclidean_distance= np.linalg.norm(k9-images_matrix[i])#calculate euclidean
        if(euclidean_distance>=initDist):#compare euclidean with current most max
            maxdist=i #get the index of the most far away image till now
            initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k10=images_matrix[maxdist]
#remove k10 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
k_clusters[0]=k1
k_clusters[1]=k2
k_clusters[2]=k3
k_clusters[3]=k4
k_clusters[4]=k5
k_clusters[6]=k7
k_clusters[7]=k8
k_clusters[8]=k9
k_clusters[9]=k10

最后,这是我检查算法是否收敛的地方。但是,代码的这一部分永远不会终止:

membership=calculateMembership(k_clusters,images_matrix_clustering) #find intial membership matrix
previousMembership= np.negative(np.ones(2400))
while((previousMembership!=membership).all()):
    for num in range (0,9):#for the 10 clusters
        currentIndex = num+1#cluster number
        indices = [i for i, x in enumerate(membership) if x == currentIndex] #get all images indices which belong to cluster: currentIndex i+1
         #compute mean of that cluster images
        temp_matrix_images=np.zeros((len(indices),784))#create an array which will hold images classified as the same cluster
        for i in range(0,len(indices)):
            number=indices[i]+1
            img = plt.imread("Images/"+str(number)+".jpg").ravel()
            temp_matrix_images[i]=img#store images of the same cluster in an array called temp_matrix
        theMean= np.mean(temp_matrix_images,axis=0)#get mean of those images in a form of one new image
        k_clusters[num]=theMean#place the mean of those images in the k_clusters array which holds the values of the 10 clusters
    previousMembership =membership
    membership =calculateMembership(k_clusters,images_matrix_clustering)

Tags: oftheimagemostgetnpmatrixdistance

热门问题