我想对0-9的图像执行k-means聚类算法(我想要10个聚类)。我正在做以下工作:1.随机选择一个图像,使其成为集群1 k1的中心,并从图像数组中删除该图像。2.接下来,我根据欧几里德距离从该图像中选择最远的图像,并将其删除3.我不断重复步骤2,直到得到10个唯一的聚类k1-k10。然后,我计算每个图像所属的集群并将其存储在一个数组中,我不断重复,直到该数组停止更改
我面临的问题是,阵列从未停止变化
函数返回一个数组,该数组确定每个图像的簇(数组的索引表示图像编号)。它们是2400幅图像。)
def calculateMembership(k_clusters,images_matrix_clustering):
membership_matrix1=np.zeros(2400)
for i in range(0,len(images_matrix_clustering)):
number=i+1
img = plt.imread("Images/"+str(number)+".jpg").ravel()
dist1 =np.linalg.norm(k_clusters[0]-img)
dist2 =np.linalg.norm(k_clusters[1]-img)
dist3 =np.linalg.norm(k_clusters[2]-img)
dist4 =np.linalg.norm(k_clusters[3]-img)
dist5 =np.linalg.norm(k_clusters[4]-img)
dist6 =np.linalg.norm(k_clusters[5]-img)
dist7 =np.linalg.norm(k_clusters[6]-img)
dist8 =np.linalg.norm(k_clusters[7]-img)
dist9 =np.linalg.norm(k_clusters[8]-img)
dist10 =np.linalg.norm(k_clusters[9]-img)
minimum_dist=[dist1,dist2,dist3,dist4,dist5,dist6,dist7,dist8,dist9,dist10]
membership_matrix1[i]=np.argmin(minimum_dist)+1 #get index of min euclidean distance (cluster it belongs to)
return membership_matrix1
以下是我确定初始10个集群的代码:
images_matrix = np.zeros((2400,784)) #array which will hold the images. We choose randomly from it, and remove chosen.
images_matrix_clustering = np.zeros((2400,784))#Array of all the images, which nothing will be removed from
k_clusters = np.zeros((10,784)) #an array which will hold the k (10) updated centroid
for i in range (0,2400): #convert the images to binary images
number=i+1
img = plt.imread("Images/"+str(number)+".jpg").ravel() #read image
a= [int(pixel > 140) for pixel in img] #convert anything less than 140 to 0 and anything greater than 140 to 1
a= np.asarray(a)
images_matrix[i]=a #replace old image with new binary one
images_matrix_clustering[i]=a #replace old image with the new binary one
randImg = np.random.choice(range(2400), 1, replace=False) #choose 1 random image
k1= images_matrix[randImg[0]]#store the first image as the first mean k1
images_matrix=np.delete(images_matrix, randImg[0],0)#delete the chosen cluster from the images array
maxdist= -1 #hold the index of the most further image in the images_matrix
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k1
euclidean_distance= np.linalg.norm(k1-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k2= images_matrix[maxdist]
#remove from images_matrix k2
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k2
euclidean_distance= np.linalg.norm(k2-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with 0
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k3= images_matrix[maxdist] #get the furtherst image and make it the third mean k3
#remove k3 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k3
euclidean_distance= np.linalg.norm(k3-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k4= images_matrix[maxdist]
#remove k4 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)): #get the image which is most further away from k4
euclidean_distance= np.linalg.norm(k4-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist): #compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k5= images_matrix[maxdist]
#remove k5 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k5
euclidean_distance= np.linalg.norm(k5-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k6= images_matrix[maxdist]
#remove k6 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k6
euclidean_distance= np.linalg.norm(k6-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k7 =images_matrix[maxdist]
#remove k7 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k7
euclidean_distance= np.linalg.norm(k7-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k8 =images_matrix[maxdist]
#remove k8 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k8
euclidean_distance= np.linalg.norm(k8-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k9=images_matrix[maxdist]
#remove k9 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
maxdist= -1 #hold the index of the most further image
initDist=-1 #holds the distance between the furtherest image and the current one
for i in range(0,len(images_matrix)):#get the image which is most further away from k9
euclidean_distance= np.linalg.norm(k9-images_matrix[i])#calculate euclidean
if(euclidean_distance>=initDist):#compare euclidean with current most max
maxdist=i #get the index of the most far away image till now
initDist=euclidean_distance #replace the most further away distance by the euclidean distance
k10=images_matrix[maxdist]
#remove k10 from images_matrix
images_matrix=np.delete(images_matrix, maxdist,0)
k_clusters[0]=k1
k_clusters[1]=k2
k_clusters[2]=k3
k_clusters[3]=k4
k_clusters[4]=k5
k_clusters[6]=k7
k_clusters[7]=k8
k_clusters[8]=k9
k_clusters[9]=k10
最后,这是我检查算法是否收敛的地方。但是,代码的这一部分永远不会终止:
membership=calculateMembership(k_clusters,images_matrix_clustering) #find intial membership matrix
previousMembership= np.negative(np.ones(2400))
while((previousMembership!=membership).all()):
for num in range (0,9):#for the 10 clusters
currentIndex = num+1#cluster number
indices = [i for i, x in enumerate(membership) if x == currentIndex] #get all images indices which belong to cluster: currentIndex i+1
#compute mean of that cluster images
temp_matrix_images=np.zeros((len(indices),784))#create an array which will hold images classified as the same cluster
for i in range(0,len(indices)):
number=indices[i]+1
img = plt.imread("Images/"+str(number)+".jpg").ravel()
temp_matrix_images[i]=img#store images of the same cluster in an array called temp_matrix
theMean= np.mean(temp_matrix_images,axis=0)#get mean of those images in a form of one new image
k_clusters[num]=theMean#place the mean of those images in the k_clusters array which holds the values of the 10 clusters
previousMembership =membership
membership =calculateMembership(k_clusters,images_matrix_clustering)
目前没有回答
相关问题 更多 >
编程相关推荐