import pandas as pd
films = { 'title': titles, 'rank': ranks, 'synopsis': synopses, 'cluster': clusters, 'genre': genres }
#frame = pd.DataFrame(films[frame] for frame in films.keys())
frame = pd.DataFrame(films,columns = ['rank', 'title', 'cluster', 'genre'])
dups_shape = frame.pivot_table(index=['cluster'], aggfunc='size')
print (dups_shape)
items = { 'title': titles, 'Synopses': synopses}
frame = pd.DataFrame(items, index = [clusters] , columns = [ 'title','cluster'])
print("Top terms per cluster:")
# sort cluster centers by proximity to centroid
order_centroids = km.cluster_centers_.argsort()[:, ::-1]
for i in range(num_clusters):
print("Cluster %d words:" % i, end='')
for ind in order_centroids[i, :6]: # replace 6 with n words per cluster
print(' %s' % vocab_frame.loc[terms[ind].split(' ')].values.tolist()[0][0], end=',')
print()
我不知道如何解决这个问题。因为我复制了这个代码并根据我的项目进行了调整。你知道吗
编辑:评论中OP提供的完整回溯
Top terms per cluster:
Cluster 0 words:
---------------------------------------------------------------------------
IndexError Traceback (most recent call last) <ipython-input-40-2088a7881446> in <module>
13
14 for ind in order_centroids[i, :6]: # replace 6 with n words per cluster
--->15 print(' %s' % vocab_frame.loc[terms[ind].split(' ')].values.tolist()[0][0], end=',')
16
17 IndexError: list index out of range
目前没有回答
相关问题 更多 >
编程相关推荐