# The array to check:
array = numpy.array([[1, 2, 3],
                     [2, 3, 4],
                     [1, 2, 3],
                     [3, 2, 1],
                     [3, 4, 5]])

# Collect the index of every row that also occurs somewhere else in the
# array, so that *all* copies of a duplicated row are removed (not only the
# later ones).
deleteIndices = []
for i in range(len(array)):  # Row currently being checked
    # Every row index except i. range() has to be materialised as a list
    # first: a Python 3 range object does not support item deletion.
    indices = list(range(len(array)))
    del indices[i]
    for j in indices:  # Compare row i against every other row
        if (array[i] == array[j]).all():  # Rows are equal element-wise
            deleteIndices.append(j)

# A row that occurs three or more times is reported once per partner row;
# keep each index only once, in ascending order.
deleteIndices = sorted(set(deleteIndices))

# Delete the duplicated rows.
array = numpy.delete(array, deleteIndices, axis=0)
a = np.array([[1, 2, 3],
              [2, 3, 4],
              [1, 2, 3],
              [3, 2, 1],
              [3, 4, 5]])

# Use a flexible data type, np.void, to combine the columns of `a` so that
# every row becomes a single opaque element. The size of the np.void element
# is the number of bytes of an element of `a` multiplied by the number of
# columns.
b = a.view(np.dtype((np.void, a.dtype.itemsize * a.shape[1])))

# `b` has shape (rows, 1); flatten it so that `inv` is one-dimensional.
# `index[k]` is the row of `a` where the k-th unique row first appears and
# `inv[r]` is the position of row r in the array of unique rows.
_, index, inv = np.unique(b.ravel(), return_index=True, return_inverse=True)


def return_counts(index, inv):
    """Count how often each unique element occurs.

    Parameters
    ----------
    index : sequence
        First-occurrence indices as returned by ``np.unique``; only its
        length (the number of unique elements) is used.
    inv : array_like of int
        Inverse indices as returned by ``np.unique``.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``len(index)`` whose k-th entry is the number
        of times k appears in ``inv``.
    """
    # The builtin int replaces the removed alias np.int.
    count = np.zeros(len(index), dtype=int)
    # np.add.at accumulates correctly when an index is repeated in `inv`.
    np.add.at(count, inv, 1)
    return count


counts = return_counts(index, inv)

# Row indices (into `a`) of the rows that occur exactly once. np.unique
# returns the unique rows in sorted order, so the indices are sorted again to
# preserve the original row order.
# If you want the indices of all rows to discard instead, use:
#     np.flatnonzero(counts[inv] > 1)
index_keep = sorted(idx for idx, n in zip(index, counts) if n == 1)
# >>> a[index_keep]
# array([[2, 3, 4],
#        [3, 2, 1],
#        [3, 4, 5]])

# If you don't need the indices and just want the array returned while
# preserving the order:
a_unique = a[index_keep]
# >>> a_unique
# array([[2, 3, 4],
#        [3, 2, 1],
#        [3, 4, 5]])
如果要删除所有出现重复的元素(包括它们的每一次出现),可以遍历数组,找出出现不止一次的元素的索引,最后把这些元素全部删除:
输出如下:
(原文此处的输出代码块缺失)
这样一来,您既可以删除重复项,又可以获得一个要丢弃的索引列表。
我们希望找到数组中没有重复的行,同时保持原有顺序。
我使用这个解决方案(solution)将 `a` 的每一行组合成一个元素,这样我们就可以使用 `np.unique(b, return_index=True, return_inverse=True)` 来找到唯一的行。然后,我修改了这个函数(function),利用索引(index)和逆索引(inverse)输出每个唯一行的出现次数。这样,我就可以选出所有满足 `counts == 1` 的唯一行。
对于 NumPy 版本 >= 1.9:
(原文此处的代码块缺失)
numpy_indexed 包(免责声明:我是其作者)可用于以矢量化方式解决此类问题:
相关问题 更多 >
编程相关推荐