优化 Python 代码以过滤 NumPy 数组

import time

import numpy as np


def searchsorted_filter(a, b, thresh):
    """Return the elements of ``a`` that lie within ``thresh`` of some element of ``b``.

    For every value in ``a`` the nearest candidates in the sorted copy of
    ``b`` are located with a binary search (the first element ``>=`` the value
    and the last element ``<=`` the value); the value is kept when the smaller
    of the two absolute differences is strictly less than ``thresh``.

    Args:
        a: Values to filter (array-like; converted with ``np.asarray``).
        b: Reference values (array-like, need not be sorted).
        thresh: Exclusive upper bound on the allowed absolute difference.

    Returns:
        The entries of ``a`` that are closer than ``thresh`` to an entry of
        ``b``, in their original order. Empty when ``b`` is empty.
    """
    a = np.asarray(a)
    choices = np.sort(b)  # if b is already sorted, this sort can be skipped
    if choices.size == 0:
        # Nothing to compare against: no value of ``a`` can match.
        return a[np.zeros(a.shape, dtype=bool)]

    # Index of the first choice >= a (clipped so values above every choice
    # still map to the last element).
    lidx = np.searchsorted(choices, a, 'left').clip(max=choices.size - 1)
    # Index of the last choice <= a (clipped so values below every choice
    # still map to the first element).
    ridx = (np.searchsorted(choices, a, 'right') - 1).clip(min=0)

    cl = np.take(choices, lidx)  # or choices[lidx]
    cr = np.take(choices, ridx)  # or choices[ridx]
    return a[np.minimum(np.abs(a - cl), np.abs(a - cr)) < thresh]


def main():
    """Time the tolerance filter and the follow-up membership test on sample data."""
    # Imported locally so that ``searchsorted_filter`` can be imported and
    # reused without shapely being installed.
    from shapely.geometry import LineString, Point, LinearRing  # noqa: F401

    start_time = time.time()
    HLat22 = np.asarray([100, 200, 300, 32.47156, 500, 600, 700, 800, 900, 1000])
    HLong22 = np.asarray([-100, -200, -300, -86.79192, -500, -600, -700, -800, -900, -1000])
    polygon2 = LineString([Point(-86.79191, 32.47155),
                           Point(-86.78679699999999, 32.47005)])

    # Getting lat and long coordinates
    numpy_x = np.array(polygon2.coords.xy[0])
    numpy_y = np.array(polygon2.coords.xy[1])

    # Filtering so only the coordinates close to the line's vertices remain
    The_X = searchsorted_filter(HLong22, numpy_x, thresh=0.005)
    The_Y = searchsorted_filter(HLat22, numpy_y, thresh=0.005)
    # Use %-interpolation; passing the value as a second print() argument
    # would leave the literal "%s" in the output.
    print("Secsfilter: %s" % (time.time() - start_time))

    start_time = time.time()
    # np.isin is the supported replacement for the deprecated np.in1d and is
    # equivalent for 1-D inputs.
    indices = np.isin(HLong22, The_X) & np.isin(HLat22, The_Y)
    print("Secsin1d: %s" % (time.time() - start_time))
    return indices


if __name__ == "__main__":
    main()

1条回答

网友

1楼 · 发布于 2024-10-04 11:33:50

通常，选对算法比低级优化更重要（例如二分查找与线性查找：前者更适合较大的 n，后者更适合较小的 n）。

我在这方面经验不多，也没有考虑你给出的具体数值，不过下面的演示值得一试！你需要针对自己的任务编写基准测试（并调整可用的参数）。

这个想法是：

使用度量树（metric trees）：这是一类专门用于在度量空间中进行最近邻等查询的数据结构
这里使用 ball-tree（sklearn 中的 BallTree），它支持 haversine 度量（输入须为以弧度表示的 [纬度, 经度]）
支持以下查询：
找 k 个最近的邻居
获取距离 <= x 的所有邻居

代码：

from sklearn.neighbors import BallTree
import numpy as np

# Demo: nearest-neighbour and radius queries on geographic points with a
# ball tree using the haversine (great-circle) metric.

# Reference points as [latitude, longitude] in degrees.
Coords = np.array([[51.165691, 10.451526],  # GER
                   [40.463667, -3.74922],   # ESP
                   [61.52401, 105.318756]]) # RUS
print(Coords)

# Query points as [latitude, longitude] in degrees.
polygon2 = np.array([[52.520008, 13.404954],   # BERLIN
                     [55.751244, 37.618423]])  # MOSCOW
print(polygon2)

# The haversine metric expects [latitude, longitude] in RADIANS and returns
# distances in radians as well; feeding raw degrees gives meaningless
# distances and can select the wrong neighbour. Results recorded from a run
# with degree inputs are therefore not comparable with this version.
coords_rad = np.radians(Coords)
polygon2_rad = np.radians(polygon2)

# BUILD TREE for LOOKUP
tree = BallTree(coords_rad, metric='haversine')

# QUERY NEAREST NEIGHBORS
print('\nnearest neighbor search')
dist, ind = tree.query(polygon2_rad, k=1)
# dist is an angle in radians; multiply by the Earth's radius
# (about 6371 km) to obtain kilometres.
print('dist: ', dist)
print('indices: ', ind)

# QUERY FOR DISTANCE <= X
print('\nradius search')
# The radius is also expressed in radians (0.15 rad is roughly 955 km).
ind = tree.query_radius(polygon2_rad[0][np.newaxis], 0.15)
print('indices: ', ind)

输出（注意：haversine 度量要求输入为弧度；以下结果是直接传入角度值得到的，距离数值和最近邻索引不具有实际地理意义）

[[  51.165691   10.451526]
 [  40.463667   -3.74922 ]
 [  61.52401   105.318756]]
[[ 52.520008  13.404954]
 [ 55.751244  37.618423]]

nearest neighbor search
dist:  [[ 0.11852066]
 [ 0.76816021]]
indices:  [[0]
 [2]]

radius search
indices:  [array([0], dtype=int64)]

代码：

输出

相关问题更多 >

编程相关推荐

热门问题

热门文章