高效地将点匹配到几何体（点在多边形内判定），适用于点和多边形数量都很大的情况

import numpy as np
from shapely.geometry import shape, MultiPolygon, Point, Polygon
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
from shapely.strtree import STRtree

# Setup: seed NumPy's global RNG so the generated example data is repeatable.
np.random.seed(12345)

# Shape grid size.
gridsize = 10

# Point density.
avgpointspergridspace = 10

# Creating a geodataframe (stands in for a shapefile imported via geopandas).
# First lay out a gridsize x gridsize lattice of corner points, where
# garr[i, j] is the shapely Point at coordinates (i, j).
garr = np.empty((gridsize, gridsize), dtype=object)
for i, j in np.ndindex(gridsize, gridsize):
    garr[i, j] = Point(i, j)

# Polygons: one square per lattice cell. The ring walks the four corners and
# repeats the first corner to close it.
poly_list = [
    Polygon(
        [
            [p.x, p.y]
            for p in (
                garr[i, j],
                garr[i, j + 1],
                garr[i + 1, j + 1],
                garr[i + 1, j],
                garr[i, j],
            )
        ]
    )
    for i in range(gridsize - 1)
    for j in range(gridsize - 1)
]

# Creating a geodataframe, including some additional numeric and string
# variables.
gdf = gpd.GeoDataFrame()
gdf['geometry'] = poly_list
gdf['id'] = list(range(len(gdf['geometry'])))
gdf['numeric'] = 0
gdf['string'] = 'foo'

# Creating some holes in the grid: each polygon is kept with probability 0.95.
# The draw is made one row at a time, in row order.
gdf['included'] = [
    np.random.choice([True, False], p=[.95, .05]) for _ in range(len(gdf))
]
gdf_polys = gdf[gdf['included']]

# Creating a pandas dataframe with points (stands in for a CSV of coordinates
# imported to pandas).
# The number of points scales with the declared point density instead of a
# repeated literal, so changing `avgpointspergridspace` actually takes effect.
npoints = (gridsize + 2) ** 2 * avgpointspergridspace
# Coordinates are drawn as fgridstart + u * fgridend with u in [0, 1), i.e.
# fgridend acts as the width of the sampling window starting at fgridstart.
fgridend = gridsize + 1
fgridstart = -1
xlist = []
ylist = []
points = []  # not used by this snippet; kept so the name still exists
for _ in range(npoints):
    # Draw x first, then y, for every point: this keeps the sequence of values
    # taken from the seeded global RNG the same as before.
    xlist.append(fgridstart + np.random.random() * fgridend)
    ylist.append(fgridstart + np.random.random() * fgridend)

df = pd.DataFrame(list(zip(xlist, ylist)), columns=['x', 'y'])
# One shapely Point per row, used as the geometry column of the GeoDataFrame.
coords = [Point(xy) for xy in zip(df['x'], df['y'])]
gdf_points = gpd.GeoDataFrame(df, geometry=coords)

def id_gen(row):
    """Return the id of the first polygon in ``shapes_list`` containing the point.

    ``row`` is one row of the points frame; its ``'geometry'`` entry is the
    point to test. The module-level ``shapes_list`` of ``(id, polygon)`` pairs
    is scanned in order and the scan stops at the first polygon whose
    ``contains`` test succeeds.

    Returns 0 when no polygon contains the point.
    NOTE(review): 0 is also a value that can appear as a polygon id in
    ``shapes_list``, so "no match" and "matched id 0" look the same to callers.
    """
    point = row['geometry']
    return next(
        (poly_id for poly_id, poly in shapes_list if poly.contains(point)),
        0,
    )


# Pair every polygon with its id. (An earlier variant used only
# gdf_polys['geometry'], without the ids.)
shapes_list = [
    (gdf_polys['id'].iloc[k], gdf_polys['geometry'].iloc[k])
    for k in range(len(gdf_polys['geometry']))
]
point_list = []
# Row-wise apply: every point is tested against the polygons one by one.
gdf_points['poly'] = gdf_points.apply(id_gen, axis=1)

x y geometry poly 0 4.865555 1.777419 POINT (4.86555 1.77742) 37 1 6.929483 3.041826 POINT (6.92948 3.04183) 57 2 4.485133 1.492326 POINT (4.48513 1.49233) 37 3 2.889222 6.159370 POINT (2.88922 6.15937) 24 4 2.442262 7.456090 POINT (2.44226 7.45609) 25 ... ... ... ... ... 1435 6.414556 5.254309 POINT (6.41456 5.25431) 59 1436 6.409027 4.454615 POINT (6.40903 4.45461) 58 1437 5.763154 2.770337 POINT (5.76315 2.77034) 47 1438 9.613874 1.371165 POINT (9.61387 1.37116) 0 1439 6.013953 3.622011 POINT (6.01395 3.62201) 57 1440 rows × 4 columns

2条回答

网友

1楼 · 编辑于 2024-10-01 13:30:11

与其把这当作逐点的“点在多边形内”判定问题来处理，不如使用geopandas提供的空间连接（spatial join）方法，它在这里非常有用。它实际上相当快，至少在这个玩具示例中，其速度似乎不太受多边形数量的影响（我不能排除这可能是由于这些多边形比较简单）。

Spatial join获取两个GeodataFrame并将它们合并在一起。在本例中，我希望多边形的属性附加到位于其中的点。因此，我的代码如下所示：

# Left spatial join: every point row is kept, and the columns of the polygon
# the point lies within are attached to it.
# NOTE(review): newer geopandas releases rename the `op` keyword to
# `predicate` - confirm against the installed version.
joined = gpd.sjoin(
    gdf_points,
    gdf_polys,
    how='left',
    op='within',
)

    x   y   geometry    poly    index_right id  numeric string  included
0   18.651358   26.920261   POINT (18.65136 26.92026)   908 908.0   908.0   0.0 foo True
1   38.577101   1.505424    POINT (38.57710 1.50542)    1863    1863.0  1863.0  0.0 foo True
2   15.430436   15.543219   POINT (15.43044 15.54322)   750 750.0   750.0   0.0 foo True
3   44.928141   7.726345    POINT (44.92814 7.72635)    2163    2163.0  2163.0  0.0 foo True
4   34.259632   5.373809    POINT (34.25963 5.37381)    1671    1671.0  1671.0  0.0 foo True
... ... ... ... ... ... ... ... ... ...
27035   32.386086   23.440186   POINT (32.38609 23.44019)   1591    1591.0  1591.0  0.0 foo True
27036   7.569414    1.836633    POINT (7.56941 1.83663) 344 344.0   344.0   0.0 foo True
27037   1.141440    34.739388   POINT (1.14144 34.73939)    83  83.0    83.0    0.0 foo True
27038   -0.770784   14.027607   POINT (-0.77078 14.02761)   0   NaN NaN NaN NaN NaN
27039   12.695803   33.405048   POINT (12.69580 33.40505)   621 621.0   621.0   0.0 foo True

与我最初的代码相比，这非常快。运行我测试过的最大规模（2.7万个点）只需60毫秒（相比之下，之前的代码需要1.5分钟）。在我的一些实际工作中，将100万个点匹配到不到20万个多边形仅用了13秒，而且其中大多数多边形比玩具示例中的几何体复杂得多。这似乎是一种可行的方法，但我仍有兴趣了解进一步提高效率的方法。

网友

2楼 · 编辑于 2024-10-01 13:30:11

听起来，可以使用STRtree的最近邻（nearest）查询（如文档（the documentation）中所述，并参考上面关于恢复多边形索引的注释），然后检查点是否位于最近的多边形内，从而避免遍历所有多边形。例如：

from shapely.strtree import STRtree
# ... coords is the list of shapely points and poly_list is the list of polygons ...
# To recover a polygon's index from a geometry object returned by the tree,
# key on the object's unique Python id.
poly_id = {id(poly): i for i, poly in enumerate(poly_list)}
# Form the STR-tree of polygons.
poly_tree = STRtree(poly_list)
pt_to_id = []
# Fill pt_to_id with the index of the nearest polygon if it contains the given
# point. If the point is within no polygon, write None.
for c in coords:
    near = poly_tree.nearest(c)
    if near is None:
        # An empty tree has no nearest geometry, so there is nothing to match.
        pt_to_id.append(None)
        continue
    if hasattr(near, "contains"):
        # Shapely 1.x: the tree hands back the geometry object itself.
        idx = poly_id[id(near)]
    else:
        # Shapely >= 2.0: the tree hands back the integer position of the
        # geometry in the list the tree was built from.
        idx = int(near)
        near = poly_list[idx]
    pt_to_id.append(idx if near.contains(c) else None)

相关问题更多 >

编程相关推荐

热门问题

热门文章