沿两个维度合并两个 xarray DataArray（以便从粗网格得到更细的网格）

<xarray.DataArray (x: 5, y: 3)>
array([[nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan],
       [nan, nan, nan]])
Coordinates:
  * x        (x) float64 0.0 0.5 1.0 1.5 2.0
  * y        (y) float64 8.0 8.5 9.0

<xarray.DataArray (x: 5, y: 3)>
array([[ 1., nan,  2.],
       [nan, nan, nan],
       [ 3., nan,  4.],
       [nan, nan, nan],
       [ 5., nan,  6.]])
Coordinates:
  * x        (x) float64 0.0 0.5 1.0 1.5 2.0
  * y        (y) float64 8.0 8.5 9.0

import xarray as xr

# Coarse grid: 3 points along x, 2 points along y.
da_1 = xr.DataArray(
    [[1, 2], [3, 4], [5, 6]],
    coords={"x": [0, 1, 2], "y": [8, 9]},
    dims=("x", "y"),
)
print(da_1)
print("*"*50)

# Fine grid: 5 points along x, 3 points along y, every cell NaN.
nan = float("NaN")
da_2_data = [[nan] * 3 for _ in range(5)]
da_2 = xr.DataArray(
    da_2_data,
    coords={"x": [0, 0.5, 1, 1.5, 2], "y": [8, 8.5, 9]},
    dims=("x", "y"),
)
print(da_2)
print("*"*50)

# The attempt this question is about: combine both arrays by coordinates.
combined = xr.combine_by_coords([da_1, da_2])
print(combined)
print("*"*50)

# Desired result: the fine grid, carrying the coarse-grid values wherever
# the coordinates coincide and NaN everywhere else.
expected_data = [
    [1, nan, 2],
    [nan, nan, nan],
    [3, nan, 4],
    [nan, nan, nan],
    [5, nan, 6],
]
expected = xr.DataArray(
    expected_data,
    coords={"x": [0, 0.5, 1, 1.5, 2], "y": [8, 8.5, 9]},
    dims=("x", "y"),
)
print(expected)
print("*"*50)

# Values at the coordinates shared with da_1 must equal da_1's values.
# Note that these checks read from `expected`, not from `combined`.
x0_y8 = expected.sel(x=0, y=8).values
x0_y9 = expected.sel(x=0, y=9).values
x1_y8 = expected.sel(x=1, y=8).values
x1_y9 = expected.sel(x=1, y=9).values
x2_y8 = expected.sel(x=2, y=8).values
x2_y9 = expected.sel(x=2, y=9).values

assert x0_y8 == 1
assert x0_y9 == 2
assert x1_y8 == 3
assert x1_y9 == 4
assert x2_y8 == 5
assert x2_y9 == 6

1条回答

网友

1楼 · 发布于 2024-09-28 21:33:37

一种解决方案是利用 xarray 与 pandas 之间的相互转换，您可以参考下面的代码。唯一需要注意的是：如果数据量非常大（例如气候科学中动辄数十亿行的 DataFrame），这种方法的速度可能会成为问题。对于其他常规规模的数据集，下面的方法应该可行。

# import packages
import xarray as xr
import pandas as pd
import numpy as np

# Sample inputs.
# da_1: coarse grid (3 x-points by 2 y-points) holding the real values.
da_1 = xr.DataArray(
    [[1, 2], [3, 4], [5, 6]],
    coords={"x": [0, 1, 2], "y": [8, 9]},
    dims=("x", "y"),
)

# da_2: fine grid (5 x-points by 3 y-points) with every cell set to NaN.
nan = float("NaN")
da_2_data = [[nan] * 3 for _ in range(5)]

da_2 = xr.DataArray(
    da_2_data,
    coords={"x": [0, 0.5, 1, 1.5, 2], "y": [8, 8.5, 9]},
    dims=("x", "y"),
)

# build a function to convert xarray to pandas dataframe
def xr_to_df(input_xr):
    """Return ``input_xr`` as a flat pandas DataFrame.

    Calls ``input_xr.to_dataframe()`` and then resets the index, so the
    former index levels are kept as ordinary columns alongside the data.
    """
    return input_xr.to_dataframe().reset_index()

# Give each array a name; the merged frame is later indexed by the
# "da_1" column, so the two value columns need distinct names.
da_1, da_2 = da_1.rename("da_1"), da_2.rename("da_2")

# Convert both arrays to flat pandas DataFrames.
da_1_df, da_2_df = xr_to_df(da_1), xr_to_df(da_2)

# Right join: keep every row of the fine grid (da_2_df) and attach the
# da_1 value wherever the coordinates match. No `on=` is given, so pandas
# joins on the columns the two frames have in common.
da_df_combined = da_1_df.merge(da_2_df, how='right')
print(da_df_combined)

# Convert the merged DataFrame back into an xarray DataArray.
# NOTE(review): xarray's `da_1.reindex_like(da_2)` should give the same
# result without the pandas round trip -- confirm against the xarray docs.

# Unique coordinate values along each axis.
# np.unique returns them sorted in ascending order.
x = np.unique(da_df_combined['x'])
y = np.unique(da_df_combined['y'])

print("x:",x)
print("y:",y)

# Sort the rows by (x, y) before reshaping. The reshape below fills the
# grid row by row, so the row order must match the sorted `x` and `y`
# arrays; otherwise values would silently land on the wrong coordinates
# whenever the fine grid's coordinates are not already ascending.
da_1_sorted = da_df_combined.sort_values(['x', 'y'])
da_1_reshape = da_1_sorted['da_1'].values.reshape(len(x), len(y))

# Build the DataArray and name the variable.
# Only the values coming from "da_1" are of interest here.
da_1_xr = xr.DataArray(da_1_reshape, coords=[('x', x), ('y', y)])
da_1_xr = da_1_xr.rename("da_1")

# Inspect the result.
print(da_1_xr)

# Double-check the result the same way the question does: every
# coordinate pair that exists in the coarse grid must carry its value.
def _value_at(x_coord, y_coord):
    """Return the value of ``da_1_xr`` at the given (x, y) coordinates."""
    return da_1_xr.sel(x=x_coord, y=y_coord).values


x0_y8 = _value_at(0, 8)
x0_y9 = _value_at(0, 9)
x1_y8 = _value_at(1, 8)
x1_y9 = _value_at(1, 9)
x2_y8 = _value_at(2, 8)
x2_y9 = _value_at(2, 9)

assert x0_y8 == 1
assert x0_y9 == 2
assert x1_y8 == 3
assert x1_y9 == 4
assert x2_y8 == 5
assert x2_y9 == 6

相关问题更多 >

编程相关推荐

热门问题

热门文章