(内存)访问Dask类型阵列时出现问题

2024-06-25 23:11:45 发布

您现在位置:Python中文网/ 问答频道 /正文

我需要加载一些气象数据来分析几个月,但这些数据存储在只覆盖一天的文件中,因此我需要一次访问多个文件

我遵循一些预先给定的指令,告诉我在我的计算机中创建一个内存分区

from datetime import datetime, timedelta
import dask.array as da
from dask.distributed import Client, LocalCluster
import xarray

# Create a single local Dask cluster per session: if a `client` was already
# bound earlier in this interactive session, reuse it instead of spawning
# another LocalCluster.
if "client" in globals():
    print("Client already exists")
else:
    client = Client(n_workers=1, threads_per_worker=4, memory_limit='2GB')

在这之后，我创建了一个从6月1日到10月1日的日期数组，并据此在 files 列表中生成每日气象数据文件的链接

# Build one OPeNDAP URL per day from June 1 through September 30, 2019.
# BUG FIX: the original used `range(3*30)` (90 days), which only reaches
# Aug 29 — not Oct 1 as intended. Derive the day count from explicit
# start/end dates so the range matches the stated period.
start = datetime(2019, 6, 1)
end = datetime(2019, 10, 1)  # exclusive upper bound
dates = [start + timedelta(days=i) for i in range((end - start).days)]
# strftime fills in the %Y/%m/%Y%m%d fields of the THREDDS URL template.
files = [date.strftime('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/%Y/%m/wrf_arw_det_history_d03_%Y%m%d_0000.nc4') for date in dates]

当我试图将所有数据解压为

def _first_24_steps(ds):
    """Preprocess hook: keep only the first 24 hourly time steps of each daily file."""
    return ds.isel(time=slice(0, 24))

# Open all daily remote datasets as one lazily-concatenated dataset.
multi = xarray.open_mfdataset(files, preprocess=_first_24_steps)

它引发了一个错误:

KeyError                                  Traceback (most recent call last)
~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
    197             try:
--> 198                 file = self._cache[self._key]
    199             except KeyError:

~\Nueva carpeta\lib\site-packages\xarray\backends\lru_cache.py in __getitem__(self, key)
     52         with self._lock:
---> 53             value = self._cache[key]
     54             self._cache.move_to_end(key)

KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False))]

During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)
<ipython-input-19-c3d0f4a8cc26> in <module>
----> 1 multi = xarray.open_mfdataset(files, preprocess= lambda a : a.isel(time=slice(0,24)))

~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_mfdataset(paths, chunks, concat_dim, compat, preprocess, engine, lock, data_vars, coords, combine, autoclose, parallel, join, attrs_file, **kwargs)
    916         getattr_ = getattr
    917 
--> 918     datasets = [open_(p, **open_kwargs) for p in paths]
    919     file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
    920     if preprocess is not None:

~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in <listcomp>(.0)
    916         getattr_ = getattr
    917 
--> 918     datasets = [open_(p, **open_kwargs) for p in paths]
    919     file_objs = [getattr_(ds, "_file_obj") for ds in datasets]
    920     if preprocess is not None:

~\Nueva carpeta\lib\site-packages\xarray\backends\api.py in open_dataset(filename_or_obj, group, decode_cf, mask_and_scale, decode_times, autoclose, concat_characters, decode_coords, engine, chunks, lock, cache, drop_variables, backend_kwargs, use_cftime, decode_timedelta)
    507         if engine == "netcdf4":
    508             store = backends.NetCDF4DataStore.open(
--> 509                 filename_or_obj, group=group, lock=lock, **backend_kwargs
    510             )
    511         elif engine == "scipy":

~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
    356             netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
    357         )
--> 358         return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
    359 
    360     def _acquire(self, needs_lock=True):

~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in __init__(self, manager, group, mode, lock, autoclose)
    312         self._group = group
    313         self._mode = mode
--> 314         self.format = self.ds.data_model
    315         self._filename = self.ds.filepath()
    316         self.is_remote = is_remote_uri(self._filename)

~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in ds(self)
    365     @property
    366     def ds(self):
--> 367         return self._acquire()
    368 
    369     def open_store_variable(self, name, var):

~\Nueva carpeta\lib\site-packages\xarray\backends\netCDF4_.py in _acquire(self, needs_lock)
    359 
    360     def _acquire(self, needs_lock=True):
--> 361         with self._manager.acquire_context(needs_lock) as root:
    362             ds = _nc4_require_group(root, self._group, self._mode)
    363         return ds

~\Nueva carpeta\lib\contextlib.py in __enter__(self)
    110         del self.args, self.kwds, self.func
    111         try:
--> 112             return next(self.gen)
    113         except StopIteration:
    114             raise RuntimeError("generator didn't yield") from None

~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in acquire_context(self, needs_lock)
    184     def acquire_context(self, needs_lock=True):
    185         """Context manager for acquiring a file."""
--> 186         file, cached = self._acquire_with_cache_info(needs_lock)
    187         try:
    188             yield file

~\Nueva carpeta\lib\site-packages\xarray\backends\file_manager.py in _acquire_with_cache_info(self, needs_lock)
    202                     kwargs = kwargs.copy()
    203                     kwargs["mode"] = self._mode
--> 204                 file = self._opener(*self._args, **kwargs)
    205                 if self._mode == "w":
    206                     # ensure file doesn't get overriden when opened again

netCDF4\_netCDF4.pyx in netCDF4._netCDF4.Dataset.__init__()

netCDF4\_netCDF4.pyx in netCDF4._netCDF4._ensure_nc_success()

OSError: [Errno -37] NetCDF: Write to read only: b'http://mandeo.meteogalicia.es/thredds/dodsC/modelos/WRF_HIST/d03/2019/06/wrf_arw_det_history_d03_20190626_0000.nc4'

有人知道为什么会发生这种错误吗?任何帮助都将不胜感激。提前谢谢


Tags: inpyselflockmodelibpackagessite