Pandas read_SAS抛出OutOfBoundsDatetime错误,但数据集中没有日期列

2024-03-28 19:01:41 发布

您现在位置:Python中文网/ 问答频道 /正文

我已经查看了“pandas.to_datetime gives OutOfBoundsDatetime Error”和“Importing datetimes to pandas DataFrame raises OutOfBoundsDatetime error”这两个问题,但都没有帮助。

我试图分块读取一个SAS数据集,并只保留其中的某些列,而这些列都不是日期时间格式。

下面是我得到的错误:

---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()

pandas\_libs\tslibs\timedeltas.pyx in pandas._libs.tslibs.timedeltas.cast_from_unit()

OverflowError: int too big to convert

During handling of the above exception, another exception occurred:

OutOfBoundsDatetime                       Traceback (most recent call last)
<ipython-input-19-2b6827de404c> in <module>
      1 chunk_list = []
      2 
----> 3 for chunk in df:
      4     chunk_filter =chunk_preprocessing(chunk)
      5     chunk_list.append(chunk)

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in __next__(self)
    246 
    247     def __next__(self):
--> 248         da = self.read(nrows=self.chunksize or 1)
    249         if da is None:
    250             raise StopIteration

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in read(self, nrows)
    631         p.read(nrows)
    632 
--> 633         rslt = self._chunk_to_dataframe()
    634         if self.index is not None:
    635             rslt = rslt.set_index(self.index)

~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in _chunk_to_dataframe(self)
    685                     if unit:
    686                         rslt[name] = pd.to_datetime(rslt[name], unit=unit,
--> 687                                                     origin="1960-01-01")
    688                 jb += 1
    689             elif self._column_types[j] == b's':

~\AppData\Roaming\Python\Python37\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
    590         else:
    591             from pandas import Series
--> 592             values = convert_listlike(arg._values, True, format)
    593             result = Series(values, index=arg.index, name=arg.name)
    594     elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):

~\AppData\Roaming\Python\Python37\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, box, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
    201         arg = getattr(arg, 'values', arg)
    202         result = tslib.array_with_unit_to_datetime(arg, unit,
--> 203                                                    errors=errors)
    204         if box:
    205             if errors == 'ignore':

pandas\_libs\tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()

OutOfBoundsDatetime: cannot convert input 376199.0 with the unit 'd'

下面是我用来分块的代码:

import pandas as pd
# Open the SAS file lazily: with ``chunksize`` set, ``read_sas`` returns an
# iterator that yields one DataFrame of up to 100000 rows per step instead of
# loading the whole file.
# The path is a raw string so every backslash is literal; the previous spelling
# mixed ``\\`` with a bare ``\M``, which is an invalid escape sequence.
# NOTE: per the traceback in this post, date conversion happens inside the
# reader (``_chunk_to_dataframe`` -> ``pd.to_datetime``) while a chunk is being
# read, i.e. before any column selection applied to the chunk afterwards.
df = pd.read_sas(
    r'C:\Users\jordan.howell\Box\Motorcycle\NEW MODEL DO NOT USE -EXPERIMENT\final2019new.sas7bdat',
    format='sas7bdat',
    encoding='latin-1',
    chunksize=100000,
)

def chunk_preprocessing(chunk):
    """Return only the modelling columns of one chunk.

    Args:
        chunk: A DataFrame (one chunk of the SAS file) containing at least
            every column named below.

    Returns:
        ``chunk[columns]`` - the same rows restricted to the listed columns,
        in the listed order.

    Raises:
        KeyError: If any listed column is missing from ``chunk``.
    """
    # Fixes relative to the earlier list:
    #   * 'SD_TP, SD_TR' was a single string (missing quotes) and could never
    #     match a column; it is now the two names 'SD_TP' and 'SD_TR'.
    #   * 'DS_PF' and 'mp_cnt' were each listed twice, which would duplicate
    #     those columns in the result; each now appears once.
    # NOTE(review): the second 'mp_cnt' / 'DS_PF' may have been meant as other
    # columns (e.g. 'mp_incrd', by analogy with the bi_/pd_/um_ groups) -
    # confirm against the dataset.
    columns = [
        'agefni', 'anti_theft_code', 'atfault',
        'BI_cnt', 'BI_earned', 'bi_eu', 'bi_if', 'bi_incrd', 'bi_lae', 'BI_lmt',
        'cl_cnt', 'cl_incrd', 'cl_lae', 'CLded',
        'cm_cnt', 'cm_incrd', 'cm_lae', 'CMded',
        'cmt_cnt', 'cmt_incrd', 'cmt_lae',
        'cnty',
        'coll_earned', 'coll_eu', 'coll_if',
        'comp_earned', 'comp_eu', 'comp_if', 'componly',
        'DRIVER_AGE', 'dunit',
        'DS_AB', 'DS_AD', 'DS_AK', 'DS_AT', 'DS_CH', 'DS_DD', 'DS_DE', 'DS_DF',
        'DS_FP', 'DS_FQ', 'ds_gd', 'DS_IP', 'DS_KS', 'DS_LB', 'DS_LY', 'DS_MC',
        'DS_ME', 'DS_ML', 'DS_MM', 'DS_MO', 'DS_MP', 'DS_MR', 'DS_MT', 'DS_MV',
        'DS_PD', 'DS_PF', 'DS_PN', 'DS_PY', 'DS_RP', 'DS_SB', 'DS_SF', 'DS_SP',
        'DS_ST', 'DS_TP', 'DS_TR',
        'effyear', 'FIN_RESP_CD', 'majorvio', 'MARITAL_STATUS', 'minorvio',
        'mp_cnt', 'mp_earned', 'mp_eu', 'mp_if', 'mp_lae', 'MPlmt',
        'MVEH_CC', 'mveh_pkg_typ_cd', 'payplan',
        'pd_cnt', 'pd_earned', 'pd_eu', 'pd_if', 'pd_incrd', 'pd_lae', 'PDlmt',
        'policy', 'polterm', 'prdtype', 'producer', 'RATING_CLASS_CODE', 'risk',
        'score',
        'SD_AB', 'SD_SB', 'SD_TP', 'SD_TR',
        'ST_AD', 'ST_AI', 'ST_CI', 'ST_DD', 'ST_DF', 'ST_MF', 'ST_MI', 'ST_MS',
        'ST_RC', 'ST_RI',
        'state', 'stored_locked_ind', 'term', 'terr', 'totalep',
        'TOTL_YRS_LCNS_CNT',
        'um_cnt', 'um_earned', 'um_eu', 'um_if', 'um_incrd', 'um_lae',
        'unit_drv_exp', 'units', 'unitval', 'unitzip', 'v_age', 'yrs_owned',
    ]

    return chunk[columns]

# Filter every chunk down to the wanted columns, then stitch the filtered
# chunks back together into a single DataFrame.
chunk_list = []

for chunk in df:
    chunk_filter = chunk_preprocessing(chunk)
    # Append the filtered chunk; appending the raw ``chunk`` here would throw
    # away the column selection and keep every column of the file.
    chunk_list.append(chunk_filter)

df_concat = pd.concat(chunk_list)

我甚至不确定我的分块方式是否正确。


Tags: to, in, self, pandas, datetime, if, arg, ds