我已经查看了“pandas.to_datetime gives OutOfBoundsDatetime Error”和“Importing datetimes to pandas DataFrame raises OutOfBoundsDatetime error”这两个问题,但它们都没有帮助。
我想分块读取一个SAS数据集并只保留其中的某些列,而这些列都不是日期时间格式。
下面是我得到的错误:
---------------------------------------------------------------------------
OverflowError Traceback (most recent call last)
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()
pandas\_libs\tslibs\timedeltas.pyx in pandas._libs.tslibs.timedeltas.cast_from_unit()
OverflowError: int too big to convert
During handling of the above exception, another exception occurred:
OutOfBoundsDatetime Traceback (most recent call last)
<ipython-input-19-2b6827de404c> in <module>
1 chunk_list = []
2
----> 3 for chunk in df:
4 chunk_filter =chunk_preprocessing(chunk)
5 chunk_list.append(chunk)
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in __next__(self)
246
247 def __next__(self):
--> 248 da = self.read(nrows=self.chunksize or 1)
249 if da is None:
250 raise StopIteration
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in read(self, nrows)
631 p.read(nrows)
632
--> 633 rslt = self._chunk_to_dataframe()
634 if self.index is not None:
635 rslt = rslt.set_index(self.index)
~\AppData\Roaming\Python\Python37\site-packages\pandas\io\sas\sas7bdat.py in _chunk_to_dataframe(self)
685 if unit:
686 rslt[name] = pd.to_datetime(rslt[name], unit=unit,
--> 687 origin="1960-01-01")
688 jb += 1
689 elif self._column_types[j] == b's':
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\tools\datetimes.py in to_datetime(arg, errors, dayfirst, yearfirst, utc, box, format, exact, unit, infer_datetime_format, origin, cache)
590 else:
591 from pandas import Series
--> 592 values = convert_listlike(arg._values, True, format)
593 result = Series(values, index=arg.index, name=arg.name)
594 elif isinstance(arg, (ABCDataFrame, compat.MutableMapping)):
~\AppData\Roaming\Python\Python37\site-packages\pandas\core\tools\datetimes.py in _convert_listlike_datetimes(arg, box, format, name, tz, unit, errors, infer_datetime_format, dayfirst, yearfirst, exact)
201 arg = getattr(arg, 'values', arg)
202 result = tslib.array_with_unit_to_datetime(arg, unit,
--> 203 errors=errors)
204 if box:
205 if errors == 'ignore':
pandas\_libs\tslib.pyx in pandas._libs.tslib.array_with_unit_to_datetime()
OutOfBoundsDatetime: cannot convert input 376199.0 with the unit 'd'
下面是我用来分块的代码:
import pandas as pd
# Open the SAS dataset lazily. Because ``chunksize`` is given, ``read_sas``
# returns a reader that yields one DataFrame per 100,000 rows when iterated,
# not a single DataFrame.
#
# The path is a raw string: the original literal mixed escaped backslashes
# with an unescaped "\M", which is an invalid escape sequence. The resulting
# path is unchanged.
#
# NOTE(review): the reader converts SAS date columns while it parses each
# chunk, i.e. before any column selection can run, so an out-of-range date
# anywhere in the file raises OutOfBoundsDatetime during iteration. Newer
# pandas releases reportedly tolerate such values -- confirm against the
# pandas release notes before relying on it.
df = pd.read_sas(
    r'C:\Users\jordan.howell\Box\Motorcycle\NEW MODEL DO NOT USE -EXPERIMENT\final2019new.sas7bdat',
    format='sas7bdat',
    encoding='latin-1',
    chunksize=100000,
)
# Columns kept from every chunk, in output order.
# Fixes relative to the original list:
#   * 'SD_TP, SD_TR' was a single string; it is now two separate names.
#   * 'DS_PF' and 'mp_cnt' were each listed twice; the repeats are dropped so
#     the selection does not produce duplicate columns.
# NOTE(review): the second 'mp_cnt' may have been meant as another column
# (the 'pd_*' and 'um_*' groups include an '*_incrd' entry that 'mp_*' lacks);
# confirm against the dataset before adding it.
MODEL_COLUMNS = (
    'agefni', 'anti_theft_code', 'atfault', 'BI_cnt', 'BI_earned', 'bi_eu',
    'bi_if', 'bi_incrd', 'bi_lae', 'BI_lmt', 'cl_cnt', 'cl_incrd', 'cl_lae',
    'CLded', 'cm_cnt', 'cm_incrd', 'cm_lae', 'CMded', 'cmt_cnt', 'cmt_incrd',
    'cmt_lae', 'cnty', 'coll_earned', 'coll_eu', 'coll_if', 'comp_earned',
    'comp_eu', 'comp_if', 'componly', 'DRIVER_AGE', 'dunit', 'DS_AB', 'DS_AD',
    'DS_AK', 'DS_AT', 'DS_CH', 'DS_DD', 'DS_DE', 'DS_DF', 'DS_FP', 'DS_FQ',
    'ds_gd', 'DS_IP', 'DS_KS', 'DS_LB', 'DS_LY', 'DS_MC', 'DS_ME', 'DS_ML',
    'DS_MM', 'DS_MO', 'DS_MP', 'DS_MR', 'DS_MT', 'DS_MV', 'DS_PD', 'DS_PF',
    'DS_PN', 'DS_PY', 'DS_RP', 'DS_SB', 'DS_SF', 'DS_SP', 'DS_ST', 'DS_TP',
    'DS_TR', 'effyear', 'FIN_RESP_CD', 'majorvio', 'MARITAL_STATUS',
    'minorvio', 'mp_cnt', 'mp_earned', 'mp_eu', 'mp_if', 'mp_lae', 'MPlmt',
    'MVEH_CC', 'mveh_pkg_typ_cd', 'payplan', 'pd_cnt', 'pd_earned', 'pd_eu',
    'pd_if', 'pd_incrd', 'pd_lae', 'PDlmt', 'policy', 'polterm', 'prdtype',
    'producer', 'RATING_CLASS_CODE', 'risk', 'score', 'SD_AB', 'SD_SB',
    'SD_TP', 'SD_TR', 'ST_AD', 'ST_AI', 'ST_CI', 'ST_DD', 'ST_DF', 'ST_MF',
    'ST_MI', 'ST_MS', 'ST_RC', 'ST_RI', 'state', 'stored_locked_ind', 'term',
    'terr', 'totalep', 'TOTL_YRS_LCNS_CNT', 'um_cnt', 'um_earned', 'um_eu',
    'um_if', 'um_incrd', 'um_lae', 'unit_drv_exp', 'units', 'unitval',
    'unitzip', 'v_age', 'yrs_owned',
)


def chunk_preprocessing(chunk):
    """Return *chunk* restricted to the columns in ``MODEL_COLUMNS``.

    Args:
        chunk: A DataFrame holding one chunk of the dataset.

    Returns:
        A DataFrame containing only the ``MODEL_COLUMNS`` columns, in that
        order. The input frame is not modified.

    Raises:
        KeyError: If any expected column is missing from *chunk*.
    """
    # Index with a list: a tuple key would be treated as a single
    # (multi-level) label rather than a collection of column names.
    return chunk[list(MODEL_COLUMNS)]
# Reduce each chunk to the model columns as it is read, then stitch the
# reduced chunks together into one DataFrame.
# Bug fix: the original loop computed the filtered chunk but appended the
# unfiltered ``chunk``, so the column selection was thrown away and every
# column of the dataset was kept.
chunk_list = [chunk_preprocessing(chunk) for chunk in df]
df_concat = pd.concat(chunk_list)
我甚至不确定自己的分块方式是否正确。
目前没有回答
相关问题 更多 >
编程相关推荐