从csv加载数据帧时，将数据帧单元格中的列表分解为行的代码失败。

import pandas as pd #Create The DataFrame df = (pd.DataFrame({ 'opponent': ['76ers', 'blazers', 'bobcats'], 'nearest_neighbors': [['Zach LaVine', 'Jeremy Lin', 'Nate Robinson', 'Isaia']] * 3}) .set_index([ 'opponent'])) #Explode The Data Frame (pd.melt(df.nearest_neighbors.apply(pd.Series).reset_index(), id_vars=[ 'opponent'], value_name='nearest_neighbors') .set_index([ 'opponent']) .drop('variable', axis=1) .dropna() .sort_index() ) #Save DF to CSV df.to_csv("Baskets.CSV") #Load DF to CSV df = pd.read_csv("Baskets.csv") # Re Run Explode Code on csv (pd.melt(df.nearest_neighbors.apply(pd.Series).reset_index(), id_vars=[ 'opponent'], value_name='nearest_neighbors') .set_index([ 'opponent']) .drop('variable', axis=1) .dropna() .sort_index() ) KeyError Traceback (most recent call last) ~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2441 try: -> 2442 return self._engine.get_loc(key) 2443 except KeyError: pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'opponent' During handling of the above exception, another exception occurred: KeyError Traceback (most recent call last) <ipython-input-25-6ffdc50e8f2c> in <module>() 1 (pd.melt(df.nearest_neighbors.apply(pd.Series).reset_index(), 2 id_vars=[ 'opponent'], ----> 3 value_name='nearest_neighbors') 4 .set_index([ 'opponent']) 5 .drop('variable', axis=1) ~/anaconda3/lib/python3.6/site-packages/pandas/core/reshape/reshape.py in melt(frame, id_vars, value_vars, var_name, value_name, col_level) 761 mdata = {} 762 for col in id_vars: --> 763 mdata[col] = np.tile(frame.pop(col).values, K) 764 765 mcolumns = id_vars + var_name + [value_name] 
~/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in pop(self, item) 548 Return item and drop from frame. Raise KeyError if not found. 549 """ --> 550 result = self[item] 551 del self[item] 552 try: ~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key) 1962 return self._getitem_multilevel(key) 1963 else: -> 1964 return self._getitem_column(key) 1965 1966 def _getitem_column(self, key): ~/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_column(self, key) 1969 # get column 1970 if self.columns.is_unique: -> 1971 return self._get_item_cache(key) 1972 1973 # duplicate columns & possible reduce dimensionality ~/anaconda3/lib/python3.6/site-packages/pandas/core/generic.py in _get_item_cache(self, item) 1643 res = cache.get(item) 1644 if res is None: -> 1645 values = self._data.get(item) 1646 res = self._box_item_values(item, values) 1647 cache[item] = res ~/anaconda3/lib/python3.6/site-packages/pandas/core/internals.py in get(self, item, fastpath) 3588 3589 if not isnull(item): -> 3590 loc = self.items.get_loc(item) 3591 else: 3592 indexer = np.arange(len(self.items))[isnull(self.items)] ~/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance) 2442 return self._engine.get_loc(key) 2443 except KeyError: -> 2444 return self._engine.get_loc(self._maybe_cast_indexer(key)) 2445 2446 indexer = self.get_indexer([key], method=method, tolerance=tolerance) pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() pandas/_libs/hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item() KeyError: 'opponent'

1条回答

网友

1楼 · 发布于 2024-10-02 00:33:40

当您把数据帧保存到csv文件再读回时，原来的索引（opponent）不会自动恢复：它会变成普通的一列，而数据帧使用默认的整数索引。要修复它，请在读取csv时将index_col设置为opponent。

而不是：

#Load DF from CSV
df = pd.read_csv("Baskets.csv")

尝试使用：

#Load DF from CSV
df = pd.read_csv("Baskets.csv", index_col='opponent')

此外，从csv读回后，nearest_neighbors列中的每个单元格都是字符串（例如 "['Zach LaVine', 'Jeremy Lin', ...]"），而不是列表。要将该列转换回列表，还需要执行以下操作：

from ast import literal_eval

df.nearest_neighbors=df.nearest_neighbors.apply(literal_eval)

在那之后，pd.melt 就可以正常工作了：

(pd.melt(df.nearest_neighbors.apply(pd.Series).reset_index(), 
     id_vars=[ 'opponent'],
     value_name='nearest_neighbors')
 .set_index([ 'opponent'])
 .drop('variable', axis=1)
 .dropna()
 .sort_index()
)

输出：

             nearest_neighbors
opponent                  
76ers          Zach LaVine
76ers           Jeremy Lin
76ers        Nate Robinson
76ers                Isaia
blazers        Zach LaVine
blazers         Jeremy Lin
blazers      Nate Robinson
blazers              Isaia
bobcats        Zach LaVine
bobcats         Jeremy Lin
bobcats      Nate Robinson
bobcats              Isaia

相关问题更多 >

编程相关推荐

热门问题

热门文章