从一个文件夹中的多个文件创建pandas DataFrame

import pickle

import pandas as pd

# Load one pickled dict and flatten it into a long table with one row per
# (name, ethnicity score). The json_normalize call below requires every value
# of the dict to provide a 'scores' record list and a 'best' field.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open("ethnicity_files/imdbnames1.pkl", 'rb') as fh:
    d = pickle.load(fh)

# Concatenating a dict of frames yields a (key, row-number) MultiIndex.
df = pd.concat({k: pd.json_normalize(v, 'scores', ['best']) for k, v in d.items()})
# Drop the per-frame row number and turn the dict key into a 'names' column.
df = df.reset_index(level=1, drop=True).rename_axis('names').reset_index()
df.head()

# Sample output as posted with the question:
#
#            names        ethnicity  score             best
# 0   !Gubi Tietie            Asian   0.03  GreaterEuropean
# 1   !Gubi Tietie   GreaterAfrican   0.01  GreaterEuropean
# 2   !Gubi Tietie  GreaterEuropean   0.96  GreaterEuropean
# 3   !Gubi Tietie          British   0.17     WestEuropean
# 4   !Gubi Tietie           Jewish   0.13     WestEuropean
# 5   !Gubi Tietie     WestEuropean   0.65     WestEuropean
# 6   !Gubi Tietie     EastEuropean   0.05     WestEuropean
# 7   !Gubi Tietie           Nordic   0.00          Italian
# 8   !Gubi Tietie          Italian   0.69          Italian
# 9   !Gubi Tietie         Hispanic   0.12          Italian
# 10  !Gubi Tietie           French   0.16          Italian
# 11  !Gubi Tietie         Germanic   0.02          Italian
# 12       $2 Tony            Asian   0.00  GreaterEuropean
# 13       $2 Tony   GreaterAfrican   0.00  GreaterEuropean
# 14       $2 Tony  GreaterEuropean   1.00  GreaterEuropean
# 15       $2 Tony          British   0.00     WestEuropean
# 16       $2 Tony           Jewish   0.00     WestEuropean
# 17       $2 Tony     WestEuropean   1.00     WestEuropean
# 18       $2 Tony     EastEuropean   0.00     WestEuropean
# 19       $2 Tony           Nordic   0.00          Italian

2条回答

网友

1楼 · 编辑于 2024-05-08 15:47:16

您可以使用glob.glob迭代当前文件夹中具有特定扩展名（.pkl）的所有文件

import glob
import os
import pickle

import pandas as pd


def load_pickled_scores(folder, pattern="*.pkl"):
    """Combine every pickle in *folder* matching *pattern* into one DataFrame.

    Each pickle must hold a dict whose values can be flattened with
    ``pd.json_normalize(value, 'scores', ['best'])``; the dict key becomes
    the ``names`` column of the result.

    Args:
        folder: Directory to search. The working directory is left untouched.
        pattern: Glob pattern selecting the files to read.

    Returns:
        A DataFrame with the rows of all matching files and a fresh
        0..n-1 index, or an empty DataFrame when nothing matches.
    """
    frames = []
    # Sorted so the row order does not depend on directory listing order.
    for path in sorted(glob.glob(os.path.join(folder, pattern))):
        # NOTE: pickle can execute arbitrary code on load; only read trusted files.
        with open(path, 'rb') as fh:
            d = pickle.load(fh)
        # Concatenating a dict of frames yields a (key, row-number) MultiIndex.
        df = pd.concat({k: pd.json_normalize(v, 'scores', ['best']) for k, v in d.items()})
        # Drop the per-frame row number and expose the key as a 'names' column.
        df = df.reset_index(level=1, drop=True).rename_axis('names').reset_index()
        # Keep every file's frame instead of overwriting the previous one.
        frames.append(df)
    if not frames:
        # pd.concat raises on an empty list; report "no data" as an empty frame.
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)


if __name__ == "__main__":
    df = load_pickled_scores('path_to_your_folder')
    print(df.head())

网友

2楼 · 编辑于 2024-05-08 15:47:16

我想你需要os.listdir()：

# Be careful: this might give you a memory error if you don't have enough RAM
# for all your files, and make sure the folder contains only the files you
# want to read.
import os
import pickle

import pandas as pd


def read_pickle_folder(folder):
    """Read every file in *folder* as a pickle and stack the results.

    Each pickle must hold a dict whose values can be flattened with
    ``pd.json_normalize(value, 'scores', ['best'])``; the dict key becomes
    the ``names`` column of the result.

    Args:
        folder: Directory whose regular files are all read as pickles.

    Returns:
        One DataFrame holding the rows of every file with a fresh 0..n-1
        index, or an empty DataFrame when the folder contains no files.

    Raises:
        FileNotFoundError: If *folder* does not exist.
    """
    list_of_dfs = []
    # Sorted so the row order does not depend on directory listing order.
    for entry in sorted(os.listdir(folder)):
        path = os.path.join(folder, entry)
        if not os.path.isfile(path):
            # Sub-directories cannot be unpickled; skip them.
            continue
        # pickle.load needs an open binary file object, not a path string.
        # NOTE: pickle can execute arbitrary code on load; only read trusted files.
        with open(path, 'rb') as fh:
            d = pickle.load(fh)
        # Concatenating a dict of frames yields a (key, row-number) MultiIndex.
        df = pd.concat({k: pd.json_normalize(v, 'scores', ['best']) for k, v in d.items()})
        # Drop the per-frame row number and expose the key as a 'names' column.
        df = df.reset_index(level=1, drop=True).rename_axis('names').reset_index()
        list_of_dfs.append(df)
    if not list_of_dfs:
        # pd.concat raises on an empty list; report "no data" as an empty frame.
        return pd.DataFrame()
    # ignore_index resets the index of the combined frame.
    return pd.concat(list_of_dfs, ignore_index=True)


if __name__ == "__main__":
    big_df = read_pickle_folder('ethnicity_files/')
    print(big_df.head())

相关问题更多 >

编程相关推荐

热门问题

热门文章