将嵌套JSON展平到pandas.DataFrame：基于字典值对列进行排序和命名

{ "product": "example_productname", "product_id": "example_productid", "product_type": "example_producttype", "producer": "example_producer", "currency": "example_currency", "client_id": "example_clientid", "supplement": [ { "supplementtype": "RTZ", "price": 300000, "rebate": "500", }, { "supplementtype": "CVB", "price": 500000, "rebate": "250", }, { "supplementtype": "JKL", "price": 100000, "rebate": "750", }, ], }

# Flatten the "supplement" list of one product record into top-level
# columns named after each entry's supplement type, then build a one-row
# DataFrame from the result.
d = {
    "product": "example_productname",
    "product_id": "example_productid",
    "product_type": "example_producttype",
    "producer": "example_producer",
    "currency": "example_currency",
    "client_id": "example_clientid",
    "supplement": [
        {
            "supplementtype": "RTZ",
            "price": 300000,
            "rebate": "500",
        },
        {
            "supplementtype": "CVB",
            "price": 500000,
            "rebate": "250",
        },
        {
            "supplementtype": "JKL",
            "price": 100000,
            "rebate": "750",
        },
    ],
}

for s in d["supplement"]:
    for field in ("price", "rebate"):
        try:
            d["supplementtype_{}_{}".format(s["supplementtype"], field)] = s[field]
        except KeyError:
            # Best effort: an entry that lacks its type or this field is
            # skipped. Only KeyError is swallowed so real errors surface.
            continue

# The nested list is now fully represented by the new top-level keys.
del d["supplement"]

df = pd.DataFrame([d])
print(df)

# Output:
#                product         product_id         product_type          producer          currency         client_id  supplementtype_RTZ_price supplementtype_RTZ_rebate  supplementtype_CVB_price supplementtype_CVB_rebate  supplementtype_JKL_price supplementtype_JKL_rebate
# 0  example_productname  example_productid  example_producttype  example_producer  example_currency  example_clientid                    300000                       500                    500000                       250                    100000                       750

def flatten_json(nested_json: dict, exclude: "list | None" = None, sep: str = '_') -> dict:
    """
    Flatten arbitrarily nested dicts and lists into a single-level dict.

    Each leaf value is stored under a key made of the dict keys and list
    indices on the path to it, joined with ``sep``, e.g.
    ``{"a": {"b": [10, 20]}}`` becomes ``{"a_b_0": 10, "a_b_1": 20}``.

    Args:
        nested_json: The object to flatten (dicts, lists and scalar leaves).
        exclude: Dict keys to skip, at any nesting level. When omitted, only
            the empty-string key ``''`` is skipped.
        sep: Separator placed between path components; may be any length,
            including empty.

    Returns:
        A new flat dict. Empty dicts and empty lists contribute no keys; a
        scalar passed at the top level is stored under the key ``''``.
    """
    # Resolve the default per call instead of sharing one mutable list.
    excluded = [''] if exclude is None else exclude
    out = {}

    def flatten(x, path=()):
        if isinstance(x, dict):
            for key, value in x.items():
                if key not in excluded:
                    flatten(value, path + (str(key),))
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, path + (str(index),))
        else:
            # Joining the components (rather than trimming one trailing
            # character) keeps keys correct for any separator length.
            out[sep.join(path)] = x

    flatten(nested_json)
    return out


if __name__ == "__main__":
    # list of files
    files = ['test1.json', 'test2.json']

    # list to add dataframe from each file
    df_list = list()

    # iterate through files
    for file in files:
        with open(file, 'r') as f:
            # read with json
            data = json.load(f)

        # flatten_json into a one-row dataframe and add it to the list
        df_list.append(pd.DataFrame.from_dict(flatten_json(data), orient='index').T)

    # concat all dataframes together
    df = pd.concat(df_list).reset_index(drop=True)

1条回答

网友
1楼 · 发布于 2024-09-29 17:13:49

您可以在创建数据帧之前修改字典：
# Reshape the record before building the DataFrame: every entry of the
# nested "supplement" list becomes a pair of top-level keys named after
# the entry's supplement type.
d = {
    "product": "example_productname",
    "product_id": "example_productid",
    "product_type": "example_producttype",
    "producer": "example_producer",
    "currency": "example_currency",
    "client_id": "example_clientid",
    "supplement": [
        {
            "supplementtype": "RTZ",
            "price": 300000,
            "rebate": "500",
        },
        {
            "supplementtype": "CVB",
            "price": 500000,
            "rebate": "250",
        },
        {
            "supplementtype": "JKL",
            "price": 100000,
            "rebate": "750",
        },
    ],
}

# Take the nested list out of the record; its content is re-added below as
# flat keys, so the list itself must not become a column.
for s in d.pop("supplement"):
    prefix = "supplementtype_{}".format(s["supplementtype"])
    d[prefix + "_price"] = s["price"]
    d[prefix + "_rebate"] = s["rebate"]

# A list holding one dict yields a one-row frame whose columns follow the
# dict's key order.
df = pd.DataFrame([d])
print(df)
输出结果：
product product_id product_type producer currency client_id supplementtype_RTZ_price supplementtype_RTZ_rebate supplementtype_CVB_price supplementtype_CVB_rebate supplementtype_JKL_price supplementtype_JKL_rebate 0 example_productname example_productid example_producttype example_producer example_currency example_clientid 300000 500 500000 250 100000 750

相关问题更多 >

编程相关推荐

热门问题

热门文章