扁平化Pandas数据帧

import pandas as pd

# Sample frame: two columns shared by every record plus two
# (IdentifierN, ValueN) column pairs that should be flattened into rows.
df = pd.DataFrame(data={
    "CommonIdentifier": [1234, 1235, 1236, 1237],
    "CommonValue": ["type1", "type2", "type1", "type1"],
    "Identifier1": ["a", "a", "b", "b"],
    "Value1": [1, 1.5, 2, 1.3],
    "Identifier2": ["b", "b", "c", "a"],
    "Value2": [4, 2, 3.2, 1],
})
print(df)
# Output as shown in the original post (columns appear alphabetically there;
# presumably an older pandas -- newer releases may keep the dict order):
#    CommonIdentifier CommonValue Identifier1 Identifier2  Value1  Value2
# 0              1234       type1           a           b     1.0     4.0
# 1              1235       type2           a           b     1.5     2.0
# 2              1236       type1           b           c     2.0     3.2
# 3              1237       type1           b           a     1.3     1.0

# Number of (IdentifierN, ValueN) pairs present in df.
num_identifiers = 2

# Melt the value columns and the identifier columns separately; both melts
# emit the rows pair by pair, so their "value" columns line up row for row.
m1 = pd.melt(df, id_vars=["CommonIdentifier"],
             value_vars=["Value1", "Value2"])
m2 = pd.melt(df, id_vars=["CommonIdentifier"],
             value_vars=["Identifier1", "Identifier2"])

# Repeat the common columns once per pair; ignore_index gives the same fresh
# 0..n-1 index that the melted frames carry, so the column-wise concat aligns.
m3 = pd.concat([df[["CommonIdentifier", "CommonValue"]]] * num_identifiers,
               ignore_index=True)

# Glue the three pieces together side by side.
flattened = pd.concat(
    [m3[["CommonIdentifier", "CommonValue"]],
     m2[["value"]].rename(columns={"value": "IdentifierJoined"}),
     m1[["value"]].rename(columns={"value": "ValueJoined"})],
    axis=1)
print(flattened)

2条回答

网友

1楼 · 编辑于 2024-09-26 22:13:52

下面是一个使用MultiIndex、stack()和merge()的方法：

import pandas as pd
# Sample frame: two shared columns plus two (IdentifierN, ValueN) pairs.
df = pd.DataFrame(data={"CommonIdentifier": [1234, 1235, 1236, 1237],
                        "CommonValue": ["type1", "type2", "type1", "type1"],
                        "Identifier1": ["a", "a", "b", "b"],
                        "Value1": [1, 1.5, 2, 1.3],
                        "Identifier2": ["b", "b", "c", "a"],
                        "Value2": [4, 2, 3.2, 1]})

common_columns = ["CommonIdentifier", "CommonValue"]
identifier_columns = ["Identifier1", "Identifier2"]
value_columns = ["Value1", "Value2"]

common_df = df[common_columns]

# Select the paired columns in exactly the order that from_product() will
# label them (all identifiers first, then all values). Work on an explicit
# copy so relabelling the columns never touches df itself.
mix_df = df[identifier_columns + value_columns].copy()
mix_df.columns = pd.MultiIndex.from_product(
    [["Identifier", "Value"], range(len(identifier_columns))])

# stack() moves the pair number (inner column level) into the row index,
# leaving one "Identifier" and one "Value" column; the pair number itself is
# then dropped so the rows can be joined back to the common columns on the
# original row index.
stacked = mix_df.stack().reset_index(level=1, drop=True)

# Keep the result in a name (and show it) instead of discarding it.
result = pd.merge(common_df, stacked,
                  left_index=True, right_index=True).reset_index(drop=True)
print(result)

网友

2楼 · 编辑于 2024-09-26 22:13:52

您可以使用 df[[...]] 选择所需的列，用 rename(columns=...) 更改列名，然后用 pd.concat 将各个子数据帧堆叠起来：

import pandas as pd

# Sample frame: two shared columns plus two (IdentifierN, ValueN) pairs.
df = pd.DataFrame(data={
    "CommonIdentifier": [1234, 1235, 1236, 1237],
    "CommonValue": ["type1", "type2", "type1", "type1"],
    "Identifier1": ["a", "a", "b", "b"],
    "Value1": [1, 1.5, 2, 1.3],
    "Identifier2": ["b", "b", "c", "a"],
    "Value2": [4, 2, 3.2, 1],
})

# One column list per pair number: the shared columns followed by that
# pair's own Identifier/Value columns.
colgroups = []
for pair_no in (1, 2):
    colgroups.append(['CommonIdentifier', 'CommonValue',
                      'Identifier' + str(pair_no), 'Value' + str(pair_no)])

# Map every numbered column onto its pair-independent "...Joined" name.
colmap = {}
for pair_no in (1, 2):
    for stem in ('Identifier', 'Value'):
        colmap[stem + str(pair_no)] = stem + 'Joined'

# Slice out each pair, give the slices identical column names, and stack
# them vertically (each slice keeps its original row labels).
pieces = []
for cols in colgroups:
    pieces.append(df[cols].rename(columns=colmap))
result = pd.concat(pieces)
print(result)

输出结果：

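   CommonIdentifier CommonValue IdentifierJoined  ValueJoined
0              1234       type1                a          1.0
1              1235       type2                a          1.5
2              1236       type1                b          2.0
3              1237       type1                b          1.3
0              1234       type1                b          4.0
1              1235       type2                b          2.0
2              1236       type1                c          3.2
3              1237       type1                a          1.0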

另一种受 HYRY 的解决方案启发的做法是：先把公共列存放到索引中，然后再应用 HYRY 的 stack 技巧：

import pandas as pd
# Sample frame: two shared columns plus two (IdentifierN, ValueN) pairs.
df = pd.DataFrame(data={"CommonIdentifier": [1234, 1235, 1236, 1237],
                        "CommonValue": ["type1", "type2", "type1", "type1"],
                        "Identifier1": ["a", "a", "b", "b"],
                        "Value1": [1, 1.5, 2, 1.3],
                        "Identifier2": ["b", "b", "c", "a"],
                        "Value2": [4, 2, 3.2, 1]})

identifier_columns = ["Identifier1", "Identifier2"]
value_columns = ["Value1", "Value2"]

# Stash the shared columns in the index so only the paired columns remain.
df = df.set_index(['CommonIdentifier', 'CommonValue'])

# from_product() below yields the labels Identifier-0, Identifier-1, Value-0,
# Value-1 in that order, so the columns must be arranged the same way before
# relabelling. Relying on the frame's incoming column order silently swaps
# Value1 and Identifier2 whenever the frame keeps the dict's insertion order.
df = df[identifier_columns + value_columns]
df.columns = pd.MultiIndex.from_product(
    [["Identifier", "Value"], range(len(identifier_columns))])

# Move the pair number into the row index, then drop it again: what is left
# is one row per (record, pair) with plain "Identifier" and "Value" columns.
df = df.stack()
df.index = df.index.droplevel(-1)
print(df.reset_index())

相关问题更多 >

编程相关推荐

热门问题

热门文章