根据数据帧中的多个列和行的某些条件构造新列

2条回答

网友

1楼 · 编辑于 2024-09-28 03:24:29

这是我的解决方案，但是“历史”包含了dict列表，而不是只包含dict。你知道吗

import pandas as pd, numpy as np
import io

# My test data:
text="""leid  run_seq  cp_id products currency  amount
0      101.0      1.0  201.0        A      YEN   345.0
1      102.0      2.0  201.0        B      INR   223.0
2      101.0      2.0  202.0        A      USD   845.0
3      102.0      3.0  201.0        C      USD   345.0
4      101.0      1.0  201.0        A      YEN   100.0
5      101.0      1.0  203.0        B     EURO   200.0
6      101.0      1.0  203.0        C      AUD   300.0"""

df= pd.read_csv(io.StringIO(text),sep=r"\s+",engine="python").sort_values(["leid","run_seq"])
G= df.groupby(["leid","run_seq"],sort=False)

def mkdict(grp):
    # Out: {201:{A:{YEN:345}}}
    d_cpid={}
    for r in grp.itertuples():
        d_prod= d_cpid.setdefault(r.cp_id, {} )     # {201:{}
        d_curr= d_prod.setdefault(r.products,{})    # {201:{A:{}
        d_curr[r.currency]= d_curr.get(r.currency,0)+r.amount   # {201:{A:{YEN:

    return d_cpid

rslt= G.apply(lambda grp: mkdict(grp))
rslt= rslt.reset_index().rename(columns={0:"current"})

L=[]
G1= rslt.groupby("leid")
for key,grp in G1:
    L.append([])
    lv= grp["current"].values
    for i in range(1,len(lv)):
        L.append(lv[:i])

rslt["history"]= L

编辑：下一次尝试

import pandas as pd, numpy as np
import io

# My test data
text="""leid  run_seq  cp_id products currency  amount
0      101.0      1.0  201.0        A      YEN   345.0
1      102.0      2.0  201.0        B      INR   223.0
2      101.0      2.0  202.0        A      USD   845.0
3      102.0      3.0  201.0        C      USD   345.0
4      101.0      1.0  201.0        A      YEN   100.0
5      101.0      1.0  203.0        B      EUR   200.0
6      101.0      1.0  203.0        C      AUD   300.0
7      101.0      3.0  204.0        D      INR   400.0
8      101.0      2.0  203.0        B      EUR   155.0
"""

df= pd.read_csv(io.StringIO(text),sep=r"\s+",engine="python").sort_values(["leid","run_seq"])
G= df.groupby(["leid","run_seq"],sort=False)

# This function works on a groupby object, and returns list of tuples:
def mklist(grp):
    return [ (r.cp_id,r.products,r.currency,r.amount) for r in grp.itertuples()]

# It makes dictionary from a list of tuples:
def mkdict(lt):

    # Out: { {201:{A:{YEN:345}}}, ... }
    d_cpid={}
    for cpid,prod,curr,amnt in lt:
        d_prod= d_cpid.setdefault(cpid, {})    # {201:{}
        d_curr= d_prod.setdefault(prod,{})      # {201:{A:{}
        d_curr[curr]= d_curr.get(curr,0)+amnt   # {201:{A:{YEN:

    return d_cpid

rslt= G.apply(lambda grp: mklist(grp) )
rslt= rslt.reset_index().rename(columns={0:"current"})

L=[]
G1= rslt.groupby("leid")
for key,grp in G1:
    L.append([])
    lv= grp["current"].values
    for i in range(1,len(lv)):
        L.append( [t for l in lv[:i] for t in l] )

rslt["history"]= [ mkdict(l) for l in L ]
rslt["current"]= [ mkdict(l) for l in rslt.current.values ]

网友

2楼 · 编辑于 2024-09-28 03:24:29

我开发了Pandas的apply功能和定制Pandas的groupby功能

（定制熊猫团购积分：https://medium.com/@sean.turner026/applying-custom-functions-to-groupby-objects-in-pandas-61af58955569）

我还修改了你的输入，以显示一些可能的结果。你知道吗

代码如下所示

# defined the table copied from your question

table = """
leid     run_seq     cp_id     products    currency     amount
101           1       201        A           YEN         345
102           1       201        A           IDR         900
102           2       201        B           INR         223
101           2       202        A           USD         845
102           3       201        C           USD         345
"""

import pandas as pd
import numpy as np

with open("stackoverflow.csv", "w") as f:
    f.write(table)

df = pd.read_csv("stackoverflow.csv", delim_whitespace=True)
df = df.sort_values(by = ["leid", "run_seq"]).reset_index(drop = True)
# assigned using pandas apply in axis = 1
df["current"] = df.apply(lambda x: {x["cp_id"]: {x["products"]: {x["currency"]: x["amount"]}}}, axis = 1)


# defining a function to merge dictionaries
def Merge(dict1, dict2): 
    res = {**dict1, **dict2} 
    return res 

# defining a customised cumulative function dictionary
def cumsumdict(data):

    current_dict = [{}]

    for i in range(1, data.shape[0]):
        cp_id = list(data["current"].iloc[i-1])[0]
        product = list(data["current"].iloc[i-1][cp_id])[0]
        currency = list(data["current"].iloc[i-1][cp_id][product])[0]
        if cp_id in current_dict[-1]:
            # merge cp_id using dictionary merge if exist in previous trx
            cp_merger = Merge(current_dict[-1][cp_id], data["current"].iloc[i-1][cp_id])
            appender = current_dict[-1]
            appender[cp_id] = cp_merger
            if product in current_dict[-1][cp_id]:
                # merge products using dictionary merge if exist in previous trx
                product_merger = Merge(current_dict[-1][cp_id][product], data["current"].iloc[i-1][cp_id][product])
                appender = current_dict[-1]
                appender[cp_id][product] = product_merger
                if currency in current_dict[-1][cp_id][product]:
                    # sum the currency value 
                    currency_merger = current_dict[-1][cp_id][product][currency] + data["current"].iloc[i-1][cp_id][product][currency]
                    appender = current_dict[-1]
                    appender[cp_id][product][currency] = currency_merger



        else:
            appender = Merge(current_dict[-1], data["current"].iloc[i-1])

        current_dict.append(appender)

    data["history"] = current_dict

    return data

df = df.groupby(["leid"]).apply(cumsumdict)
df = df[["leid", "run_seq", "current", "history"]]
print(df)

上述功能将导致

  leid  run_seq                     current  \
0   101        1  {201: {'A': {'YEN': 345}}}   
3   101        2  {202: {'A': {'USD': 845}}}   
1   102        1  {201: {'A': {'IDR': 900}}}   
2   102        2  {201: {'B': {'INR': 223}}}   
4   102        3  {201: {'C': {'USD': 345}}}   

                                         history  
0                                             {}  
3                     {201: {'A': {'YEN': 345}}}  
1                                             {}  
2  {201: {'A': {'IDR': 900}, 'B': {'INR': 446}}}  
4  {201: {'A': {'IDR': 900}, 'B': {'INR': 446}}}

相关问题更多 >

编程相关推荐

热门问题

热门文章

根据数据帧中的多个列和行的某些条件构造新列

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >