根据列与行比较得到的布尔结果筛选行（替代 for 循环）

import pandas as pd

# Sample intervals, one (start, end) pair per row.
df = pd.DataFrame(
    ((5, 12), (16, 19), (7, 14), (6, 9), (17, 18), (1, 3)),
    columns=["start", "end"],
)

# Index labels of the rows that will be discarded.
dropIndexes = []

# Walk from the last row up to the second one. Row 0 has no earlier row to
# collide with, so it is never examined and always kept.
for i in range(len(df) - 1, 0, -1):
    row = df.iloc[i]
    earlier = df.iloc[:i]  # every row positioned before row i

    # Element-wise masks over the earlier rows: True where this row's
    # start (resp. end) lies inside that earlier row's inclusive range.
    start = (row["start"] >= earlier["start"]) & (row["start"] <= earlier["end"])
    end = (row["end"] >= earlier["start"]) & (row["end"] <= earlier["end"])

    # NOTE(review): a row that completely encloses an earlier range has
    # neither endpoint inside it and is therefore kept; confirm that this
    # is the intended rule.
    if (start | end).any():
        # Record the label, not the position, so that drop() removes the
        # right row even when the index is not the default 0..n-1.
        dropIndexes.append(df.index[i])

df = df.drop(dropIndexes)
df  # bare expression: displays the result in a notebook / REPL

1条回答

网友

1楼 · 发布于 2024-10-02 18:18:46

假设 start 和 end 列中没有负值，您可以使用 DataFrame.apply（按行处理，axis=1）来实现这个用例：

import pandas as pd
# Sample intervals, one (start, end) pair per row.
df = pd.DataFrame(
    [(5, 12), (16, 19), (7, 14), (6, 9), (17, 18), (1, 3)],
    columns=["start", "end"],
)

# Intervals accepted so far, in acceptance order. select_row() only mutates
# this list in place (it never rebinds the name), so no ``global`` statement
# is required. Clear it before filtering another DataFrame.
kept_intervals = []


def select_row(index_start, index_end):
    """Decide whether the interval [index_start, index_end] is kept.

    The interval is accepted when it does not overlap any interval accepted
    by an earlier call; accepted intervals are remembered in the
    module-level ``kept_intervals`` list. Bounds are inclusive, so two
    intervals that merely share an endpoint count as overlapping.

    Every accepted interval is stored individually rather than as a single
    running (start, end) span. An interval that falls into a gap between
    two accepted intervals is therefore still accepted, and no sentinel
    value is needed, so negative bounds work as well.

    Args:
        index_start: Lower bound of the candidate interval.
        index_end: Upper bound of the candidate interval.

    Returns:
        True if the interval was accepted (and recorded), False if it
        overlaps a previously accepted interval.
    """
    for kept_start, kept_end in kept_intervals:
        # Two closed intervals overlap unless one of them ends strictly
        # before the other one begins.
        if index_start <= kept_end and kept_start <= index_end:
            return False

    kept_intervals.append((index_start, index_end))
    return True

def _accept_row(row):
    """Pass one DataFrame row's start/end values to select_row."""
    return select_row(row["start"], row["end"])


# Evaluate the rows in order; the per-row answers form a boolean mask.
filt_index = df.apply(_accept_row, axis=1)

# Keep only the accepted rows (all columns) and show the result.
df = df.loc[filt_index, df.columns]
print(df)

说明：这里不得不使用 global 变量，否则在函数内部给 start 和 end 赋值时，它们会被当作局部变量，读取时就会触发 UnboundLocalError。此外，我也不想在 apply() 处理数据帧的每一行时，都把 start 和 end 作为参数传进去。

相关问题更多 >

编程相关推荐

热门问题

热门文章