# Show both input frames (expected output is reproduced in the comments).
print(df1)
#    age name
# 0   22  foo
# 1   50  bar
# 2   70  aaa
print(df2)
#    age name
# 0   22  foo
# 1   80  ccc
# 2   50  bar
# 3   30  baz
# 4   10  bar
# 5   40  bbb

# Rows are compared as whole (name, age) pairs. Testing each column with its
# own isin() would also accept a df2 row whose name and age come from two
# *different* df1 rows, e.g. (50, 'foo'), which is not a row of df1.
key_columns = ['name', 'age']
df1_keys = df1.set_index(key_columns).index
df2_keys = df2.set_index(key_columns).index
# Boolean array aligned with df2's rows: True where the pair exists in df1.
row_in_df1 = df2_keys.isin(df1_keys)

# Rows of df2 that also occur in df1 (df2's original index is preserved).
df3 = df2[row_in_df1]
print(df3)
#    age name
# 0   22  foo
# 2   50  bar

# Rows of df2 that do not occur in df1.
df4 = df2[~row_in_df1]
print(df4)
#    age name
# 1   80  ccc
# 3   30  baz
# 4   10  bar
# 5   40  bbb
# Rows that are in this df should be removed from df2.
# The first two rows are in both dfs, the third isn't.
df1 = pd.DataFrame({
    'name': ['foo', 'bar', 'bak'],
    'age': [22, 50, 30],
})
df2 = pd.DataFrame({
    'name': ['foo', 'bar', 'baz', 'bar'],
    'age': [22, 50, 30, 10],
})

# Flag every df1 row so matches can be recognised after the merge.
df1['is_in_first_df'] = True

# A left join keeps every df2 row (in df2's order) and attaches the flag only
# where the (age, name) pair also exists in df1; unmatched rows get NaN.
df2_ = pd.merge(df2, df1, on=['age', 'name'], how='left')

# Turn the True/NaN flag into a real boolean column. notna() is used instead
# of fillna(False) so that `~` below operates on a bool dtype rather than on
# an object column, independent of pandas' downcasting behaviour.
df2_['is_in_first_df'] = df2_['is_in_first_df'].notna()

# Keep only the rows that are not in df1 and remove the helper flag column.
df2_only = df2_[~df2_['is_in_first_df']].drop('is_in_first_df', axis=1)
您可以使用函数 `isin`。
这应该行得通,尽管不得不承认这种做法有点笨重。不过,您可以在 `merge` 的 `on` 参数中指定任意数量的列(当然,前提是这些列在两个数据集中都存在)。
相关问题 更多 >
编程相关推荐