import re

import pandas as pd

# Toy data: free-text speaker labels, some of which name several characters.
df = pd.DataFrame({
    'speaker': [
        'Stanley & Kevin',
        'Everybody',
        'Kevin speaks',
        'The speaker is Franck',
        'Nobody',
    ],
})
personnages = ['Stanley', 'Kevin', 'Franck']

# Escape each name so that any regex metacharacter inside a name is matched
# literally instead of altering the alternation.
pattern = '|'.join(re.escape(name) for name in personnages)

s = (
    df['speaker'].str
    .extractall(f'({pattern})')  # one output row per character mention
    .groupby(level=0)[0]         # regroup the mentions by original df row
    .nunique().eq(1)             # True where exactly one distinct character
)

# Keep the rows that mention exactly one known character. Rows without any
# mention never appear in `s`, so they are excluded as well.
df.loc[s.index[s]]
import pandas as pd
# Sample speaker labels; the final entry contains two names.
speakers = ['Stanley', 'Kevin', 'Frank', 'Kevin & Frank']
# Single-column frame with one row per label.
df = pd.DataFrame({'speaker': speakers})
speaker
0 Stanley
1 Kevin
2 Frank
3 Kevin & Frank
# Alternation of the single-name labels: everything except the last entry,
# for the sake of the example.
r = '|'.join(speakers[:-1])
# fullmatch only accepts cells whose entire text equals one alternative, so no
# explicit start/end anchors are required. The group is non-capturing because
# only a yes/no match is needed, not the matched text.
df[df['speaker'].str.fullmatch(f'(?:{r})')]
speaker
0 Stanley
1 Kevin
2 Frank
我要做的是:
输出:
您需要在正则表达式中标明字符串的开头和结尾,这样整个字符串就只能恰好是一个名称:
相关问题 更多 >
编程相关推荐