擅长:python、mysql、java
<p>你可以选择:</p>
<pre><code>import pandas as pd
import numpy as np
df = pd.DataFrame(['foo', 'bar', 'foo', 'baz', 'foo', 'bar',], columns=['name'])
# Create the groups order
ordered_names = df['name'].drop_duplicates().tolist() # ['foo', 'bar', 'baz']
# Find index of each element in the ordered list
df['duplication_index'] = df['name'].apply(lambda x: ordered_names.index(x) + 1)
# Discard non-duplicated entries
df.loc[~df['name'].duplicated(keep=False), 'duplication_index'] = np.nan
print(df)
# name duplication_index
# 0 foo 1.0
# 1 bar 2.0
# 2 foo 1.0
# 3 baz NaN
# 4 foo 1.0
# 5 bar 2.0
</code></pre>