<pre><code>import pandas as pd
# Rows to categorise: one (ID, url) pair per row.
df1 = pd.DataFrame({
    'ID': ['111', '222', '333', '333'],
    'url': ['vk.com/audio/12353546', 'twitter.com/lenad', 'avito.ru/phones', 'facebook.ru/chats'],
})
print(" original df1 ")
print(df1)

# Lookup table: category information keyed by the 'domain' column.
df2 = pd.DataFrame({
    'Maincaregory': ['Entertainment', 'Entertainment', 'Entertainment', 'Online shop', 'Entertainment'],
    'Subcategory': ['Social Network', 'Social Network', 'Social Network', 'Buys', 'Social Network Music'],
    'domain': ['vk.com', 'twitter.com', 'facebook.com', 'avito.com', 'vk.com/audio'],
})
print("\n original df2 ")
print(df2)


def to_domain(url):
    """Return the merge key used to look a URL up in df2['domain'].

    For vk URLs the host plus the first path section is kept
    ('vk.com/audio/123' becomes 'vk.com/audio'), because df2 lists
    'vk.com/audio' separately from 'vk.com'.  A vk URL without any path
    yields just the host instead of raising IndexError.

    For every other URL the text before the first dot is combined with
    '.com' ('avito.ru/phones' becomes 'avito.com'), since every domain
    listed in df2 ends in '.com'.
    """
    host, _, path = url.partition("/")
    site = url.split(".")[0]
    if site == 'vk':
        section = path.split("/")[0]
        return host + "/" + section if section else host
    return site + ".com"


# Derive the merge key for every row in one pass instead of an iloc loop.
df1['domain'] = df1['url'].map(to_domain)
print("\n for merge df1 ")
print(df1)

# Inner merge keeps only rows whose key exists in df2; the helper key column
# is dropped afterwards (keyword form: positional axis is rejected by pandas 2).
df3 = pd.merge(df1, df2, how='inner', on=['domain'])
df3 = df3.drop(columns='domain')
print("\n what you want ")
print(df3)
</code></pre>
<p>结果:</p>
<pre><code> original df1
ID url
0 111 vk.com/audio/12353546
1 222 twitter.com/lenad
2 333 avito.ru/phones
3 333 facebook.ru/chats
original df2
Maincaregory Subcategory domain
0 Entertainment Social Network vk.com
1 Entertainment Social Network twitter.com
2 Entertainment Social Network facebook.com
3 Online shop Buys avito.com
4 Entertainment Social Network Music vk.com/audio
for merge df1
ID url domain
0 111 vk.com/audio/12353546 vk.com/audio
1 222 twitter.com/lenad twitter.com
2 333 avito.ru/phones avito.com
3 333 facebook.ru/chats facebook.com
what you want
ID url Maincaregory Subcategory
0 111 vk.com/audio/12353546 Entertainment Social Network Music
1 222 twitter.com/lenad Entertainment Social Network
2 333 avito.ru/phones Online shop Buys
3 333 facebook.ru/chats Entertainment Social Network
</code></pre>