擅长:Python、MySQL、Java
<p>在地址数据固定不变的情况下,通常可以这样做(用 pandas 按地址列做左连接):</p>
<pre><code>import sys
# Python 2 ships StringIO in its own module; Python 3 moved it into io.
# The branch bodies must be indented, otherwise the snippet does not parse.
if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
import pandas as pd
# Sample "large" table; its header cells and some values carry stray spaces.
large = StringIO('''col1, col2, col3, col4, col5, col6, addr
234, 453,34535,342,634,636, Ken street
562, 345,6753,835,864,967,St Pauls''')
# Fixed lookup table mapping each address to its coordinates.
addr = StringIO('''addr,lat,lon
baker street, lat1, lon1
ken street, lat2,lon2
paul street, lat3, lon3
St Pauls, lat4, lon4''')

df_large = pd.read_csv(large, sep=',')
# Header cells are read with their leading spaces (' col2', ' addr', ...);
# strip them so the key column is really named 'addr'.
df_large.columns = df_large.columns.str.strip()
# Normalise the join key (case and surrounding whitespace) on both sides.
# The vectorised .str methods propagate missing values instead of raising,
# unlike calling x.lower() on each element.
df_large['addr'] = df_large['addr'].str.lower().str.strip()

df_addr = pd.read_csv(addr, sep=',')
df_addr['addr'] = df_addr['addr'].str.lower().str.strip()

# Left join: every row of the large table is kept and gains lat/lon;
# addresses absent from the lookup table get missing values.
df_large = pd.merge(df_large, df_addr, how='left', on='addr')
</code></pre>