将字符串转换为唯一的数值或Numpy

2024-09-27 00:20:57 发布

您现在位置:Python中文网/ 问答频道 /正文

我是Python和深度学习的初学者。这对你们大多数人来说可能很容易,但我该怎么做呢?

如何将以下对象(字符串数组)中的每个值转换为唯一的数值?

df['city'].unique()

array(['LIMA', 'VACAVILLE', 'CINCINNATI', 'GLASGOW', 'BOWLING GREEN',
   'LANCASTER', 'HOUSTON', 'SPRINGFIELD', 'RAPID CITY', 'FORT WORTH',
   'LAREDO', 'NEW YORK', 'CHARLESTON', 'PITTSBURGH',
   'WEST VALLEY CITY', 'CAYCE', 'HOT SPRINGS NATIO', 'CANTON',
   'FORT WAYNE', 'DU BOIS', 'DAYTON', 'MASON CITY', 'WASHINGTON',
   'LAKE OSWEGO', 'FAYETTEVILLE', 'SALT LAKE CITY', 'KNOXVILLE',
   'TURLOCK', 'MCALLEN', 'CENTERVILLE', 'ROCHESTER', 'OKLAHOMA CITY',
   'GAUTIER', 'DOYLESTOWN', 'ATLANTA', 'MEADVILLE', 'FORT MYERS',
   'ERIE', 'BEAUMONT', 'JACKSON', 'CLARKSVILLE', 'BETHLEHEM',
   'SAN ANTONIO', 'LAS VEGAS', 'ATHENS', 'SAN LUIS OBISPO', 'SEATTLE',
   'BRADENTON', 'TINLEY PARK', 'HUNTLEY', 'SYRACUSE', 'WHEELWRIGHT',
   'TOWSON', 'YONKERS', 'ARDEN HILLS', 'MARION', 'LIVONIA',
   'COLORADO SPRINGS', 'CURWENSVILLE', 'SAINT CHARLES', 'PETERSBURG',
   'SCOTTSDALE', 'SILVER SPRING', 'PORTLAND', 'BIRMINGHAM',
   'CEDARVILLE', 'CLERMONT', 'ASHEVILLE', 'SHREVEPORT', 'DRAPER',
   'WAVERLY', 'CANANDAIGUA', 'MOUNT PLEASANT', 'MARIETTA', 'MANKATO',
   'HARLINGEN', 'HATCH', 'MOBILE', 'POULSBO', 'GARDEN GROVE',
   'GIG HARBOR', 'OCONOMOWOC', 'MOUNT MORRIS', 'ORLANDO',
   'DODGE CITY', 'DILLSBURG', 'HUNTSVILLE', 'KANSAS CITY',
   'JACKSONVILLE', 'DULUTH', 'CITRUS HEIGHTS', 'ONEONTA', 'LOS LUNAS',
   'GIBSONIA', 'ROBINSON', 'VERNON HILLS', 'PHOENIX', 'DESTIN',
   'SHEPHERD', 'BROOKLYN', 'PLANO', 'WINTERS', 'JAMAICA', 'POWAY',
   'LEXINGTON', 'UPLAND', 'NEW ALBANY', 'GREENVILLE',
   'JEFFERSON CITY', 'ARLINGTON', 'BUFFALO', 'LOS ANGELES',
   'CHARLOTTE', 'WEST LAFAYETTE', 'GARY', 'COOPERSTOWN', 'GREAT BEND',
   'DAVISON', 'SMYRNA', 'MISSOURI CITY', 'MEMPHIS',
   'FORT WALTON BEACH', 'KISSIMMEE', 'BATAVIA', 'OLDSMAR', 'WYNNE',
   'ASHVILLE', 'FT BRAGG', 'TROY', 'SHAKER HTS', 'CLEVELAND HTS',
   'HAMBURG'], dtype=object)

我想用这些数据来训练一个模型。


Tags: 对象, city, df, new, 数值, san, west, mount
2条回答

IIUC(如果我理解正确),你需要`pandas.factorize`:

df = pd.DataFrame({'city':list('abcddf')})
df['city1'] = pd.factorize(df['city'])[0]

或者转换为分类类型(categoricals)并获取`cat.codes`:

df['city'] = pd.Categorical(df['city'])
df['city1'] = df['city'].cat.codes

print (df)

  city  city1
0    a      0
1    b      1
2    c      2
3    d      3
4    d      3
5    f      4

你也可以试试`sklearn.preprocessing.LabelEncoder`。如文档中所述,它将标签编码为介于0和n_classes-1之间的值。

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['city_num'] = le.fit_transform(df['city'])

print(df.head())
#             city  city_num
# 0           LIMA        72
# 1      VACAVILLE       122
# 2     CINCINNATI        21
# 3        GLASGOW        50
# 4  BOWLING GREEN        10
print(len(df.city.unique()))
# 132
print(len(set(df.city_num)))
# 132

然后可以将数字列转换为指示符列

from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder()
city_ind = ohe.fit_transform(df.city_num.values.reshape(-1, 1))

print(type(city_ind))
# <class 'scipy.sparse.csr.csr_matrix'>

print(city_ind.shape)
# (132, 132)

print(city_ind[0:2, ].toarray())
# [[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.]
#  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.
#    0.  0.  0.  0.  0.  0.]]

相关问题 更多 >

    热门问题