我试图用KNN算法预测汽车的MPG。我首先清理了数据,制作了一个测试和训练数据集,然后制作了一个规范化和非规范化的KNN函数。现在我试图通过KNN算法传递测试数据,然后创建一个所有预测的列表。然后我想用均方误差来分析我的预测。目前,我还无法设置传递测试数据的函数。任何指导都将不胜感激
import pandas as pd
import numpy as np
import math
from google.colab import drive
drive.mount('/content/drive')
pd.set_option('display.max_columns', 100)
vehicles = pd.read_csv('/content/drive/MyDrive/CS_167/vehicles (2).csv')
subset_cars = vehicles[vehicles["fuelType"] == 'Regular']
final_sub = subset_cars[["comb08", "year", "cylinders", "displ"]]
column_nulls = final_sub.isna().any()
Cylinder_no_null = final_sub.cylinders.dropna()
displ_no_null = final_sub.displ.dropna()
pure_data = final_sub.dropna()
# pure_data.head()
shuffled_data = pure_data.sample(frac=1, random_state=41)
test_data = shuffled_data.iloc[0:500]
train_data = shuffled_data.iloc[500:]
train_data_euc = train_data.copy()
test_data_euc = test_data.copy()
def Regression_KNN(MPG,train_data_euc,k):
train_data_euc['euc_dis'] = np.sqrt(
(MPG['year']-train_data_euc['year'])**2+
(MPG['cylinders']-train_data_euc['cylinders'])**2+
(MPG['displ']-train_data_euc['displ'])**2)
sorted_train_data = train_data_euc.sort_values(['euc_dis'])
prediction = sorted_train_data.iloc[0:k]['comb08'].mean()
return prediction
MPG ={}
MPG['year'] = 2020
MPG['cylinders'] = 4
MPG['displ'] = 5.2
print(f"The average MPG for this car is: %d" %Regression_KNN(MPG, train_data_euc, 5))
z_train_copy = train_data_euc.copy()
z_train_year_std = z_train_copy['year'].std()
z_train_year_mean = z_train_copy['year'].mean()
z_train_cylinders_std = z_train_copy['cylinders'].std()
z_train_cylinders_mean = z_train_copy['cylinders'].mean()
z_train_displ_std = z_train_copy['displ'].std()
z_train_displ_mean = z_train_copy['displ'].mean()
z_train_euc_std = z_train_copy['euc_dis'].std()
z_train_euc_mean = z_train_copy['euc_dis'].mean()
z_train_copy['year'] = (z_train_copy['year'] - z_train_year_mean)/z_train_year_std
z_train_copy['cylinders'] = (z_train_copy['cylinders'] - z_train_cylinders_mean)/z_train_cylinders_std
z_train_copy['displ'] = (z_train_copy['displ'] - z_train_displ_mean)/z_train_displ_std
z_train_copy['euc_dis'] = (z_train_copy['euc_dis'] - z_train_euc_mean)/z_train_euc_std
def Z_TRAIN_KNN(MPG, z_train_copy, k):
z_train_copy['euc_dis'] = np.sqrt(
(MPG['year']-z_train_copy['year'])**2+
(MPG['cylinders']-z_train_copy['cylinders'])**2+
(MPG['displ']-z_train_copy['displ'])**2)
z_train_sorted_data = z_train_copy.sort_values(['euc_dis'])
z_train_prediction = z_train_sorted_data.iloc[0:k]['comb08'].mean()
return z_train_prediction
MPG ={}
MPG['year'] = 2020
MPG['cylinders'] = 4
MPG['displ'] = 5.2
print(f"The average MPG for this car is: %d" %Z_TRAIN_KNN(MPG, z_train_copy, 5))
def regression_all_kNN(test_data_euc,z_train_data,k):
#apply the classify_kNN function to each item in the test data with the train
#data and k passed as the other two arguments. The result will be a series of
#the individual results.
for i in test_data:
z_train_data['euc_dis'] = np.sqrt(
(test_data['year']- z_train_data['year'])**2+
(test_data['cylinders']- z_train_data['cylinders'])**2+
(test_data['displ']- z_train_data['displ'])**2)
sorted_train_data = z_train_data.sort_values(['euc_dis'])
prediction = test_data.apply(regression_all_kNN,args=(z_train_data,k))
return prediction
predictions5NN = regression_all_kNN(test_data, train_data, 5)
目前没有回答
相关问题 更多 >
编程相关推荐