ValueError:X有500000个功能，但ExtraTreeClassifier需要7个功能作为输入

import numpy as np import pandas as pd from collections import Counter import datetime from sklearn.model_selection import train_test_split from sklearn.model_selection import RepeatedStratifiedKFold import gc import warnings warnings.simplefilter('ignore') df = pd.read_csv('../cleaned_train.csv', index_col=0) df['click_time'] = pd.to_datetime(df['click_time']) df.info() <class 'pandas.core.frame.DataFrame'> Int64Index: 10000000 entries, 0 to 9999999 Data columns (total 9 columns): # Column Dtype --- ------ ----- 0 ip int64 1 app int64 2 device int64 3 os int64 4 channel int64 5 click_time datetime64[ns] 6 is_attributed int64 7 hour int64 8 day int64 dtypes: datetime64[ns](1), int64(8) memory usage: 762.9 MB X= df.drop(columns=['is_attributed', 'click_time']) y= df['is_attributed'] #Undersample data from imblearn.under_sampling import RandomUnderSampler rus = RandomUnderSampler() X_res, y_res = rus.fit_resample(X, y) X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size = 0.33, random_state = 0) from sklearn.ensemble import ExtraTreesClassifier from sklearn.model_selection import GridSearchCV import pickle # ExtraTreesClassifier ec = ExtraTreesClassifier(max_depth=None, n_estimators=50) ec.fit(X_train, y_train) y_predec=ec.predict(X_test) pickle.dump(gsec,open('model.pkl','wb'))

# importing necessary libraries and functions import numpy as np import pandas as pd from flask import Flask, request, jsonify, render_template, make_response from werkzeug.utils import secure_filename from werkzeug.datastructures import FileStorage import pickle import io from io import StringIO import csv app = Flask(__name__) #Initialize the flask App @app.route('/') # Homepage def home(): return render_template('index.html') @app.route('/predict',methods=['GET', 'POST']) def predict(): ''' For rendering results on HTML GUI ''' # retrieving values from form if request.method == 'POST': f = request.files['data_file'] if not f: return "No file" stream = io.StringIO(f.stream.read().decode("UTF8"), newline=None) csv_input = csv.reader(stream) # print(csv_input) for row in csv_input: print(row) stream.seek(0) result = stream.read() df = pd.read_csv('newcleaned_test.csv') attribute = df['is_attributed'] ip = df['ip'] print (attribute) # load the model from disk loaded_model = pickle.load(open('model.pkl', 'rb')) prediction = loaded_model.predict([attribute]) print (prediction) return 'prediction' if __name__ == "__main__": app.run(debug=True)

1条回答

网友

1楼 · 发布于 2024-09-26 18:21:19

你在这里有一些误解

首先，从代码中，您可以看到模型在7列上作为输入进行训练[ip, app, device, os, channel, hour, day]。并对模型进行训练，从is_attributed列预测值。因此，为模型列表提供7个值->；接收1个值作为输出。这个值似乎是0或1，取决于输入的7个值

其次，我们现在可以进入烧瓶部分。基本上，这里要做的是加载dataframe并选择一列（attribute = df['is_attributed']）。如果您的dataframe有50000行，并且您选择了一列，则表示您选择了50000个值！然后你们试着把它发送给模型，模型需要7个值作为输入。从我的角度来看，您似乎希望在test数据帧的每一行上运行模型

为此，您需要：

加载test数据帧
检查dataframe中是否只有7列（[ip, app, device, os, channel, hour, day]）。如果有更多列，请删除所有其他列
检查数据帧中的每一行（总共50000行中的每一行）
使用行中的7个值作为输入运行模型
模型的输出附加到python列表中
运行50000次之后，您将拥有50000个值的python列表
返回此列表

相关问题更多 >

编程相关推荐

热门问题

热门文章