我试着训练我的 LSTM 模型,但是准确率、精确率、召回率和 F1 分数都是 0。我从 Kaggle 下载了心脏病数据集。代码如下:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.contrib import rnn
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score
# Load the heart-disease table and min-max scale the listed columns to [0, 1].
heartt = pd.read_csv('heart.csv')
cols_to_norm = [
    'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal',
]
# NOTE(review): 'age' is not in cols_to_norm, so it keeps its raw values
# while every other feature is rescaled -- confirm this is intended.
heartt[cols_to_norm] = heartt[cols_to_norm].apply(
    lambda col: (col - col.min()) / (col.max() - col.min())
)

# One numeric feature column per scaled input.
sex_people = tf.feature_column.numeric_column('sex')
c_p = tf.feature_column.numeric_column('cp')
trest_bps = tf.feature_column.numeric_column('trestbps')
cholestrol = tf.feature_column.numeric_column('chol')
fb_s = tf.feature_column.numeric_column('fbs')
rest_ecg = tf.feature_column.numeric_column('restecg')
thala_ch = tf.feature_column.numeric_column('thalach')
ex_ang = tf.feature_column.numeric_column('exang')
old_peak = tf.feature_column.numeric_column('oldpeak')
slo_pe = tf.feature_column.numeric_column('slope')
c_a = tf.feature_column.numeric_column('ca')
tha_l = tf.feature_column.numeric_column('thal')

# Age is exposed as decade buckets rather than as a raw number.
ag_e = tf.feature_column.numeric_column('age')
age_buckets = tf.feature_column.bucketized_column(
    ag_e, boundaries=[20, 30, 40, 50, 60, 70, 80]
)

# NOTE(review): feat_cols is not referenced by the model code visible in this
# file; the network is fed the DataFrame values directly -- confirm whether
# these feature columns are still needed.
feat_cols = [
    sex_people, c_p, trest_bps, cholestrol, fb_s, rest_ecg, thala_ch,
    ex_ang, old_peak, slo_pe, c_a, tha_l, age_buckets,
]
# Separate the input features from the binary 'target' label.
x_data = heartt.drop('target', axis=1)
x_data.info()
labels = heartt['target']

# Hold out 20% of the rows for evaluation. The split must be shuffled:
# without shuffling the test set is simply the last 20% of the file, and
# random_state has no effect. If the CSV is ordered by label (the Kaggle
# heart.csv appears to be -- TODO confirm), an unshuffled test set contains a
# single class and precision/recall/F1 collapse to 0. stratify keeps the class
# ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    x_data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
    random_state=42,
)
# Training schedule.
epochs = 8
batch_size = 35

# Model dimensions: LSTM state size, width of one input row, and width of the
# final projection (a single logit).
n_units = 200
n_features = 13
n_classes = 1

# Graph inputs: a batch of feature rows and the matching labels.
xplaceholder = tf.placeholder(tf.float32, [None, n_features])
yplaceholder = tf.placeholder(tf.float32)
def recurrent_neural_network_model():
    """Build the LSTM graph and return its output logits.

    The input placeholder is split along axis 1 into ``n_features`` tensors,
    which are fed to a single ``BasicLSTMCell`` as consecutive steps of a
    static RNN. The output of the last step is projected through a dense
    layer of width ``n_classes``.

    Reads the module-level ``xplaceholder``, ``n_features``, ``n_units`` and
    ``n_classes``.

    Returns:
        The un-activated output tensor of the final projection.
    """
    projection = {
        'weights': tf.Variable(tf.random_normal([n_units, n_classes])),
        'bias': tf.Variable(tf.random_normal([n_classes])),
    }

    # One tensor per feature column; each becomes one step of the RNN input.
    steps = tf.split(xplaceholder, n_features, 1)
    print(steps)

    cell = rnn.BasicLSTMCell(n_units)
    step_outputs, _ = rnn.static_rnn(cell, steps, dtype=tf.float32)

    # Only the last step's output feeds the classifier head.
    return tf.matmul(step_outputs[-1], projection['weights']) + projection['bias']
def train_neural_network():
    """Train the LSTM classifier and print its metrics on the test split.

    Builds the graph from ``recurrent_neural_network_model()``, minimises the
    mean sigmoid cross-entropy with Adam for ``epochs`` passes over the
    training split in mini-batches of ``batch_size`` rows, then rounds the
    sigmoid of the logits on the test split and prints macro F1, accuracy,
    recall and precision.

    Reads the module-level ``xplaceholder``, ``yplaceholder``, ``epochs``,
    ``batch_size``, ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Returns:
        None. Results are printed to stdout.
    """
    logit = tf.reshape(recurrent_neural_network_model(), [-1])
    cost = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logit, labels=yplaceholder)
    )
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    # Hard 0/1 predictions obtained by rounding the sigmoid probability.
    prediction = tf.round(tf.nn.sigmoid(logit))

    # Convert once, outside the loops. NumPy slices are purely positional, so
    # batching does not depend on how the pandas index is ordered.
    train_x = np.asarray(X_train)
    train_y = np.asarray(y_train)
    test_x = np.asarray(X_test)
    test_y = np.asarray(y_test)

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        tf.local_variables_initializer().run()

        for epoch in range(epochs):
            epoch_loss = 0
            # Stride by batch_size so batches are disjoint and every training
            # row is visited once per epoch, including a final short batch.
            for start in range(0, len(train_x), batch_size):
                end = start + batch_size
                _, c = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        xplaceholder: train_x[start:end],
                        yplaceholder: train_y[start:end],
                    },
                )
                epoch_loss += c
            print('Epoch', epoch, 'completed out of', epochs, 'loss:', epoch_loss)

        # eval() relies on the default session, so it must stay inside the
        # `with` block.
        pred = prediction.eval({xplaceholder: test_x})

        f1 = f1_score(test_y, pred, average='macro')
        accuracy = accuracy_score(test_y, pred)
        recall = recall_score(y_true=test_y, y_pred=pred)
        precision = precision_score(y_true=test_y, y_pred=pred)
        print("F1 Score:", f1)
        print("Accuracy Score:", accuracy)
        print("Recall:", recall)
        print("Precision:", precision)
# Script entry point: build the graph, train, and print the evaluation metrics.
train_neural_network()
这是我得到的输出:
(输出内容在抓取时丢失;如上所述,各项指标均为 0。)我很困惑,不知道自己可能错在哪里。为什么我得不到正确的准确率、精确率、F1 分数和召回率?
我看了一下数据集,它看起来不像是适合用 LSTM 模型解决的问题。LSTM(以及所有 RNN)都是用来预测连续输出的,它们相当于神经网络版的时间序列回归。我知道在某些情况下(例如 NLP 中的情感分析)可以将 LSTM 应用于分类问题,但这里似乎并非如此。这些数据是“非时间性的”,即数据集的每一行代表一个病人,行与行之间的先后顺序不携带任何信息。
当您需要一个能够“记住”数据先前状态的模型(例如处理时间序列)时,才使用 LSTM。如果你确实想应用 LSTM,我建议你换一个数据集(你可以看看 this huge list of ML datasets)。如果你的目标是分类,那就改用普通的前馈神经网络。为此,您可以查看 my personal TensorFlow tutorial,了解如何执行此操作。
相关问题 更多 >
编程相关推荐