Keras Tensorflow验证精度在使用子类语法与函数或顺序语法时有所不同

2024-10-04 09:28:43 发布

您现在位置:Python中文网/ 问答频道 /正文

我使用顺序语法、函数语法和子类语法重新实现了Keras MINST CNN示例

一切编译和运行都很好,但我注意到使用子类语法(35%)与顺序/函数语法(75%)在验证精度上有很大差异。模型架构应该是相同的,所以这让我很困惑

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten

# BUG: ClassCNN accuracy is only 36% compared to 75% for SequentialCNN / FunctionalCNN
# SequentialCNN   validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN   validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN        validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
class ClassCNN(tf.keras.Model):

    def __init__(self, input_shape, output_shape, **kwargs):
        super(ClassCNN, self).__init__()
        self._input_shape  = input_shape   # = (28, 28, 1)
        self._output_shape = output_shape  # = 10

        self.conv1      = Conv2D(32, kernel_size=(3, 3), activation=tf.nn.relu)
        self.conv2      = Conv2D(64, kernel_size=(3, 3), activation=tf.nn.relu)
        self.maxpool    = MaxPooling2D(pool_size=(2, 2))
        self.dropout1   = Dropout(0.25, name='dropout1')
        self.flatten    = Flatten()
        self.dense1     = Dense(128, activation=tf.nn.relu)
        self.dropout2   = Dropout(0.5, name='dropout2')
        self.activation = Dense(self._output_shape, activation=tf.nn.relu)

        self.conv1.build(     (None,) + input_shape )
        self.conv2.build(     (None,) + tuple(np.subtract(input_shape[:-1],2)) + (32,) )
        self.maxpool.build(   (None,) + tuple(np.subtract(input_shape[:-1],4)) + (64,) )
        self.dropout1.build( tuple(np.floor_divide(np.subtract(input_shape[:-1],4),2)) + (64,) )
        self.dropout2.build( 128 )
        self.build(           (None,) + input_shape)


    def call(self, x, training=False, **kwargs):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.maxpool(x)
        if training:  x = self.dropout1(x)
        x = self.flatten(x)
        x = self.dense1(x)
        if training:  x = self.dropout2(x)
        x = self.activation(x)
        return x
import os

from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model


def FunctionalCNN(input_shape, output_shape):
    inputs = Input(shape=input_shape)
    x = Conv2D(32, kernel_size=(3, 3), activation='relu')(inputs)
    x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(0.25)(x)
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(output_shape, activation='softmax')(x)

    model = Model(inputs, x, name="FunctionalCNN")
    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "FunctionalCNN.png"))
    return model
import os

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.utils import plot_model


def SequentialCNN(input_shape, output_shape):
    model = Sequential()
    model.add( Conv2D(32, kernel_size=(3, 3),
                     activation='relu',
                     input_shape=input_shape) )
    model.add( Conv2D(64, (3, 3), activation='relu') )
    model.add( MaxPooling2D(pool_size=(2, 2)) )
    model.add( Dropout(0.25) )
    model.add( Flatten() )
    model.add( Dense(128, activation='relu') )
    model.add( Dropout(0.5) )
    model.add( Dense(output_shape, activation='softmax') )

    plot_model(model, to_file=os.path.join(os.path.dirname(__file__), "SequentialCNN.png"))
    return model

main.py

#!/usr/bin/env python3
import multiprocessing
import os

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'  # 0, 1, 2, 3  # Disable Tensortflow Logging
os.chdir( os.path.dirname( os.path.abspath(__file__) ) )

import tensorflow as tf
import tensorflow.keras as keras
import time

from src.dataset import DataSet
from src.keras.examples.ClassCNN import ClassCNN
from src.keras.examples.ClassNN import ClassNN
from src.keras.examples.FunctionalCNN import FunctionalCNN
from src.keras.examples.SequentialCNN import SequentialCNN
from src.utils.csv import predict_to_csv

tf.random.set_seed(42)

timer_start = time.time()

dataset = DataSet()
config = {
    "verbose":      False,
    "epochs":       12,
    "batch_size":   128,
    "input_shape":  dataset.input_shape(),
    "output_shape": dataset.output_shape(),
}
print("config", config)

# BUG: ClassCNN accuracy is only 36% compared to 75% for SequentialCNN / FunctionalCNN
# SequentialCNN   validation: | loss: 1.3756675141198293 | accuracy: 0.7430952
# FunctionalCNN   validation: | loss: 1.4285654685610816 | accuracy: 0.7835714
# ClassCNN        validation: | loss: 1.9851970995040167 | accuracy: 0.36214286
models = {
    "SequentialCNN": SequentialCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
    "FunctionalCNN": FunctionalCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
    "ClassCNN": ClassCNN(
        input_shape=dataset.input_shape(),
        output_shape=dataset.output_shape()
    ),
}


for model_name, model in models.items():
    print(model_name)

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adadelta(),
                  metrics=['accuracy'])

    model.summary()

    model.fit(
        dataset.data['train_X'], dataset.data['train_Y'],
        batch_size = config["batch_size"],
        epochs     = config["epochs"],
        verbose    = config["verbose"],
        validation_data = (dataset.data["valid_X"], dataset.data["valid_Y"]),
        use_multiprocessing = True, workers = multiprocessing.cpu_count()
    )

for model_name, model in models.items():
    score = model.evaluate(dataset.data['valid_X'], dataset.data['valid_Y'], verbose=config["verbose"])
    print(model_name.ljust(15), "validation:", '| loss:', score[0], '| accuracy:', score[1])

for model_name, model in models.items():
    predict_to_csv( model.predict(dataset.data['test_X']), f'../../../submissions/keras-examples/keras-examples-{model_name}.csv')

print("time:", int(time.time() - timer_start), "s")

输出:

./src/keras/examples/main.py 
config {'verbose': False, 'epochs': 12, 'batch_size': 128, 'input_shape': (28, 28, 1), 'output_shape': 10}
SequentialCNN
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0         
_________________________________________________________________
dropout (Dropout)            (None, 12, 12, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 9216)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               1179776   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
FunctionalCNN
Model: "FunctionalCNN"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
input_1 (InputLayer)         [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
ClassCNN
Model: "class_cnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
conv2d_4 (Conv2D)            multiple                  320       
_________________________________________________________________
conv2d_5 (Conv2D)            multiple                  18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 multiple                  0         
_________________________________________________________________
dropout1 (Dropout)           multiple                  0         
_________________________________________________________________
flatten_2 (Flatten)          multiple                  0         
_________________________________________________________________
dense_4 (Dense)              multiple                  1179776   
_________________________________________________________________
dropout2 (Dropout)           multiple                  0         
_________________________________________________________________
dense_5 (Dense)              multiple                  1290      
=================================================================
Total params: 1,199,882
Trainable params: 1,199,882
Non-trainable params: 0
_________________________________________________________________
SequentialCNN   validation: | loss: 1.370523907570612  | accuracy: 0.74964285
FunctionalCNN   validation: | loss: 1.4270000725700742 | accuracy: 0.78511906
ClassCNN        validation: | loss: 2.028766530354818  | accuracy: 0.35630953

SequentialCNN+FunctionalCNN两者产生相似的准确率(75%),但与ClassCNN准确率(35%)有着本质的不同。从视觉上看,这些模型看起来是一样的

有人能解释一下吗


Tags: fromimportselfnoneinputoutputmodelactivation