使用Keras进行三重态学习会产生意想不到的结果

2024-10-02 02:30:53 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在尝试实现Keras三重学习。 下面是我使用的get\u triplet()的两个版本:

“琐碎的”:

def get_triplet(nSplitIdx, bIsTrain):
  positiveClass = np.random.choice(arrLabels)

  # Depending train or validate, select range. 
  # say we have 10 images per class, and 70% does to train. Then 0-6 (train); 7-9 (valid, at least 3)
  if(bIsTrain):
    nMinIdx = 0
    nMaxIdx = nSplitIdx - 1
  else:
    nMinIdx = nSplitIdx
    nMaxIdx = NUM_OF_IMAGES_PER_CLASS - 1 - TESTING_IMAGES_PER_CLASS

  # Get 3 indices: for base image and for positive example, from same class. And one more for negative example.
  # TBD: figure (test) if SAME image should be used in a positive pair, like [img[1], img[1]]?
  nImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)

  while nImageIdx[0] == nImageIdx[1]:
    nImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))

  negativeClass = np.random.choice(arrLabels)
  while negativeClass['Id'] == positiveClass['Id']:
    negativeClass = np.random.choice(arrLabels)

  negativeFileName = negativeClass['ImageNames'][nImageIdx[2]]

  # nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass. 
  # Ex: positiveClass[nImageIdx[0], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
  return nImageIdx, positiveClass, negativeClass

和“半硬”(虽然不那么优雅):

# Used to mine ONLY
def getDistance(a, b):
  distance = (a - b)**2

  distance = sum(distance)

  return distance

graph = tf.get_default_graph()

def getExampleIdx(positiveClass, negativeClass, bIsPositive, nAncorIdx):
  global embedding_model

  # tensorflow model is not loaded and used in the same thread. One workaround is to force tensorflow to use the gloabl default graph.
  global graph
  with graph.as_default():
    imgAncor = positiveClass['Images'][nAncorIdx]

    if(bIsPositive):
      cClass = positiveClass
    else:
      cClass = negativeClass

    arrExamples = []
    arrExamples.append(np.array(imgAncor, dtype="float32") / 255.)  # Add ancor image as 0th element

    if(bIsPositive):
      for i in range(TRAINING_IMAGES_PER_CLASS):
        #  img = loadImage(cClass, i, datagen)
        #  arrExamples.append(np.array(img, dtype="float32") / 255.)
        # Note: no augmentation here
        img = cClass['Images'][i]
        arrExamples.append(np.array(img, dtype="float32") / 255.)
    else:
      # For a batch

      if(BATCH_SIZE > len(arrLabels)):
        nMinRange = len(arrLabels)
        nStartClassIdx = 0
      else:
        nMinRange = BATCH_SIZE + 1
        nStartClassIdx = np.random.choice(range(len(arrLabels) - nMinRange)) 

      for nClassId in range(nStartClassIdx, nStartClassIdx + nMinRange): 
        cClass = arrLabels[nClassId]
        if cClass['Id'] == positiveClass['Id']:
          continue

        # We do it to speed it up, but generally, it should be uncommented: for instead of rand
        i = np.random.choice(range(TRAINING_IMAGES_PER_CLASS))
        #for i in range(TRAINING_IMAGES_PER_CLASS):
        # Note: no augmentation here
        img = cClass['Images'][i]
        arrExamples.append(np.array(img, dtype="float32") / 255.)

    arrPredictionsPos = embedding_model.predict([arrExamples])

    # Get distances between 0th predictions and other predictions
    arrDistancesPos = []
    for i in range(1, len(arrPredictionsPos)):
      arrDistancesPos.append(getDistance(arrPredictionsPos[0], arrPredictionsPos[i]))
      #print("{}, ".format(arrDistancesPos[i - 1]))


    if(bIsPositive):
      #print("Positive: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmax()]))
      return np.array(arrDistancesPos).argmax(), positiveClass
    else:
      #print("Negative: {}, ".format(arrDistancesPos[np.array(arrDistancesPos).argmin()]))
      nNegativeIdx = np.array(arrDistancesPos).argmin() % TRAINING_IMAGES_PER_CLASS
      return nNegativeIdx, arrLabels[nStartClassIdx + nNegativeIdx // TRAINING_IMAGES_PER_CLASS]

# ---

def get_triplet(nSplitIdx, bIsTrain):
  # Select random class
  positiveClass = np.random.choice(arrLabels)

  negativeClass = np.random.choice(arrLabels)
  while negativeClass['Id'] == positiveClass['Id']:
    negativeClass = np.random.choice(arrLabels)

  # Depending train or validate, select range. 
  # say we have 10 images per class, and 70% does to train. Then 0-6 (train); 7-9 (valid, at least 3)
  if(bIsTrain):
    nMinIdx = 0
    nMaxIdx = nSplitIdx
  else:
    nMinIdx = nSplitIdx
    nMaxIdx = NUM_OF_IMAGES_PER_CLASS - TESTING_IMAGES_PER_CLASS

  arrImageIdx = np.random.choice(range(nMinIdx, nMaxIdx), 3)

  if(bIsTrain):
    arrImageIdx[1], positiveClass = getExampleIdx(positiveClass, positiveClass, True, arrImageIdx[0])
    arrImageIdx[2], negativeClass = getExampleIdx(positiveClass, negativeClass, False, arrImageIdx[0])
  else:
    while arrImageIdx[0] == arrImageIdx[1]:
      arrImageIdx[1] = np.random.choice(range(nMinIdx, nMaxIdx))

  #negativeFileName = negativeClass['ImageNames'][arrImageIdx[2]]

  # nImageIdx is an array of 3 indexes: base and positive in positiveClass and negative in negativeClass. 
  # Ex: positiveClass[nImageIdx[0], positiveClass[nImageIdx[1]], negativeClass[nImageIdx[2]]
  return arrImageIdx, positiveClass, negativeClass

现在,发电机:

from time import time  
#t0 = time()
#t1 = time()
#print('get_triplet takes %f' %(t1-t0))

def gen(bIsTrain):
  #nSplitIdx = int(NUM_OF_IMAGES_PER_CLASS * TESTING_SPLIT)
  while True:
    arrBaseExamples = []
    arrPositiveExamples = []
    arrNegativeExamples = []

    for i in range(BATCH_SIZE):
      nImageIdx, positiveClass, negativeClass = get_triplet(TRAINING_IMAGES_PER_CLASS, bIsTrain)

      #t0 = time()
      baseExampleImg = loadImage(positiveClass, nImageIdx[0], datagen)      
      positiveExampleImg = loadImage(positiveClass, nImageIdx[1], datagen)
      negativeExampleImg = loadImage(negativeClass, nImageIdx[2], datagen)
      #t1 = time()
      #print('loaded in %f' %(t1-t0))

      arrBaseExamples.append(baseExampleImg)
      arrPositiveExamples.append(positiveExampleImg)
      arrNegativeExamples.append(negativeExampleImg)

    #base = preprocess_input(np.array(arrBaseExamples)) / 255. #'a' #preprocess_input(np.array(arrBaseExamples))
    base = np.array(arrBaseExamples) / 255.
    #train_datagen.fit(base)

    #positive = preprocess_input(np.array(arrPositiveExamples)) / 255.
    positive = np.array(arrPositiveExamples) / 255.
    #train_datagen.fit(positive)

    #negative = preprocess_input(np.array(arrNegativeExamples)) / 255.
    negative = np.array(arrNegativeExamples) / 255.
    #train_datagen.fit(negative)

    label = None

    yield ({'anchor_input': base, 'positive_input': positive, 'negative_input': negative}, label) 

最后,三重态损失函数(从Kaggle偷来):

ALPHA = 0.2  # Triplet Loss Parameter

def triplet_loss(inputs, dist='sqeuclidean', margin='maxplus'):
    anchor, positive, negative = inputs
    positive_distance = K.square(anchor - positive)
    negative_distance = K.square(anchor - negative)
    if dist == 'euclidean':
        positive_distance = K.sqrt(K.sum(positive_distance, axis=-1, keepdims=True))
        negative_distance = K.sqrt(K.sum(negative_distance, axis=-1, keepdims=True))
    elif dist == 'sqeuclidean':
        positive_distance = K.sum(positive_distance, axis=-1, keepdims=True)
        negative_distance = K.sum(negative_distance, axis=-1, keepdims=True)

    loss = positive_distance - negative_distance + ALPHA
    if margin == 'maxplus':
        loss = K.maximum(0.0, 1 + loss)
    elif margin == 'softplus':
        loss = K.log(1 + K.exp(loss))
    return K.mean(loss)

模式为“迁移学习”:

def createModel(nL2):
  base_model = ResNet50(weights='imagenet', include_top=False, pooling='max')
  for layer in base_model.layers:
    layer.trainable = False

  x = base_model.output

  x = Dropout(0.6, name="classifier_dropout")(x)

  x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_0", kernel_regularizer=regularizers.l2(nL2))(x)
  x = Dense(EMBEDDING_DIM, activation='relu', name="classifier_dense_1", kernel_regularizer=regularizers.l2(nL2))(x)
  x = Dense(EMBEDDING_DIM, activation='softmax', name="classifier_dense", kernel_regularizer=regularizers.l2(nL2))(x)

  x = Lambda(lambda  x: K.l2_normalize(x,axis=1), name="lambda")(x)
  embedding_model = Model(base_model.input, x, name="embedding")

  input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
  anchor_input = Input(input_shape, name='anchor_input')
  positive_input = Input(input_shape, name='positive_input')
  negative_input = Input(input_shape, name='negative_input')

  anchor_embedding = embedding_model(anchor_input)
  positive_embedding = embedding_model(positive_input)
  negative_embedding = embedding_model(negative_input)

  inputs = [anchor_input, positive_input, negative_input]
  outputs = [anchor_embedding, positive_embedding, negative_embedding]

  triplet_model = Model(inputs, outputs)
  triplet_model.add_loss(K.mean(triplet_loss(outputs)))

  return embedding_model, triplet_model

我创建了四个卡格尔的狗物种并教它们(原始数据集有120个,我只使用了四个):

BATCH_SIZE = 16
EPOCHS = 200

arrParams = [[0.8, 3]]

checkpoint = ModelCheckpoint(best_weights_filepath, monitor="val_loss", save_best_only=True, save_weights_only=True, mode='auto')
callbacks_list = [checkpoint]  # , early]

gen_train = gen(True)
gen_valid = gen(False)

for i in range(0, len(arrParams)):
  nL2 = arrParams[i][0]
  EMBEDDING_DIM = arrParams[i][1]

  deleteSavedNet(best_weights_filepath)

  embedding_model, triplet_model = createModel(nL2)

  nNumOfClasses = len(arrLabels)
  nNumOfTrainSamples = TRAINING_IMAGES_PER_CLASS * nNumOfClasses
  nNumOfValidSamples = VALIDATION_IMAGES_PER_CLASS * nNumOfClasses
  STEP_SIZE_TRAIN = nNumOfTrainSamples // BATCH_SIZE
  if(STEP_SIZE_TRAIN == 0):
    STEP_SIZE_TRAIN = 1

  STEP_SIZE_VALID = nNumOfValidSamples // BATCH_SIZE
  if(STEP_SIZE_VALID == 0):
    STEP_SIZE_VALID = 1

  triplet_model.compile(loss=None, optimizer="adam", metrics=['binary_accuracy']) #metrics=['accuracy'])

  history = triplet_model.fit_generator(gen_train, validation_data=gen_valid, 
    epochs=EPOCHS, verbose=1, steps_per_epoch=STEP_SIZE_TRAIN, validation_steps=STEP_SIZE_VALID, callbacks=callbacks_list)

  print(nL2, EMBEDDING_DIM)
  plotHistoryLoss()  

它学习到:

deleteSavedNet():File removed
Initializing model
Finished initializing model
Epoch 1/200
6/6 [==============================] - 44s 7s/step - loss: 8.0306 - val_loss: 7.6388
Epoch 2/200
6/6 [==============================] - 15s 2s/step - loss: 7.0082 - val_loss: 6.7307

。。。 历元200/200 6/6[===============================]-15s 3s/步-损耗:0.7046-值损耗:0.7043

但是(注意我用了3分。嵌入)当我绘制结果时,我看到的是线,而不是碎片:

nL2 = arrParams[0][0]
EMBEDDING_DIM = arrParams[0][1]

embedding_model, triplet_model = createModel(nL2)

loadBestModel()

def data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
  i = 0
  arrImages = []
  arrImageLabels = []
  arrImageClasses = []
  for nImageIdx in range(len(arrAllImages)):
    if(i == 0):
      arrImages = []
      arrImageLabels = []
      arrImageClasses = []

    i += 1

    arrImg = img_to_array(arrAllImages[nImageIdx])
    arrImg = datagen.random_transform(arrImg) / 255.
    arrImg = np.array(arrImg, dtype="float32")    

    arrImages.append(arrImg)
    arrImageLabels.append(arrAllImageLabels[nImageIdx])
    arrImageClasses.append(arrAllImageClasses[nImageIdx])

    if i == BATCH_SIZE:
      i = 0
      arrImages = np.array(arrImages)
      yield arrImages, arrImageLabels, arrImageClasses

  if i != 0:
    arrImages = np.array(arrImages)
    yield arrImages, arrImageLabels, arrImageClasses

  raise StopIteration()   

arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []

for cClass in arrLabels:
  for nIdx in range(TRAINING_IMAGES_PER_CLASS):
    arrAllImages.append(cClass['Images'][nIdx])
    arrAllImageLabels.append(cClass['ImageNames'][nIdx])
    arrAllImageClasses.append(cClass['Id'])

train_preds  = []
train_file_names = []
train_class_names = []

np.random.seed(7)

for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
  predicts = embedding_model.predict(imgs)
  predicts = predicts.tolist()
  train_preds += predicts
  train_file_names += fnames
  train_class_names += classes
train_preds = np.array(train_preds)

arrAllImages = []
arrAllImageLabels = []
arrAllImageClasses = []

for cClass in arrLabels:
  #for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
  #for nIdx in range(TRAINING_IMAGES_PER_CLASS):
  for nIdx in range(TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS, TRAINING_IMAGES_PER_CLASS + VALIDATION_IMAGES_PER_CLASS + TESTING_IMAGES_PER_CLASS):
    arrAllImages.append(cClass['Images'][nIdx])
    arrAllImageLabels.append(cClass['ImageNames'][nIdx])
    arrAllImageClasses.append(cClass['Id'])

test_preds  = []
test_file_names = []
test_class_names = []

np.random.seed(7)

for imgs, fnames, classes in data_generator_simple(arrAllImages, arrAllImageLabels, arrAllImageClasses):
  predicts = embedding_model.predict(imgs)
  predicts = predicts.tolist()
  test_preds += predicts
  test_file_names += fnames
  test_class_names += classes
test_preds = np.array(test_preds)

neigh = NearestNeighbors(n_neighbors=6)
neigh.fit(train_preds)
#neigh.fit(arrTrainingClasterCenters)

distances_test, neighbors_test = neigh.kneighbors(test_preds)
distances_test, neighbors_test = distances_test.tolist(), neighbors_test.tolist()

data = pd.read_csv(working_path + "DogRecognizer/dogs/train_dogs.csv")
file_id_mapping = {k: v for k, v in zip(data.Image.values, data.Id.values)}

preds_str = []
arrSearchPositions = []

for filepath, distance, neighbour_ in zip(test_file_names, distances_test, neighbors_test):
    sample_result = []
    sample_classes = []

    for d, n in zip(distance, neighbour_):
      #class_train = arrLabels[n]['Id']
      train_file = train_file_names[n].split(os.sep)[-1]
      class_train = file_id_mapping[train_file]

      sample_classes.append(class_train)
      sample_result.append((class_train, d))

    #if "new_whale" not in sample_classes:
    #    sample_result.append(("new_whale", 0.1))
    sample_result.sort(key=lambda x: x[1], reverse=True)

    sample_result = sample_result[:5]
    preds_str.append(" ".join([x[0] for x in sample_result]))

nTotalSuccess = 0
for i, strClassNames in enumerate(preds_str):
  if(test_class_names[i] in strClassNames):
    strContains = ": Yes"
    nTotalSuccess += 1
  else:
    strContains = ": No"
  #print(test_class_names[i], ": ", strClassNames, " (", strContains, ")")  

print ("Success rate: ", nTotalSuccess / (i+1) )     

import pylab as pl
from sklearn import neighbors, datasets
import matplotlib.cm as cm

h = 0.02

knn=neighbors.KNeighborsClassifier(n_neighbors=6)
knn.fit(test_preds, test_class_names)

x_min, x_max = test_preds[:,0].min() - .01, test_preds[:,0].max() + .02
y_min, y_max = test_preds[:,1].min() - .01, test_preds[:,1].max() + .02
z_min, z_max = test_preds[:,2].min() - .01, test_preds[:,2].max() + .01
xx, yy, zz = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h), np.arange(z_min, z_max, h))
#Z = knn.predict(np.c_[xx.ravel(), yy.ravel(), zz.ravel()])

color_space = []
for i in range(len(test_class_names)):
  if(test_class_names[i] == 'pembroke'):
    color_space.append('red')
    #test_preds[:,1][i] += 0.01
  elif(test_class_names[i] == 'maltese_dog'):
    color_space.append('green')
    #test_preds[:,1][i] += 0.02
  elif(test_class_names[i] == 'gordon_setter'):
    color_space.append('orange')
    #test_preds[:,1][i] += 0.03
  else:
    color_space.append('blue')


from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D
import random


fig = pyplot.figure(figsize=(16, 8))
ax = Axes3D(fig)
pl.set_cmap(pl.cm.Paired)
ax.scatter(test_preds[:,0], test_preds[:,1], test_preds[:,2], c= color_space)
pyplot.show()

情节如下:

enter image description here

为什么是线,不是碎屑


Tags: intestforinputmodelnptrainarray

热门问题