import numpy as np
import tensorflow as tf
import os 
import glob
import argparse
import random
#from resnet import ResNet
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from pysim import config
import importlib
importlib.reload(config)
import glob
import cv2

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

#from tensorflow.keras.applications import VGG16

In diesem Tutorial wird beschrieben, wie die Dimensionen des Input-Shape-Tensors zur Feinoptimierung mit Keras geändert werden können. Nachdem Sie diese Anleitung durchgearbeitet haben, werden Sie verstehen, wie Sie Transfer Learning auf Bilder mit anderen Bilddimensionen anwenden können, als die, auf die das CNN ursprünglich trainiert wurde.

1. Warum sollte man unterschiedliche Bildgrößen verwenden?

Dafür gibt es zwei häufige Gründe:

Ihre Eingabebilddimensionen sind erheblich kleiner als die, auf denen das CNN trainiert wurde, und eine Vergrößerung führt zu viele Artefakte ein und beeinträchtigt die Verluste/Genauigkeit dramatisch.
Ihre Bilder sind hochauflösend und enthalten kleine Objekte, die schwer zu erkennen sind. Eine Größenänderung auf die ursprünglichen Eingangsdimensionen des CNN beeinträchtigt die Genauigkeit und Sie gehen davon aus, dass eine Erhöhung der Auflösung Ihr Modell verbessern wird.

In diesen Szenarien würden Sie die Eingabeformdimensionen des CNN aktualisieren wollen und dann in der Lage sein, Transfer-Learning durchzuführen.

There are two common reasons:

Your input image dimensions are considerably smaller than what the CNN was trained on and increasing their size introduces too many artifacts and dramatically hurts loss/accuracy.
Your images are high resolution and contain small objects that are hard to detect. Resizing to the original input dimensions of the CNN hurts accuracy and you postulate increasing resolution will help improve your model.

In these scenarios, you would wish to update the input shape dimensions of the CNN and then be able to perform transfer learning.

from tensorflow.keras.applications import VGG16

2. Laden des Modelles: VGG16, ohne Ausgangsschicht

Das ursprüngliche Modell wurde für eine Bildgröße von (224, 224, 3) trainiert.

# Reference model: VGG16 convolutional base with ImageNet weights and the
# classifier head removed, fed by an explicit 224x224 RGB input tensor.
native_input = tf.keras.layers.Input(shape=(224, 224, 3))
base_model = VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=native_input,
)

3. VGG16 für eine Input - Bildgröße von (128, 128, 3)

Das ursprüngliche Modell wurde für eine Bildgröße von (224, 224, 3) trainiert; hier wird der Input-Tensor stattdessen auf (128, 128, 3) gesetzt.

4. Kann man die Größe der Input-Bilder beliebig festlegen?

Es gibt Grenzen, wie stark man die Bildgröße anpassen kann, sowohl aus Sicht der Genauigkeit/Verluste als auch aufgrund von Beschränkungen durch das Netz selbst.

Beachten Sie die Tatsache, dass CNNs die Volumenabmessungen über zwei Methoden reduzieren:

Pooling
Strided Convolutions

5. Wenn das Bild zu klein ist:

Wenn das Eingabebild zu klein ist, schrumpfen die Volumendimensionen während der Forward-Propagation so weit, dass dem Modell die Daten "ausgehen".
In diesem Fall wird eine Fehlermeldung ausgegeben.

6. Wenn das Bild zu groß ist:

Man wird keine Fehler an sich feststellen, aber es kann sein, dass das Netz keine angemessene Genauigkeit erreicht, weil nicht genügend Schichten im Netzwerk vorhanden sind um:
- Robuste, diskriminierende Filter zu lernen
- Die Größe des Volumens durch Pooling oder Strided Convolution natürlich zu schrumpfen
  - Keine Fehlermeldung
    Lösungen:
  - Ein anderes Modell finden, das eine größere Eingabe-Bildgröße hat
  - Hyperparameter-Anpassung ausprobieren
  - Weitere Schichten hinzufügen, um so das Modell zu vergrößern

7. Load and preprocess data

# Collect every file located two directory levels below config.BASE_PATH.
# The name of the directory that directly contains a file is used as its
# class label. Sorting makes the sample order independent of the order in
# which the filesystem happens to list the entries.
pathsList = sorted(glob.glob(os.path.sep.join([config.BASE_PATH, "*", "*"])))
data = []
labels = []
# loop over all files in list
for imagePath in pathsList:
    # cv2.imread signals an unreadable / non-image file by returning None
    # instead of raising, so such entries are skipped explicitly; otherwise
    # the colour conversion below would fail on the first bad file.
    image = cv2.imread(imagePath)
    if image is None:
        print("[WARNING] skipping unreadable file: {}".format(imagePath))
        continue
    # convert OpenCV's BGR channel order to RGB, then resize to 128x128
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (128, 128))
    # extract the label: the name of the file's parent directory
    label = imagePath.split(os.path.sep)[-2]
    # keep image and label in lockstep
    data.append(image)
    labels.append(label)

# Stack the per-image arrays / label strings into NumPy arrays.
data = np.array(data)
labels = np.array(labels)

# Turn the string labels into one-hot rows.
# LabelBinarizer returns a single 0/1 column when there are at most two
# classes and an already one-hot matrix when there are more. Only the
# single-column case must be expanded; applying to_categorical to an
# already one-hot matrix would add an unwanted extra axis.
lb = LabelBinarizer()
labels = lb.fit_transform(labels)
if labels.shape[1] == 1:
    # num_classes is pinned so the result has two columns even if only one
    # class is present in the data.
    labels = tf.keras.utils.to_categorical(labels, num_classes=2)
labels = labels.astype("float32")

# Work on a subset of at most 300 samples to keep the demo fast.
# The subset is drawn from a seeded random permutation rather than taking
# the first 300 rows: the files are listed directory by directory, so a
# leading slice can end up containing a single class only.
subset_idx = np.random.RandomState(123).permutation(len(data))[:300]
(trainX, testX, trainY, testY) = train_test_split(
    data[subset_idx],
    labels[subset_idx],
    random_state=123,
    shuffle=True,
    test_size=0.2,
)
print(trainX.shape, testX.shape, trainY.shape, testY.shape)

(240, 128, 128, 3) (60, 128, 128, 3) (240, 2) (60, 2)

# ImageNet per-channel mean values, given in RGB order.
mean = np.array([123.68, 116.779, 103.939], dtype="float32")

# One generator for training batches and one for validation batches; both
# are created with default settings (no augmentation configured) and get
# the ImageNet mean attached as their `mean` attribute.
trainAug = tf.keras.preprocessing.image.ImageDataGenerator()
valAug = tf.keras.preprocessing.image.ImageDataGenerator()
for generator in (trainAug, valAug):
    generator.mean = mean
# NOTE(review): Keras documents the stored mean as being applied only when the
# generator is built with featurewise_center=True - confirm whether mean
# subtraction actually takes effect with these default-constructed generators.

#trainGen = trainAug.flow(trainX, trainY, batch_size = 4)

8. Laden des Modelles: VGG16, ohne Ausgangsschicht

Das ursprüngliche Modell wurde für eine Bildgröße von (224, 224, 3) trainiert.

Two steps to work with images of a different size:

the images in our dataset were resized to (128, 128, 3)
the input tensor is specified as (128, 128, 3)

# VGG16 convolutional base (ImageNet weights, no classifier head) rebuilt on
# a 128x128 RGB input tensor, matching the size the images were resized to.
input_128 = tf.keras.layers.Input(shape=(128, 128, 3))
base_model = tf.keras.applications.VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=input_128,
)
# Render the layer graph together with each layer's tensor shapes.
tf.keras.utils.plot_model(base_model, show_shapes=True)

# Classification head stacked on top of the VGG16 convolutional base:
# average-pool the final feature map, flatten, one small hidden layer with
# dropout, then a two-way softmax.
x = base_model.output
x = tf.keras.layers.AveragePooling2D(pool_size=(4, 4))(x)
x = tf.keras.layers.Flatten()(x)
# A non-linearity is required here; without it the two Dense layers collapse
# into a single linear map.
x = tf.keras.layers.Dense(10, activation="relu")(x)
x = tf.keras.layers.Dropout(0.1)(x)
# Softmax makes the outputs class probabilities, which is what the loss
# below expects (it is not configured with from_logits=True).
output_ = tf.keras.layers.Dense(2, activation="softmax")(x)

# base_model.input is the single input tensor; wrapping base_model.inputs
# (already a list) in another list would create a nested input structure.
model = tf.keras.models.Model(inputs=base_model.input, outputs=output_)

# Freeze the pre-trained base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
# One-hot targets with a softmax output: categorical cross-entropy.
loss = tf.keras.losses.CategoricalCrossentropy()
model.compile(optimizer=optimizer, loss=loss, metrics=["accuracy"])

# Train the head. Both generators must use config.BATCH_SIZE because the
# step counts below are derived from it; a validation generator left at its
# default batch size yields fewer batches than validation_steps asks for
# and Keras interrupts with an "input ran out of data" warning.
H = model.fit(
    trainAug.flow(trainX, trainY, batch_size=config.BATCH_SIZE),
    steps_per_epoch=len(trainX) // config.BATCH_SIZE,
    validation_data=valAug.flow(testX, testY, batch_size=config.BATCH_SIZE),
    validation_steps=len(testX) // config.BATCH_SIZE,
)

60/60 [==============================] - ETA: 0s - loss: 0.3005 - accuracy: 1.0000WARNING:tensorflow:Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches (in this case, 15 batches). You may need to use the repeat() function when building your dataset.
60/60 [==============================] - 79s 1s/step - loss: 0.3005 - accuracy: 1.0000 - val_loss: 0.0032 - val_accuracy: 1.0000

print("[INFO] evaluating network...")
# Feed the test images through the validation generator so evaluation uses
# exactly the preprocessing applied during validation. shuffle=False keeps
# the prediction order aligned with testY.
predictions = model.predict(
    valAug.flow(testX, batch_size=config.BATCH_SIZE, shuffle=False)
)
# Compare the arg-max class of the one-hot targets with that of the outputs.
print(classification_report(testY.argmax(axis=1), predictions.argmax(axis=1)))
# plot the training loss and accuracy

[INFO] evaluating network...
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        60

    accuracy                           1.00        60
   macro avg       1.00      1.00      1.00        60
weighted avg       1.00      1.00      1.00        60

# Number of recorded epochs, taken from the history itself so the x-axis
# always matches the curves regardless of how many epochs were trained.
N = len(H.history["loss"])
epoch_axis = np.arange(0, N)

plt.style.use("ggplot")
plt.figure()
# One curve per recorded metric: (history key, legend label).
for history_key, curve_label in (
    ("loss", "train_loss"),
    ("val_loss", "val_loss"),
    ("accuracy", "train_acc"),
    ("val_accuracy", "val_acc"),
):
    plt.plot(epoch_axis, H.history[history_key], label=curve_label)
plt.title("Training Loss and Accuracy on Dataset")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="lower left")
#plt.savefig("plot.png")

<matplotlib.legend.Legend at 0x7f8864e29410>

References

Adrian Rosebrock, Change input shape dimensions for fine-tuning with Keras, PyImageSearch (https://www.pyimagesearch.com/), accessed on 3 January 2021. URL: https://www.pyimagesearch.com/2019/06/24/change-input-shape-dimensions-for-fine-tuning-with-keras/