import numpy as np
import tensorflow as tf
import os
import glob
import argparse
import random
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.utils import to_categorical
from pysim import config
import importlib
importlib.reload(config)
import cv2
import shutil
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
import pickle
from imutils import paths
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

1. Building the image dataset

# loop over the data splits and copy each image into a
# class-label subdirectory (the filename prefix encodes the class index)
for split in (config.TRAIN, config.VAL, config.TEST):
    imagePaths = glob.glob(os.path.sep.join([config.ORIG_INPUT_DATASET, split, '*']))
    for imagePath in imagePaths:
        filename = imagePath.split(os.path.sep)[-1]
        label = config.CLASSES[int(filename.split("_")[0])]
        # create the split/label output directory if it does not exist yet
        newPath = os.path.sep.join([config.BASE_PATH, split, label])
        os.makedirs(newPath, exist_ok=True)
        newPathFile = os.path.sep.join([newPath, filename])
        shutil.copy2(imagePath, newPathFile)
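After this loop, each split contains one subdirectory per class, which is the layout the feature-extraction code below relies on. An illustrative example (hypothetical file names, assuming classes named "food" and "non_food"):

<BASE_PATH>/
    train/
        food/
            1_0001.jpg
        non_food/
            0_0001.jpg
    val/
        ...
    test/
        ...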
# load VGG16 without the fully connected head; its final conv volume
# (7x7x512 for 224x224 inputs) will serve as our feature extractor
model = tf.keras.applications.VGG16(weights="imagenet", include_top=False)
le = None

2. Using Keras for deep learning feature extraction

  • Use Keras to extract features via deep learning from each image in the dataset
  • Write the class labels + extracted features to disk in CSV format
for split in (config.TRAIN, config.VAL, config.TEST):
    print("[INFO] process ... {} split".format(split))
    # grab the image paths for this split (limited here to the first 94 images)
    imagePaths = glob.glob(os.path.sep.join([config.BASE_PATH, split, '*', '*']))[:94]
    random.shuffle(imagePaths)
    # extract the class labels from the filename prefixes
    filenames = [imagePath.split(os.path.sep)[-1] for imagePath in imagePaths]
    labels = [config.CLASSES[int(filename.split("_")[0])] for filename in filenames]
    # fit the label encoder once, on the first split we encounter
    if le is None:
        le = LabelEncoder()
        le.fit(labels)
    csvPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(split)])
    csv = open(csvPath, "w")
    # loop over the images in batches
    for (b, i) in enumerate(range(0, len(imagePaths), config.BATCH_SIZE)):
        print("[INFO] processing batch {}/{}".format(b + 1,
            int(np.ceil(len(imagePaths) / float(config.BATCH_SIZE)))))
        batchPaths = imagePaths[i:i + config.BATCH_SIZE]
        batchLabels = le.transform(labels[i:i + config.BATCH_SIZE])
        batchImages = []
        for imagePath in batchPaths:
            # load the input image using the Keras helper utility
            # while ensuring the image is resized to 224x224 pixels
            image = tf.keras.preprocessing.image.load_img(imagePath, target_size=(224, 224))
            image = tf.keras.preprocessing.image.img_to_array(image)
            # preprocess the image by (1) expanding the dimensions and
            # (2) subtracting the mean RGB pixel intensity from the ImageNet dataset
            image = np.expand_dims(image, axis=0)
            image = tf.keras.applications.vgg16.preprocess_input(image)
            # add the image to the batch
            batchImages.append(image)
        # pass the images through the network and use the outputs as
        # our actual features, then reshape the features into a flattened volume
        batchImages = np.vstack(batchImages)
        features = model.predict(batchImages, batch_size=config.BATCH_SIZE)
        features = features.reshape((features.shape[0], 7 * 7 * 512))
        # loop over the class labels and extracted features
        for (label, vec) in zip(batchLabels, features):
            # construct a row that consists of the class label and
            # extracted features
            vec = ",".join([str(v) for v in vec])
            csv.write("{},{}\n".format(label, vec))
    # close this split's CSV file
    csv.close()
# serialize the label encoder to disk
f = open(config.LE_PATH, "wb")
f.write(pickle.dumps(le))
f.close() 
print("Done")                                           
[INFO] process ... train split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
[INFO] process ... val split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
[INFO] process ... test split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
Done
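
As a quick sanity check of the serialized features (and the reason for the scikit-learn imports above), the CSVs can be read back into memory and fit with a simple classifier. A minimal sketch, assuming the row layout written above (encoded label first, then the 25,088 feature values); load_data_split is a hypothetical helper:

def load_data_split(splitPath):
    # each row: encoded label, followed by the flattened feature vector
    data, labels = [], []
    for row in open(splitPath):
        row = row.strip().split(",")
        labels.append(int(row[0]))
        data.append(np.array(row[1:], dtype="float"))
    return (np.array(data), np.array(labels))

# the same CSV paths are derived again in the next section
trainX, trainY = load_data_split(os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TRAIN)]))
testX, testY = load_data_split(os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TEST)]))
clf = LogisticRegression(solver="lbfgs", max_iter=150)
clf.fit(trainX, trainY)
print(classification_report(testY, clf.predict(testX), target_names=le.classes_))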

3. Producing the data generators

def csv_feature_generator(inputPath, bs, numClasses, mode="train"):
    # open the input file for reading
    f = open(inputPath, "r")
    # loop indefinitely
    while True:
        # initialize our batch of data and labels
        data = []
        labels = []
        # keep looping until we reach our batch size
        while len(data) < bs:
            # attempt to read the next row of the CSV file
            row = f.readline()
            # check to see if the row is empty, indicating we have
            # reached the end of the file
            if row == "":
                # reset the file pointer to the beginning of the file
                # and re-read the row
                f.seek(0)
                row = f.readline()
                # if we are evaluating we should now break from our
                # loop to ensure we don't continue to fill up the
                # batch from samples at the beginning of the file
                if mode == "eval":
                    break
            # extract the class label and features from the row
            row = row.strip().split(",")
            label = int(row[0])
            label = to_categorical(label, num_classes=numClasses)
            features = np.array(row[1:], dtype="float")
            # update the data and label lists
            data.append(features)
            labels.append(label)
        # yield the batch to the calling function
        yield (np.array(data), np.array(labels))
# load the serialized label encoder from disk
le = pickle.loads(open(config.LE_PATH, "rb").read())
# derive the paths to the training, validation, and testing CSV files
trainPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(config.TRAIN)])
valPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(config.VAL)])
testPath = os.path.sep.join([config.BASE_CSV_PATH,"{}.csv".format(config.TEST)])
# determine the total number of images in the training and validation
# sets
totalTrain = sum([1 for l in open(trainPath)])
totalVal = sum([1 for l in open(valPath)])
# extract the testing labels from the CSV file and then determine the
# number of testing images
testLabels = [int(row.split(",")[0]) for row in open(testPath)]
totalTest = len(testLabels)
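With the generator defined and the split sizes computed, the three generators can be instantiated. A minimal sketch (the names trainGen, valGen, and testGen are illustrative):

# construct the training, validation, and testing generators;
# the validation and testing generators run in "eval" mode so they
# stop at the end of the file instead of wrapping around mid-batch
trainGen = csv_feature_generator(trainPath, config.BATCH_SIZE, len(config.CLASSES), mode="train")
valGen = csv_feature_generator(valPath, config.BATCH_SIZE, len(config.CLASSES), mode="eval")
testGen = csv_feature_generator(testPath, config.BATCH_SIZE, len(config.CLASSES), mode="eval")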

4. Incremental learning

The Python script we're implementing in this section will be responsible for the following (a minimal sketch follows the list):

  • Constructing the simple feedforward NN architecture
  • Implementing a CSV data generator used to yield batches of labels + feature vectors to the NN
  • Training the simple NN using the data generator
  • Evaluating the feature extractor
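
A minimal sketch of such a script, assuming the generators from the previous section and the 7 x 7 x 512 = 25,088-dimensional feature vectors (layer sizes, optimizer, and epoch count are illustrative choices, not fixed by the text above):

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# simple feedforward network operating on the flattened VGG16 features
nn = Sequential([
    Dense(256, activation="relu", input_shape=(7 * 7 * 512,)),
    Dense(16, activation="relu"),
    Dense(len(config.CLASSES), activation="softmax"),
])
nn.compile(loss="categorical_crossentropy", optimizer="sgd", metrics=["accuracy"])

# train on batches yielded by the CSV generators
nn.fit(
    trainGen,
    steps_per_epoch=totalTrain // config.BATCH_SIZE,
    validation_data=valGen,
    validation_steps=totalVal // config.BATCH_SIZE,
    epochs=25,
)

# evaluate: predict on the test generator and report per-class metrics
predIdxs = nn.predict(testGen, steps=int(np.ceil(totalTest / float(config.BATCH_SIZE))))
predIdxs = np.argmax(predIdxs, axis=1)
print(classification_report(testLabels, predIdxs, target_names=le.classes_))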

References:

Adrian Rosebrock, Keras: Feature extraction on large datasets with Deep Learning, PyImageSearch, https://www.pyimagesearch.com/2019/05/27/keras-feature-extraction-on-large-datasets-with-deep-learning/, accessed on 3 January 2021.