import numpy as np
import tensorflow as tf
import os 
import glob
import argparse
import random
#from resnet import ResNet
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from pysim import config
import importlib
import cv2
import shutil
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
import pickle
from imutils import paths
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
1. Building the image dataset

# Reorganize the raw dataset into BASE_PATH/<split>/<label>/<filename> so that
# every image ends up in a directory named after its class.
for split in (config.TRAIN, config.VAL, config.TEST):
    # grab every entry directly under the original directory of this split
    imagePaths = glob.glob(os.path.sep.join([config.ORIG_INPUT_DATASET, split, '*']))
    for imagePath in imagePaths:
        # the part of the filename before the first "_" is an integer index
        # into config.CLASSES
        filename = imagePath.split(os.path.sep)[-1]
        label = config.CLASSES[int(filename.split("_")[0])]
        newPath = os.path.sep.join([config.BASE_PATH, split, label])
        # create the class directory (and any missing parents) on first use;
        # exist_ok makes this a no-op when the directory is already there
        os.makedirs(newPath, exist_ok=True)
        # copy the image (data and metadata) into its class directory
        newPathFile = os.path.sep.join([newPath, filename])
        shutil.copy2(imagePath, newPathFile)

# VGG16 with ImageNet weights and without its top layers (include_top=False);
# its outputs are used below as the extracted features
model = tf.keras.applications.VGG16(weights="imagenet", include_top=False)
# the label encoder is created lazily while processing the first split
le = None

2. Using Keras for deep learning feature extraction

  • Use Keras to extract features via deep learning from each image in the dataset
  • Write the class labels + extracted features to disk in CSV format
# Push the images of every split through the network and write one CSV row
# per image: "<encoded label>,<feature 0>,<feature 1>,...".
for split in (config.TRAIN, config.VAL, config.TEST):
    print("[INFO] process ... {} split".format(split))
    # NOTE: only the first 94 matched paths of each split are processed
    imagePaths = glob.glob(os.path.sep.join([config.BASE_PATH, split, '*', '*']))[:94]
    # the class name is looked up from the integer prefix of each filename
    filenames = [imagePath.split(os.path.sep)[-1] for imagePath in imagePaths]
    labels = [config.CLASSES[int(filename.split("_")[0])] for filename in filenames]
    # the encoder is created once, on the first split that is processed, and
    # must be fitted before transform() can be called on it
    if le is None:
        le = LabelEncoder()
        le.fit(labels)
    csvPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(split)])
    numBatches = int(np.ceil(len(imagePaths) / float(config.BATCH_SIZE)))
    # the context manager guarantees the CSV is flushed and closed even if a
    # batch fails half-way through
    with open(csvPath, "w") as csvFile:
        for (b, i) in enumerate(range(0, len(imagePaths), config.BATCH_SIZE)):
            print("[INFO] processing batch {}/{}".format(b + 1, numBatches))
            batchPaths = imagePaths[i : i + config.BATCH_SIZE]
            batchLabels = labels[i : i + config.BATCH_SIZE]
            batchLabels = le.transform(batchLabels)
            batchImages = []
            for imagePath in batchPaths:
                # load the input image using the Keras helper utility
                # while ensuring the image is resized to 224x224 pixels
                image = tf.keras.preprocessing.image.load_img(imagePath, target_size=(224, 224))
                image = tf.keras.preprocessing.image.img_to_array(image)
                # add a leading batch dimension, then apply the VGG16
                # input preprocessing
                image = np.expand_dims(image, axis=0)
                image = tf.keras.applications.vgg16.preprocess_input(image)
                # add the image to the batch
                batchImages.append(image)
            # pass the images through the network and use the outputs as
            # our actual features, then flatten each output into one vector
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=config.BATCH_SIZE)
            features = features.reshape((features.shape[0], -1))
            # loop over the class labels and extracted features
            for (label, vec) in zip(batchLabels, features):
                # construct a row that consists of the class label and
                # extracted features
                vec = ",".join([str(v) for v in vec])
                csvFile.write("{},{}\n".format(label, vec))
# serialize the label encoder to disk
with open(config.LE_PATH, "wb") as f:
    f.write(pickle.dumps(le))
[INFO] process ... train split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
[INFO] process ... val split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
[INFO] process ... test split
[INFO] processing batch 1/3
[INFO] processing batch 2/3
[INFO] processing batch 3/3
3. Produce Generators

def csv_feature_generator(inputPath, bs, numClasses, mode="train"):
    """Yield batches of (features, one-hot labels) read from a CSV file, forever.

    Each row of the file is expected to be ``<label>,<f0>,<f1>,...`` where
    ``<label>`` is an integer class index and the remaining fields are floats.
    When the end of the file is reached the generator rewinds and starts over.

    Args:
        inputPath: Path of the CSV file to read.
        bs: Number of rows per batch.
        numClasses: Width of the one-hot label vectors.
        mode: ``"train"`` fills every batch completely, wrapping around the
            end of the file if needed. ``"eval"`` cuts a batch short at the
            end of the file so that one batch never mixes rows from two
            passes over the file.

    Yields:
        A tuple ``(data, labels)`` of numpy arrays: ``data`` holds one feature
        vector per row and ``labels`` holds one one-hot float32 vector per row.

    Raises:
        ValueError: If the file contains no rows, or a row cannot be parsed.
        IndexError: If a label is outside the range allowed by ``numClasses``.
    """
    # the context manager closes the file when the generator is closed or
    # garbage collected
    with open(inputPath, "r") as f:
        # loop indefinitely
        while True:
            # initialize our batch of data and labels
            data = []
            labels = []
            # keep looping until we reach our batch size
            while len(data) < bs:
                # attempt to read the next row of the CSV file
                row = f.readline()
                # an empty string means we have reached the end of the file
                if row == "":
                    # reset the file pointer to the beginning of the file
                    f.seek(0)
                    # when evaluating, end a partially filled batch here so
                    # it is not topped up with rows from the start of the
                    # file; the first row is left unread for the next batch
                    if mode == "eval" and data:
                        break
                    # otherwise re-read from the top of the file
                    row = f.readline()
                    if row == "":
                        # still nothing: the file has no rows at all
                        raise ValueError(
                            "no rows found in {}".format(inputPath))
                # extract the class label and features from the row
                row = row.strip().split(",")
                # one-hot encode the integer class index as a float32 vector
                label = np.zeros(numClasses, dtype="float32")
                label[int(row[0])] = 1.0
                features = np.array(row[1:], dtype="float")
                # update the data and label lists
                data.append(features)
                labels.append(label)
            # yield the batch to the calling function
            yield (np.array(data), np.array(labels))
# load the label encoder from disk
# NOTE: unpickling executes code stored in the file, so config.LE_PATH must
# only ever point at a file this project wrote itself
with open(config.LE_PATH, "rb") as leFile:
    le = pickle.loads(leFile.read())
# derive the paths to the training, validation, and testing CSV files
trainPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TRAIN)])
valPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.VAL)])
testPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TEST)])
# determine the total number of images in the training and validation
# sets by counting the rows of each CSV file; the files are opened in a
# context manager so the handles are released as soon as counting is done
with open(trainPath) as trainFile:
    totalTrain = sum(1 for _ in trainFile)
with open(valPath) as valFile:
    totalVal = sum(1 for _ in valFile)
# extract the testing labels (first column of every row) from the CSV file
# and then determine the number of testing images
with open(testPath) as testFile:
    testLabels = [int(row.split(",")[0]) for row in testFile]
totalTest = len(testLabels)

4. Incremental learning

The Python script we’re implementing in this section will be responsible for:

  • Constructing the simple feedforward NN architecture
  • Implementing a CSV data generator used to yield batches of labels + feature vectors to the NN
  • Training the simple NN using the data generator
  • Evaluating the feature extractor


