Keras transfer learning
Keras Transfer Learning: Feature Extraction for Big Datasets
- 1. Building the image dataset
- 2. Using Keras for deep learning feature extraction
- 3. Produce Generators
- 3. Incremental learning
- Referenzen:
import numpy as np
import tensorflow as tf
import os
import glob
import argparse
import random
#from resnet import ResNet
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from pysim import config
import importlib
importlib.reload(config)
import glob
import cv2
import shutil
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from sklearn.model_selection import train_test_split
import random
import pickle
from imutils import paths
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
#from sklearn.linear_model import LogisticRegression
#from sklearn.metrics import classification_report
# Reorganize the original dataset into BASE_PATH/<split>/<label>/<filename>.
# The class index is the integer prefix of each filename (the text before the
# first "_"), which is looked up in config.CLASSES to get the label name.
for split in (config.TRAIN, config.VAL, config.TEST):
    imagePaths = glob.glob(os.path.sep.join([config.ORIG_INPUT_DATASET, split, '*']))
    for imagePath in imagePaths:
        filename = imagePath.split(os.path.sep)[-1]
        label = config.CLASSES[int(filename.split("_")[0])]
        newPath = os.path.sep.join([config.BASE_PATH, split, label])
        # exist_ok avoids the check-then-create race of a separate
        # os.path.exists() test and keeps re-runs idempotent.
        os.makedirs(newPath, exist_ok=True)
        newPathFile = os.path.sep.join([newPath, filename])
        # copy2 copies the file contents and attempts to preserve metadata.
        shutil.copy2(imagePath, newPathFile)
# Load VGG16 with ImageNet weights and without its classification head, so
# that model.predict() returns the final convolutional feature maps.
model = tf.keras.applications.VGG16(weights="imagenet", include_top=False)
le = None

# For every split, push the images through the network in batches and write
# one CSV row per image: "<encoded label>,<feature 0>,<feature 1>,...".
for split in (config.TRAIN, config.VAL, config.TEST):
    print("[INFO] process ... {} split".format(split))
    # NOTE(review): the [:94] slice keeps only the first 94 globbed paths of
    # each split (taken before shuffling) -- this looks like a debugging cap;
    # confirm before running on the full dataset.
    imagePaths = glob.glob(os.path.sep.join([config.BASE_PATH, split, '*', '*']))[:94]
    random.shuffle(imagePaths)
    # Derive each label from the integer prefix of the file name.
    labels_ = [imagePath.split(os.path.sep)[-1] for imagePath in imagePaths]
    labels = [config.CLASSES[int(filename.split("_")[0])] for filename in labels_]
    if le is None:
        # The encoder is fitted once, on the labels of the first split only.
        # NOTE(review): a label that first appears in a later split would make
        # le.transform() fail -- verify every class occurs in the first split.
        le = LabelEncoder()
        le.fit(labels)
    cvsPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(split)])
    # The batch count is loop-invariant, so compute it once for the log line.
    numBatches = int(np.ceil(len(imagePaths) / float(config.BATCH_SIZE)))
    # The context manager closes the CSV even if prediction raises mid-split.
    with open(cvsPath, "w") as csvFile:
        for (b, i) in enumerate(range(0, len(imagePaths), config.BATCH_SIZE)):
            print("[INFO] processing batch {}/{}".format(b + 1, numBatches))
            batchPaths = imagePaths[i : i + config.BATCH_SIZE]
            batchLabels = labels[i : i + config.BATCH_SIZE]
            batchLabels = le.transform(batchLabels)
            batchImages = []
            for imagePath in batchPaths:
                # load the input image using the Keras helper utility
                # while ensuring the image is resized to 224x224 pixels
                image = tf.keras.preprocessing.image.load_img(imagePath, target_size=(224, 224))
                image = tf.keras.preprocessing.image.img_to_array(image)
                # preprocess the image by (1) expanding the dimensions and
                # (2) applying the VGG16 input preprocessing
                image = np.expand_dims(image, axis=0)
                image = tf.keras.applications.vgg16.preprocess_input(image)
                # add the image to the batch
                batchImages.append(image)
            # pass the images through the network and use the outputs as
            # our actual features, then flatten each feature volume into a
            # single vector (-1 lets NumPy infer the length instead of
            # hard-coding 7 * 7 * 512)
            batchImages = np.vstack(batchImages)
            features = model.predict(batchImages, batch_size=config.BATCH_SIZE)
            features = features.reshape((features.shape[0], -1))
            # loop over the class labels and extracted features
            for (label, vec) in zip(batchLabels, features):
                # construct a row that consists of the class label and
                # the extracted features
                vec = ",".join([str(v) for v in vec])
                csvFile.write("{},{}\n".format(label, vec))
# Serialize the fitted label encoder to disk so that later steps can reload
# the same class-to-index mapping. The context manager guarantees the file is
# closed even if pickling fails.
with open(config.LE_PATH, "wb") as f:
    pickle.dump(le, f)
print("Done")
# Scratch expressions: each builds a 1x3 NumPy array whose value is not
# assigned to anything, so these two statements have no effect on the script.
np.array([[1,2,3]])
np.array([[4,5,6]])
def csv_feature_generator(inputPath, bs, numClasses, mode="train"):
    """Yield batches of (features, one-hot labels) read from a feature CSV.

    Each CSV row must have the form ``<int label>,<f0>,<f1>,...``. The
    generator never terminates: when the end of the file is reached it
    rewinds to the beginning.

    Args:
        inputPath: Path of the CSV file to read.
        bs: Number of rows per batch.
        numClasses: Length of the one-hot label vectors.
        mode: ``"train"`` keeps filling the current batch with rows from the
            start of the file after a rewind; ``"eval"`` instead yields the
            (possibly shorter) batch collected so far, so that one pass over
            the file never mixes in rows from the next pass.

    Yields:
        A tuple ``(data, labels)`` of NumPy arrays: ``data`` holds the float
        feature vectors and ``labels`` the float32 one-hot vectors, one row
        per CSV line in the batch.

    Raises:
        ValueError: In ``"train"`` mode if the file contains no rows, or in
            any mode if a row cannot be parsed as numbers.
        IndexError: If a label is outside ``range(numClasses)``.
    """
    # The context manager closes the file when the generator is closed or
    # garbage-collected, instead of leaking the handle.
    with open(inputPath, "r") as f:
        # loop indefinitely
        while True:
            # initialize our batch of data and labels
            data = []
            labels = []
            # keep looping until we reach our batch size
            while len(data) < bs:
                # attempt to read the next row of the CSV file
                row = f.readline()
                # an empty string means we have reached the end of the file
                if row == "":
                    # reset the file pointer to the beginning of the file
                    f.seek(0)
                    # when evaluating, stop here WITHOUT consuming a row, so
                    # the next pass starts again at the very first row
                    if mode == "eval":
                        break
                    # otherwise wrap around and continue with the first row
                    row = f.readline()
                    if row == "":
                        raise ValueError(
                            "feature file {!r} contains no rows".format(inputPath))
                # extract the class label and features from the row
                fields = row.strip().split(",")
                # one-hot encode the label with NumPy, so the function does
                # not rely on a Keras helper that this file never imports
                onehot = np.zeros(numClasses, dtype="float32")
                onehot[int(fields[0])] = 1.0
                features = np.array(fields[1:], dtype="float")
                # update the data and label lists
                data.append(features)
                labels.append(onehot)
            # yield the batch to the calling function
            yield (np.array(data), np.array(labels))
# Reload the label encoder that the feature-extraction step pickled to disk.
# The file is produced by this same pipeline; do not point LE_PATH at
# untrusted data, because unpickling can execute arbitrary code.
with open(config.LE_PATH, "rb") as f:
    le = pickle.loads(f.read())
# derive the paths to the training, validation, and testing CSV files
trainPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TRAIN)])
valPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.VAL)])
testPath = os.path.sep.join([config.BASE_CSV_PATH, "{}.csv".format(config.TEST)])
# determine the total number of rows (one per image) in the training and
# validation CSV files; context managers make sure every handle is closed
with open(trainPath) as f:
    totalTrain = sum(1 for _ in f)
with open(valPath) as f:
    totalVal = sum(1 for _ in f)
# extract the testing labels (first CSV column) and then determine the
# number of testing images
with open(testPath) as f:
    testLabels = [int(row.split(",")[0]) for row in f]
totalTest = len(testLabels)
3. Incremental learning
The Python script we’re implementing in this section will be responsible for:
- Constructing the simple feedforward NN architecture
- Implementing a CSV data generator used to yield batches of labels + feature vectors to the NN
- Training the simple NN using the data generator
- Evaluating the feature extractor
Referenzen:
Adrian Rosebrock, Keras: Feature extraction on large datasets with Deep Learning, PyImageSearch, https://www.pyimagesearch.com/2019/05/27/keras-feature-extraction-on-large-datasets-with-deep-learning/, accessed on 3 January 2021.