import numpy as np
import tensorflow as tf
import glob

1. TFRecord format

  • The TFRecord format doesn't know anything about image formats
  • It can store either dense arrays or encoded image formats
  • In contrast to imread and imsave, TF decouples reading from decoding and encoding from writing

Steps

  • Encode the features as types compatible with tf.train.Example
  • Store the raw image string as a feature, together with the height, width, depth, and an arbitrary label feature.

2. Creating a tf.train.Example message

Suppose you want to create a tf.train.Example message from existing data. In practice, the dataset may come from anywhere, but the procedure of creating the tf.train.Example message from a single observation will be the same:

  • Within each observation, each value needs to be converted to a tf.train.Feature containing one of the 3 compatible types, using one of the helper functions defined below

  • Create a map (dictionary) from each feature name string to the encoded feature value produced in the previous step

  • Convert the map produced in the previous step to a Features message.

# The following functions convert a value to a type compatible with tf.train.Example.

def _create_bytes_feature(value):
  """Wrap a single string / bytes value in a tf.train.Feature (bytes_list).

  If `value` is an eager tensor it is first converted with `.numpy()`,
  because BytesList won't unpack a string from an EagerTensor.
  """
  eager_tensor_type = type(tf.constant(0))
  payload = value.numpy() if isinstance(value, eager_tensor_type) else value
  bytes_list = tf.train.BytesList(value=[payload])
  return tf.train.Feature(bytes_list=bytes_list)

def _create_float_feature(value):
  """Wrap a single float / double in a tf.train.Feature (float_list)."""
  float_list = tf.train.FloatList(value=[value])
  return tf.train.Feature(float_list=float_list)

def _create_int64_feature(value):
  """Wrap a single bool / enum / int / uint in a tf.train.Feature (int64_list)."""
  int64_list = tf.train.Int64List(value=[value])
  return tf.train.Feature(int64_list=int64_list)
#freedom02 = tf.keras.preprocessing.image.load_img("images/freedom.png")
#freedom01_arr = tf.keras.preprocessing.image.img_to_array(freedom01)#image to array
#freedom02_arr = tf.keras.preprocessing.image.img_to_array(freedom02)
#freedom01_name = tf.keras.preprocessing.image.load_img("images/freedom.png")#load & decode image
#freedom02_name = tf.keras.preprocessing.image.load_img("images/freedom.png")
#print(freedom01_arr.shape)

3. Write a list of images to TFRecords

# Destination TFRecord file that receives one serialized Example per image.
record_file = "images/TFRecords/my-tfR.tfrecords"
# Collect every PNG in the folder.
list_files = glob.glob("images/TFRecords/*.png")
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded image bytes; the context manager closes the
        # file handle right away instead of leaving it to the garbage collector.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        # Single feature named "raw_image" holding the whole byte string.
        feature = {"raw_image": _create_bytes_feature(image_string)}
        # Wrap the feature map in an Example message and append it to the file.
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())
#image_string    

4. Write a list of images/labels to TFRecords

# Destination TFRecord file that receives one serialized Example per image.
record_file = "images/TFRecords/my-tfR.tfrecords"
# Sort the matches so the file -> label pairing is reproducible; the order
# returned by glob depends on the file system.
list_files = sorted(glob.glob("images/TFRecords/*.png"))
labels = [0, 1]
# Pair files with labels positionally. zip() stops at the shorter sequence, so
# fewer than two PNGs no longer raises IndexError; files beyond the number of
# labels are left out, as before.
images_labels = dict(zip(list_files, labels))
with tf.io.TFRecordWriter(record_file) as writer:
    for filename, label in images_labels.items():
        # Read the still-encoded image bytes and close the file handle promptly.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            # Integer class label taken from `labels` (0 or 1 here).
            "label": _create_int64_feature(label),
        }
        # Wrap the feature map in an Example message and append it to the file.
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())

5. Write a list of images with additional information to TFRecords

To be able to read the TFRecord files back, additional information such as the original size/shape of each image must be retained.

# Destination TFRecord file that receives one serialized Example per image.
record_file = "images/TFRecords/my-tfR.tfrecords"
# Collect every PNG in the folder.
list_files = glob.glob("images/TFRecords/*.png")
# Shape stored alongside every image, indexed below as (height, width, channels).
# NOTE(review): this is hard-coded, not measured -- it is only correct if every
# PNG really has this shape. tf.image.decode_png(image_string).shape would
# record each image's actual size; confirm which is intended.
image_shape = (600, 400, 3)
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded image bytes and close the file handle promptly.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            'height': _create_int64_feature(image_shape[0]),
            'width': _create_int64_feature(image_shape[1]),
            "no_c": _create_int64_feature(image_shape[2]),
        }
        # Wrap the feature map in an Example message and append it to the file.
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())
# Destination TFRecord file for the JPEG images.
record_file = "images/TFRecords/my-tfR-JPEG.tfrecords"
# Collect every JPEG in the folder.
list_files = glob.glob("images/TFRecords/*.jpg")
# Define the stored shape here instead of relying on a variable left over from
# an earlier cell (which raised NameError when that cell had not run or had
# found no files). Indexed below as (height, width, channels).
# NOTE(review): this is hard-coded, not measured -- it is only correct if every
# JPEG really has this shape. tf.io.decode_jpeg(image_string).shape would
# record each image's actual size; confirm which is intended.
image_shape = (600, 400, 3)
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded image bytes and close the file handle promptly.
        with open(filename, 'rb') as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            'height': _create_int64_feature(image_shape[0]),
            'width': _create_int64_feature(image_shape[1]),
            "no_c": _create_int64_feature(image_shape[2]),
        }
        # Wrap the feature map in an Example message and append it to the file.
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())