Write Images to TFRecords Format
Image Processing Write Images to TFRecords Format
- 1. TFRecord format
- 2. Creating a tf.train.Example message
- 3. Write a list of images to TFRecords
- 4. Write a list of images/labels to TFRecords
- 5. Write a list of images with additional information to TFRecords
import numpy as np
import tensorflow as tf
import glob
1. TFRecord format
- doesn't know anything about image formats
- can store both dense arrays and encoded image files
- in contrast to imread and imsave, TF decouples reading/decoding from encoding/writing
Steps
- Encode the features as types compatible with tf.train.Example
- This stores the raw image string feature, as well as the height, width, depth, and arbitrary label feature.
2. Creating a tf.train.Example message
Suppose you want to create a tf.train.Example message from existing data. In practice, the dataset may come from anywhere, but the procedure of creating the tf.train.Example message from a single observation will be the same:
1. Within each observation, each value needs to be converted to a tf.train.Feature containing one of the 3 compatible types, using one of the helper functions defined below.
2. You create a map (dictionary) from the feature name string to the encoded feature value produced in step 1.
3. The map produced in step 2 is converted to a Features message.
# The following helpers convert a single value to a type compatible
# with tf.train.Example.
def _create_bytes_feature(value):
    """Wrap a single bytes-like value in a ``tf.train.Feature``.

    Args:
        value: ``bytes``, a ``str`` (encoded as UTF-8 before storing), or an
            eager tensor (converted with ``.numpy()`` first).

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds exactly one entry.
    """
    if isinstance(value, type(tf.constant(0))):
        # BytesList won't unpack a string from an EagerTensor.
        value = value.numpy()
    if isinstance(value, str):
        # BytesList only accepts bytes; a text string would raise TypeError.
        value = value.encode("utf-8")
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _create_float_feature(value):
    """Wrap a single float / double in a ``tf.train.Feature``.

    Args:
        value: The number to store.

    Returns:
        A ``tf.train.Feature`` whose ``float_list`` holds exactly one entry.
    """
    float_list = tf.train.FloatList(value=[value])
    return tf.train.Feature(float_list=float_list)
def _create_int64_feature(value):
    """Wrap a single bool / enum / int / uint in a ``tf.train.Feature``.

    Args:
        value: The integer-like value to store.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` holds exactly one entry.
    """
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
#freedom02 = tf.keras.preprocessing.image.load_img("images/freedom.png")
#freedom01_arr = tf.keras.preprocessing.image.img_to_array(freedom01)#image to array
#freedom02_arr = tf.keras.preprocessing.image.img_to_array(freedom02)
#freedom01_name = tf.keras.preprocessing.image.load_img("images/freedom.png")#load & decode image
#freedom02_name = tf.keras.preprocessing.image.load_img("images/freedom.png")
#print(freedom01_arr.shape)
# Write every PNG in the folder to one TFRecord file, one Example per image.
record_file = "images/TFRecords/my-tfR.tfrecords"
# glob returns files in arbitrary order; sorting keeps the record order
# reproducible from run to run.
list_files = sorted(glob.glob("images/TFRecords/*.png"))
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded PNG bytes; the context manager closes the
        # file handle as soon as the read is done.
        with open(filename, "rb") as image_file:
            image_string = image_file.read()
        # Single feature "raw_image" holding the whole encoded byte string.
        feature = {"raw_image": _create_bytes_feature(image_string)}
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())
#image_string
# Write the first PNGs of the folder, each with an integer label, to a TFRecord.
record_file = "images/TFRecords/my-tfR.tfrecords"
# glob returns files in arbitrary order; sorting makes the positional
# file -> label pairing below deterministic.
list_files = sorted(glob.glob("images/TFRecords/*.png"))
labels = [0, 1]
# Pair files and labels by position. zip stops at the shorter sequence, so a
# folder with fewer than two PNGs no longer raises IndexError, and files
# beyond the available labels are left out.
images_labels = dict(zip(list_files, labels))
with tf.io.TFRecordWriter(record_file) as writer:
    for filename, label in images_labels.items():
        # Read the still-encoded PNG bytes; the context manager closes the
        # file handle as soon as the read is done.
        with open(filename, "rb") as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            # Integer label taken from ``labels`` (0 or 1).
            "label": _create_int64_feature(label),
        }
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())
# Write every PNG in the folder to a TFRecord together with its dimensions.
record_file = "images/TFRecords/my-tfR.tfrecords"
# glob returns files in arbitrary order; sorting keeps the record order
# reproducible from run to run.
list_files = sorted(glob.glob("images/TFRecords/*.png"))
# NOTE(review): the dimensions are assumed, not measured -- every record is
# tagged height=600, width=400, 3 channels whatever the file really contains.
# tf.image.decode_png(image_string).shape would give the actual size; confirm
# that all inputs match before relying on these fields.
image_shape = (600, 400, 3)
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded PNG bytes; the context manager closes the
        # file handle as soon as the read is done.
        with open(filename, "rb") as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            "height": _create_int64_feature(image_shape[0]),
            "width": _create_int64_feature(image_shape[1]),
            "no_c": _create_int64_feature(image_shape[2]),
        }
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())
# Write every JPEG in the folder to a TFRecord together with its dimensions.
record_file = "images/TFRecords/my-tfR-JPEG.tfrecords"
# glob returns files in arbitrary order; sorting keeps the record order
# reproducible from run to run.
list_files = sorted(glob.glob("images/TFRecords/*.jpg"))
# Defined locally so this section does not depend on a value left over from
# earlier code (it would raise NameError when run on its own).
# NOTE(review): the dimensions are assumed, not measured -- every record is
# tagged height=600, width=400, 3 channels whatever the file really contains.
# tf.io.decode_jpeg(image_string).shape would give the actual size; confirm
# that all inputs match before relying on these fields.
image_shape = (600, 400, 3)
with tf.io.TFRecordWriter(record_file) as writer:
    for filename in list_files:
        # Read the still-encoded JPEG bytes; the context manager closes the
        # file handle as soon as the read is done.
        with open(filename, "rb") as image_file:
            image_string = image_file.read()
        feature = {
            # Whole encoded image as one byte string.
            "raw_image": _create_bytes_feature(image_string),
            "height": _create_int64_feature(image_shape[0]),
            "width": _create_int64_feature(image_shape[1]),
            "no_c": _create_int64_feature(image_shape[2]),
        }
        tf_example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(tf_example.SerializeToString())