Source code for akida_models.imagenet.preprocessing

# ******************************************************************************
# Copyright 2022 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""
Preprocessing tools for ImageNet dataset.
"""

__all__ = ["RandomColorJitter", "ThreeAugment", "preprocess_image", "index_to_label"]

import numpy as np
import keras
import tensorflow as tf
from scipy.ndimage import gaussian_filter

from .imagenet_labels2names import imagenet_labels


class RandomColorJitter(keras.layers.Layer):
    """RandomColorJitter class.

    Randomly adds color jitter to an image. Color jitter means to add random brightness, contrast,
    saturation, and hue to an image. There is a 80% chance that an image will be randomly
    color-jittered. Taken on https://keras.io/examples/vision/barlow_twins/

    Args:
        proba(float, optional): Probability of applying the color jitter. Defaults to 0.8.
    """

    def __init__(self, *args, proba=0.8, **kwargs):
        super().__init__(*args, **kwargs)
        self.proba = proba

    @tf.function
    def call(self, image):
        def _color_jitter(image):
            image = tf.image.random_brightness(image, 0.8)
            image = tf.image.random_contrast(image, 0.4, 1.6)
            image = tf.image.random_saturation(image, 0.4, 1.6)
            image = tf.image.random_hue(image, 0.2)
            return image
        return tf.cond(
            tf.random.uniform([]) < self.proba, lambda: _color_jitter(image), lambda: image)


class ThreeAugment(keras.layers.Layer):
    """Define a simple data augmentation pipeline of three augmentations, following the explaining
    in the paper: https://arxiv.org/abs/2204.07118. For that, this augmentation choses one of:
        - GrayScale: This favors color invariance and give more focus on shapes.
        - Solarization: This adds strong noise on the colour to be more robust to the variation
          of colour intensity and so focus more on shape
        - Gaussian Blur: In order to slightly alter details in the image.
    """
    @tf.function
    def call(self, image):
        def _to_gray():
            return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(image))

        def _solarize():
            # Taken of: https://keras.io/examples/vision/barlow_twins/
            return tf.where(image < 10, image, 255 - image)

        def _gaussian_blur():
            # Taken of: https://keras.io/examples/vision/barlow_twins/
            s = np.random.random()
            return gaussian_filter(input=image, sigma=s)

        proba = tf.random.uniform([])
        cases = [(proba < 1 / 3, _to_gray), (proba < 2 / 3, _solarize)]
        return tf.case(cases, default=_gaussian_blur, name='3-Augment', exclusive=False)


DATA_AUGMENTATION = keras.Sequential([RandomColorJitter(), ThreeAugment()])


[docs]@tf.function
def preprocess_image(image, image_size, training=False, data_aug=None):
    """ ImageNet data preprocessing.

    Preprocessing includes cropping, and resizing for both training and
    validation images. Training preprocessing introduces some random distortion
    of the image to improve accuracy.

    Args:
        image (tf.Tensor): input image as a 3-D tensor
        image_size (tuple): desired image size
        training (bool, optional): True for training preprocessing, False for
            validation and inference. Defaults to False.
        data_aug (keras.Sequential, optional): data augmentation. Defaults to None.

    Returns:
        :obj:`tensorflow.Tensor`: preprocessed image
    """
    assert len(image_size) == 2, f"image_size should have 2 elements (H, W), received {image_size}."
    shape = tf.shape(image)

    if training:
        # For training: crop, flip and resize
        bbox_begin, bbox_size, _ = tf.image.sample_distorted_bounding_box(
            shape,
            tf.zeros([0, 0, 4], tf.float32),  # force using whole image
            use_image_if_no_bounding_boxes=True,
            min_object_covered=0.1,
            aspect_ratio_range=[0.75, 1.33],
            area_range=[0.05, 1.0],
            max_attempts=100)

        image = tf.slice(image, bbox_begin, bbox_size)
        image = tf.image.resize(image, image_size)

        # Make all data augmentation after resize to decrease computational cost
        image = tf.image.random_flip_left_right(image)
        if data_aug is not None:
            image = data_aug(image)
    else:
        # For validation/inference: aspect preserving resize and central crop
        height = tf.cast(shape[0], tf.float32)
        width = tf.cast(shape[1], tf.float32)

        # Scale image before cropping, keeping aspect ratio
        resize_min_h = np.round(image_size[0] * 1.143).astype(np.float32)
        resize_min_w = np.round(image_size[1] * 1.143).astype(np.float32)
        min_dim = tf.minimum(height, width)
        scale_ratio = (resize_min_h / min_dim, resize_min_w / min_dim)

        # Convert back to int for TF ops
        new_height = tf.cast(height * scale_ratio[0], tf.int32)
        new_width = tf.cast(width * scale_ratio[1], tf.int32)

        image = tf.image.resize(image, [new_height, new_width])

        # Second: central crop to desired image_size
        image = tf.image.resize_with_crop_or_pad(image, image_size[0], image_size[1])

    return tf.cast(image, tf.float32)


[docs]def index_to_label(index):
    """ Function to get an ImageNet label from an index.

    Args:
        index: between 0 and 999

    Returns:
        str: a string of comma separated labels
    """
    return imagenet_labels[index]