# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Model quantization API"""
import tensorflow as tf
from keras.models import clone_model
from keras.layers import Conv2D, SeparableConv2D, Dense, ReLU, Layer
from .quantization_ops import MaxQuantizer, MaxPerAxisQuantizer
from .quantization_layers import (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense, ActivationDiscreteRelu,
QuantizedActivation, QuantizedReLU)
from .transforms import sequentialize, invert_batchnorm_pooling, fold_batchnorm
from .cnn2snn_objects import cnn2snn_objects
keras_neural_layers = (Conv2D, SeparableConv2D, Dense)
cnn2snn_neural_layers = (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense)
supported_neural_layers = keras_neural_layers + cnn2snn_neural_layers
def quantize(model,
weight_quantization=0,
activ_quantization=0,
input_weight_quantization=None,
fold_BN=True,
quantizer_function=None):
"""Converts a standard sequential Keras model to a CNN2SNN Keras quantized
model, compatible for Akida conversion.
This function returns a Keras model where the standard neural layers
(Conv2D, SeparableConv2D, Dense) and the ReLU activations are replaced with
CNN2SNN quantized layers (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense, QuantizedRelu).
Several transformations are applied to the model:
- the order of MaxPool and BatchNormalization layers are inverted so that
BatchNormalization always happens first,
- the batch normalization layers are folded into the previous layers.
This new model can be either directly converted to akida, or first
retrained for a few epochs to recover any accuracy loss.
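
    Examples:
        A minimal usage sketch: the small Sequential model below is purely
        illustrative, and ``my_quantizer_function`` is a hypothetical policy
        showing how ``quantizer_function`` can override the default
        quantizers.

        >>> model = tf.keras.Sequential([
        ...     tf.keras.layers.Dense(5, input_shape=(3,)),
        ...     tf.keras.layers.Softmax()])
        >>> model_quantized = cnn2snn.quantize(model,
        ...                                    weight_quantization=4,
        ...                                    activ_quantization=4)
        >>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)

        >>> # Hypothetical policy: plain MaxQuantizer for Dense weights,
        >>> # default quantizer everywhere else
        >>> def my_quantizer_function(layer, default_quantizer):
        ...     if isinstance(layer, tf.keras.layers.Dense):
        ...         return cnn2snn.quantization_ops.MaxQuantizer(bitwidth=4)
        ...     return default_quantizer
        >>> model_quantized = cnn2snn.quantize(
        ...     model,
        ...     weight_quantization=4,
        ...     activ_quantization=4,
        ...     quantizer_function=my_quantizer_function)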
Args:
model (tf.keras.Model): a standard Keras model
weight_quantization (int): sets all weights in the model to have
a particular quantization bitwidth except for the weights in the
first layer.
* '0' implements floating point 32-bit weights.
* '2' through '8' implements n-bit weights where n is from 2-8 bits.
activ_quantization (int): sets all activations in the model to have a
particular activation quantization bitwidth.
* '0' implements floating point 32-bit activations.
            * '1' through '8' implements n-bit activations where n is from 1-8 bits.
input_weight_quantization (int): sets weight quantization in the first
layer. Defaults to weight_quantization value.
* 'None' implements the same bitwidth as the other weights.
* '0' implements floating point 32-bit weights.
* '2' through '8' implements n-bit weights where n is from 2-8 bits.
fold_BN (bool): enable folding batch normalization layers with their
corresponding neural layer.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
tf.keras.Model: a quantized Keras model
"""
# Overrides input weight quantization if None
if input_weight_quantization is None:
input_weight_quantization = weight_quantization
if fold_BN:
# Identify sequential branches
seq_model = sequentialize(model)
# Invert batch normalization and pooling
model_t = invert_batchnorm_pooling(seq_model)
# Fold batch norm layers with corresponding neural layers
model_t = fold_batchnorm(model_t)
else:
# Keep original model without transformation
model_t = model
# Convert neural layers and ReLU to CNN2SNN quantized layers
first_neural_layer = True
def replace_layer(layer):
nonlocal first_neural_layer
if isinstance(layer, (ReLU, QuantizedActivation)):
return _quantize_activation_layer(layer, activ_quantization)
if type(layer) in supported_neural_layers:
if first_neural_layer:
bitwidth = input_weight_quantization
first_neural_layer = False
else:
bitwidth = weight_quantization
return _convert_to_quantized_layer(layer, bitwidth,
quantizer_function)
return layer.__class__.from_config(layer.get_config())
with tf.keras.utils.custom_object_scope(cnn2snn_objects):
new_model = clone_model(model_t, clone_function=replace_layer)
new_model.set_weights(model_t.get_weights())
return new_model
def quantize_layer(model, target_layer, bitwidth, quantizer_function=None):
"""Quantizes a specific layer with the given bitwidth.
This function returns a Keras model where the target layer is quantized.
All other layers are preserved.
    If the target layer is a native Keras layer (Conv2D, SeparableConv2D,
    Dense, ReLU), it is replaced by a CNN2SNN quantized layer
    (QuantizedConv2D, QuantizedSeparableConv2D, QuantizedDense,
    QuantizedReLU). If the target layer is an already quantized layer, only
    the bitwidth is modified.
Examples:
>>> # Quantize a layer of a native Keras model
>>> model = tf.keras.Sequential([
... tf.keras.layers.Dense(5, input_shape=(3,)),
... tf.keras.layers.Softmax()])
>>> model_quantized = cnn2snn.quantize_layer(model,
... target_layer=0,
... bitwidth=4)
>>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)
>>> print(model_quantized.layers[0].quantizer.bitwidth)
4
        >>> # Quantize a layer of an already quantized model
>>> model_quantized = cnn2snn.quantize_layer(model_quantized,
... target_layer=0, bitwidth=2)
>>> print(model_quantized.layers[0].quantizer.bitwidth)
2
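        >>> # The target layer can also be passed as a layer instance
        >>> # (a sketch, reusing the native model defined above)
        >>> model_quantized = cnn2snn.quantize_layer(model,
        ...                                          target_layer=model.layers[0],
        ...                                          bitwidth=4)
        >>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)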
Args:
model (tf.keras.Model): a standard Keras model
target_layer: a standard or quantized Keras layer to be
converted, or the index or name of the target layer.
        bitwidth (int): the desired quantization bitwidth. Must be an
            integer greater than zero.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
tf.keras.Model: a quantized Keras model
Raises:
ValueError: In case of invalid target layer
ValueError: If bitwidth is not greater than zero
"""
if not bitwidth > 0:
raise ValueError("Only bitwidth greater than zero is supported. "
f"Receives bitwidth {bitwidth}.")
if isinstance(target_layer, int):
layer_to_quantize = model.layers[target_layer]
elif isinstance(target_layer, str):
layer_to_quantize = model.get_layer(target_layer)
elif isinstance(target_layer, Layer):
layer_to_quantize = target_layer
else:
raise ValueError("Target layer argument is not recognized")
def replace_layer(layer):
if layer == layer_to_quantize:
if isinstance(layer, (ReLU, QuantizedActivation)):
return _quantize_activation_layer(layer, bitwidth)
if type(layer) in supported_neural_layers:
return _convert_to_quantized_layer(layer, bitwidth,
quantizer_function)
return layer.__class__.from_config(layer.get_config())
return layer.__class__.from_config(layer.get_config())
with tf.keras.utils.custom_object_scope(cnn2snn_objects):
new_model = clone_model(model, clone_function=replace_layer)
new_model.set_weights(model.get_weights())
return new_model
def _convert_to_quantized_layer(layer, bitwidth, quantizer_function=None):
"""Quantizes a standard Keras layer (Conv2D, SeparableConv2D, Dense) or a
CNN2SNN quantized layer (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense) to a CNN2SNN quantized layer with given bitwidth.
A native Keras layer will be converted to a quantized layer with a
MaxPerAxisQuantizer if quantizer_function is not specified.
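
    Examples:
        A minimal sketch of the expected behaviour (the layer name below is
        illustrative):

        >>> layer = Dense(10, name='dense_example')
        >>> qlayer = _convert_to_quantized_layer(layer, bitwidth=4)
        >>> assert isinstance(qlayer, QuantizedDense)
        >>> assert qlayer.quantizer.bitwidth == 4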
Args:
layer (tf.keras.Layer): a standard Keras (Conv2D, SeparableConv2D or
Dense) or quantized (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense) layer.
bitwidth (int): the desired weight quantization bitwidth. If zero, the
Keras neural layer will be returned as it is.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
:obj:`keras.Layer`: a CNN2SNN quantized Keras layer
Raises:
ValueError: if a quantized layer is quantized with bitwidth 0.
"""
config = layer.get_config()
# Handle case where bitwidth=0
if bitwidth == 0:
if isinstance(layer, cnn2snn_neural_layers):
raise ValueError(f"A quantized layer cannot be quantized with "
f"bitwidth 0. Receives layer {layer.name} of type "
f" {layer.__class__.__name__}.")
return layer.__class__.from_config(config)
# Set quantizer with expected bitwidth
if 'quantizer' not in config:
quantizer = MaxPerAxisQuantizer(bitwidth=bitwidth)
if quantizer_function is not None:
quantizer = quantizer_function(layer, quantizer)
config['quantizer'] = quantizer
else:
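        # The layer is already quantized: update the bitwidth of its
        # serialized quantizer configuration in place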
config['quantizer']['config']['bitwidth'] = bitwidth
# Function to handle unsupported arguments in config
def pop_unsupported_args(class_type):
for arg, default_value in class_type.unsupported_args.items():
if (arg in config and config[arg] != default_value):
raise RuntimeError(
f"Argument '{arg}' in layer '{layer.name}' is only "
f"supported with default value '{default_value}'. "
f"Receives '{config[arg]}'.")
config.pop(arg, None)
# Return quantized layer, based on the config
if isinstance(layer, Conv2D):
pop_unsupported_args(QuantizedConv2D)
return QuantizedConv2D.from_config(config)
if isinstance(layer, SeparableConv2D):
if 'quantizer_dw' not in config:
quantizer = MaxQuantizer(bitwidth=bitwidth)
if quantizer_function is not None:
quantizer = quantizer_function(layer, quantizer)
config['quantizer_dw'] = quantizer
else:
config['quantizer_dw']['config']['bitwidth'] = bitwidth
pop_unsupported_args(QuantizedSeparableConv2D)
return QuantizedSeparableConv2D.from_config(config)
if isinstance(layer, Dense):
pop_unsupported_args(QuantizedDense)
return QuantizedDense.from_config(config)
return None
def _quantize_activation_layer(layer, bitwidth):
"""Quantizes a Keras ReLU layer or a CNN2SNN quantized activation to the
given bitwidth. A ReLU layer is converted to an ActivationDiscreteRelu
layer.
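
    Examples:
        A minimal sketch (illustrative only): a standard ReLU capped at 6
        converted to a 4-bit QuantizedReLU.

        >>> relu = ReLU(max_value=6.0, name='relu_example')
        >>> qrelu = _quantize_activation_layer(relu, bitwidth=4)
        >>> assert isinstance(qrelu, QuantizedReLU)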
Args:
layer (tf.keras.Layer): an activation layer (ReLU or CNN2SNN quantized
activation)
bitwidth (int): the desired quantization bitwidth. If zero, the ReLU
layer will be returned as it is.
Returns:
:obj:`keras.Layer`: a CNN2SNN quantized Keras layer
Raises:
ValueError: if a quantized activation layer is quantized with
bitwidth 0.
"""
if bitwidth == 0:
if isinstance(layer, QuantizedActivation):
raise ValueError(f"A quantized activation cannot be quantized with "
f"bitwidth 0. Receives layer {layer.name} of type "
f" {layer.__class__.__name__}.")
return layer.__class__.from_config(layer.get_config())
if isinstance(layer, ReLU):
if layer.threshold != 0:
print(f"Skipping ReLU layer {layer.name} with non-zero threshold")
return layer.__class__.from_config(layer.get_config())
# Instantiate a QuantizedReLU using the original ReLU max_value
return QuantizedReLU(bitwidth, layer.max_value, name=layer.name)
if isinstance(layer, ActivationDiscreteRelu):
# Evaluate the ActivationDiscreteRelu maximum value
max_value = layer.step * layer.levels
# Convert it to a QuantizedReLU
return QuantizedReLU(bitwidth, max_value, name=layer.name)
config = layer.get_config()
config['bitwidth'] = bitwidth
return layer.__class__.from_config(config)