Source code for cnn2snn.calibration.calibration

# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Quantized model calibration tools"""

import keras
import tensorflow as tf

from ..quantization_layers import (QuantizedReLU, QuantizedDense,
                                   QuantizedConv2D, QuantizedSeparableConv2D)
from ..quantization_ops import WeightQuantizer

from ..transforms.clone import clone_model_with_weights


def create_submodel(model, first_layer, last_layer):
    """Create a submodel of a Keras Model

    Args:
        model (:obj:`keras.Model`): a Keras Model
        first_layer (:obj:`keras.Layer`): the submodel first layer
        last_layer (:obj:`keras.Layer`): the submodel last layer

    Returns:
        :obj:`keras.Model`: a Keras sub-model
    """
    submodel = keras.Sequential()
    input_shape = first_layer.input_shape[1:]
    submodel.add(keras.layers.Input(shape=input_shape))
    add_layer = False
    for layer in model.layers:
        if layer == first_layer:
            add_layer = True
        if add_layer:
            submodel.add(layer)
        if layer == last_layer:
            break
    return submodel


class NoneQuantizer(WeightQuantizer):
    """A WeightQuantizer that returns float weights
    """

    def scale_factor(self, _):
        return 1

    def quantize(self, w):
        return w


def disable_quantizers(layer):
    """Disable quantizers for the specified layer

    Args:
        layer (:obj:`keras.Layer`): the layer

    Returns:
        [:obj:`cnn2snn.quantization_ops.WeightQuantizer`]: the list of disabled
        quantizers.
    """
    quantizers = [layer.quantizer]
    layer.quantizer = NoneQuantizer(layer.quantizer.bitwidth)
    if hasattr(layer, 'quantizer_dw'):
        quantizers.append(layer.quantizer_dw)
        layer.quantizer_dw = NoneQuantizer(layer.quantizer_dw.bitwidth)
    return quantizers


def restore_quantizers(layer, quantizers):
    """Restores quantizers for the specified layer

    Args:
        layer (:obj:`keras.Layer`): the layer
        quantizers ([:obj:`cnn2snn.quantization_ops.WeightQuantizer`]): the list of disabled
            quantizers.
    """
    layer.quantizer = quantizers[0]
    if hasattr(layer, 'quantizer_dw'):
        layer.quantizer_dw = quantizers[1]


def next_activation(layer):
    """Find the next activation after the specified layer

    Args:
        layer (:obj:`keras.Layer`): the layer

    Returns:
        :obj:`keras.Layer`: the next activation layer or None

    """
    outbound_nodes = layer.outbound_nodes
    for node in outbound_nodes:
        next_layer = node.layer
        if isinstance(next_layer, QuantizedReLU):
            return next_layer
        activation = next_activation(next_layer)
        if activation is not None:
            return activation
    return None


def is_on_top(layer, bottom_layer):
    """Checks if a layer follows another layer

    Args:
        layer (:obj:`keras.Layer`): the top layer
        bottom_layer (:obj:`keras.Layer`): the previous layer

    Returns:
        bool: True if the layer is on top
    """
    outbound_nodes = bottom_layer.outbound_nodes
    if len(outbound_nodes) == 0:
        # Bottom layer has no successors
        return False
    for node in outbound_nodes:
        # Check if layer is on top of one of the outbound layers
        next_layer = node.layer
        if next_layer == layer or is_on_top(layer, next_layer):
            return True
    return False


def is_quantized_neural(layer):
    """Check is a layer is a quantized neural layer

    These layers are the only ones eligible for calibration.

    Args:
        layer (:obj:`keras.Layer`): the top layer

    Returns:
        bool: True if the layer can be calibrated
    """
    return isinstance(
        layer, (QuantizedDense, QuantizedConv2D, QuantizedSeparableConv2D))



[docs]
class QuantizationSampler():
    """A tool to inspect the layer outputs of a quantized model

    The sampler is initialized with a quantized model and a set of samples used
    for the evaluation of the layer outputs. An optional batch size can be
    specified to avoid out-of-memory errors when using a GPU.

    To evaluate the outputs of a specific layer, it must first be selected
    using the `select_layer` member.

    Once done, three methods are available to inspect the layer outputs:

    - `quantized_outputs` returns the actual outputs of the layer,
    - `float_outputs` returns the outputs of the layer if its weights were not
      quantized,
    - `quantization_error` applies the `keras.metrics.Metric` passed as
      arguments to the difference between the float and quantized outputs.

    Example:

        >>> # Evaluate the quantization MSE of a quantized layer
        >>> model = tf.keras.Sequential([
        ...     tf.keras.layers.Dense(5, input_shape=(3,)),
        ...     tf.keras.layers.ReLU()])
        >>> model_quantized = cnn2snn.quantize(model,
        ...                                    weight_quantization=4,
        ...                                    activ_quantization=4)
        >>> # Instantiate a QuantizationSampler with a few dataset samples
        >>> sampler = QuantizationSampler(model_quantized, samples)
        >>> # Select the quantized layer
        >>> sampler.select_layer(model_quantized.layers[0])
        >>> # Evaluate the Mean Squared Error
        >>> m = keras.metrics.MeanSquaredError()
        >>> mse = sampler.quantization_error(m)

    Args:
        model (:obj:`keras.Model`): a quantized Keras model
        samples (:obj:`np.ndarray`): a set of calibration samples
        batch_size (int): the batch size used for evaluation.
    """

    def __init__(self, model, samples, batch_size=None):
        self.model = model
        n_inputs = samples.shape[0]
        if batch_size is None:
            batch_size = n_inputs
        # We only process full batches
        self.n_batches = n_inputs // batch_size
        # Split samples by batches
        self.samples = []
        for i in range(self.n_batches):
            b_from = i * batch_size
            b_to = (i + 1) * batch_size
            self.samples.append(samples[b_from:b_to, :, :, :])
        self.inputs = None
        self.submodel = None
        self.layer = None

    def select_layer(self, layer, include_activation=False):
        """Select the layer to inspect

        Args:
            layer (:obj:`keras.Layer`): a quantized keras layer
            include_activation (bool): evaluate the outputs after activation
        """
        if not is_quantized_neural(layer):
            raise ValueError("Output layer must be a Quantized layer")
        # Evaluate the outputs immediately before the selected layer to save
        # computation time when repeatedly evaluating the layer outputs
        if self.submodel is None or not is_on_top(layer, self.layer):
            # Evaluate layer inputs from the beginning of the model
            input_layer = self.model.layers[0]
            self.inputs = []
            for i in range(self.n_batches):
                self.inputs.append(self.samples[i])
        else:
            # Evaluate layer inputs using the outputs of the current submodel
            input_layer = self.submodel.layers[-1].outbound_nodes[0].layer
            for i in range(self.n_batches):
                self.inputs[i] = self.quantized_outputs(i)
        if layer != input_layer:
            # Advance inputs
            prev_layer = layer.inbound_nodes[0].inbound_layers
            submodel = create_submodel(self.model, input_layer, prev_layer)
            for i in range(self.n_batches):
                self.inputs[i] = submodel(self.inputs[i])
        # Create the submodel including the layer (and optionally its activation)
        activation = next_activation(layer) if include_activation else None
        if activation is not None:
            self.submodel = create_submodel(self.model, layer, activation)
        else:
            self.submodel = create_submodel(self.model, layer, layer)
        # Remember current layer
        self.layer = layer

    def quantized_outputs(self, batch_index=0):
        """Evaluates the quantized outputs of the selected layer

        Args:
            batch_index (int): the samples batch index

        Returns:
            :obj:`tf.Tensor`: the quantized outputs
        """
        if self.submodel is None:
            raise SystemError("Set output layer before requesting data")
        if batch_index < 0 or batch_index >= self.n_batches:
            raise ValueError("Invalid batch index")
        return self.submodel(self.inputs[batch_index])

    def float_outputs(self, batch_index=0):
        """Evaluates the 'float' outputs of the selected layer

        This disables the weights quantization and evaluates the outputs of the
        layer as if it was using float weights.

        Args:
            batch_index (int): the samples batch index

        Returns:
            :obj:`tf.Tensor`: the quantized outputs
        """
        if self.submodel is None:
            raise SystemError("Set output layer before requesting data")
        if batch_index < 0 or batch_index >= self.n_batches:
            raise ValueError("Invalid batch index")
        quantizers = disable_quantizers(self.layer)
        outputs = self.submodel(self.inputs[batch_index])
        restore_quantizers(self.layer, quantizers)
        return outputs

    def quantization_error(self, metric):
        """Evaluates the quantization error of the selected layer

        This evaluates the difference between the layer quantized outputs and
        the outputs if the layer was using float weights using the Metric
        passed as parameter.

        Args:
            metric (:obj:`keras.metrics.Metric`): the Metric for evaluation

        Returns:
            the Metric results (usually a single float)

        """
        metric.reset_state()
        for i in range(self.n_batches):
            metric.update_state(self.float_outputs(i),
                                self.quantized_outputs(i))
        return metric.result().numpy()



def previous_neural(layer):
    """Find the previous neural layer before the specified layer

    Args:
        layer (:obj:`keras.Layer`): the layer

    Returns:
        :obj:`keras.Layer`: the next activation layer or None

    """
    inbound_nodes = layer.inbound_nodes
    for node in inbound_nodes:
        previous_layer = node.inbound_layers
        if is_quantized_neural(previous_layer):
            return previous_layer
        neural_layer = previous_neural(previous_layer)
        if neural_layer is not None:
            return neural_layer
    return None


def activations_rescaling(model, samples, batch_size=None):
    """iterate over the layers of a quantized model, evaluating
    the pre-activations before each QuantizedReLu layer,
    and adjusting the max_value parameter of each QuantizedReLu
    accordingly.

    Args:
        model (:obj:`keras.Model`): a Keras Model
        samples (:obj:`np.ndarray`): a set of calibration samples
        batch_size (int): the batch size used for evaluation.

    """
    clone_model = clone_model_with_weights(model)
    sampler = QuantizationSampler(clone_model, samples, batch_size)
    for layer in clone_model.layers:
        if not isinstance(layer, QuantizedReLU):
            continue
        previous_neural_layer = previous_neural(layer)
        sampler.select_layer(previous_neural_layer, True)
        float_output = sampler.float_outputs()
        max_val = tf.reduce_max(float_output)
        if max_val < layer.max_value_:
            layer.max_value_ = tf.constant(max_val, dtype=tf.float32)

    return clone_model