Source code for quantizeml.analysis.quantization_error_api

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2024 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
__all__ = ["measure_layer_quantization_error", "measure_cumulative_quantization_error"]

import keras
import onnx

from .quantization_error import (keras_layer_quantization_error, onnx_node_quantization_error,
                                 keras_cumulative_quantization_error,
                                 onnx_cumulative_quantization_error)



[docs]
def measure_layer_quantization_error(fmodel, qmodel, target_layer=None, batch_size=16, seed=None):
    """Measures the layer quantization error

    Returns a dictionary where the keys are the name of each layer and the values are a dictionary
    composed of the set of the following metrics:

        * Symmetrical Mean Absolute Percentage Error (SMAPE): :func:`tools.metrics.SMAPE`
        * Saturation: Percentage of how many values in the quantized layer saturate

    The computation is delegated to the ONNX or Keras implementation depending on the type of
    the given models.

    Example:
        >>> summary = measure_layer_quantization_error(fmodel, qmodel)
        >>> assert isinstance(summary[a_layer_name], dict)
        >>> assert "SMAPE" in summary[a_layer_name]

    Args:
        fmodel (onnx.ModelProto or tf.keras.Model): the float model.
        qmodel (onnx.ModelProto or tf.keras.Model): the quantized version of `fmodel`.
        target_layer (str, optional): error computation is performed only in the target layer/node,
            expanding the analysis to each output channel. Defaults to None.
        batch_size (int, optional): the batch size of the samples to be generated.
            It allows a better metrics generalization, but consumes more resources. Defaults to 16.
        seed (int, optional): a random seed. Defaults to None.

    Returns:
        dict: the quantization error for each layer

    Raises:
        TypeError: if `fmodel` and `qmodel` are not both ONNX models or both Keras models.

    Notes:
        * Layers/Nodes that do not produce quantization errors will not be taken into account
          (e.g. QuantizedReshape).
    """
    keras_model_types = (keras.Sequential, keras.Model)

    # ONNX path: the target layer is forwarded as the target node
    if isinstance(fmodel, onnx.ModelProto) and isinstance(qmodel, onnx.ModelProto):
        return onnx_node_quantization_error(fmodel,
                                            qmodel,
                                            target_node=target_layer,
                                            batch_size=batch_size,
                                            seed=seed)

    # Keras path
    if isinstance(fmodel, keras_model_types) and isinstance(qmodel, keras_model_types):
        return keras_layer_quantization_error(fmodel,
                                              qmodel,
                                              target_layer=target_layer,
                                              batch_size=batch_size,
                                              seed=seed)

    # Mixed or unsupported types: report the actual type of *each* model so that
    # the caller can tell which one is wrong
    model_types = (onnx.ModelProto, *keras_model_types)
    raise TypeError(f"Both models should be the same type, one of {model_types}. "
                    f"Received: {type(fmodel)} and {type(qmodel)}.")




[docs]
def measure_cumulative_quantization_error(fmodel,
                                          qmodel,
                                          target_layer=None,
                                          batch_size=16,
                                          seed=None):
    """Measures the cumulative quantization error

    Returns a dictionary where the keys are the name of each layer and the values are a dictionary
    composed of the set of the following metrics:

        * Symmetrical Mean Absolute Percentage Error (SMAPE): :func:`tools.metrics.SMAPE`
        * Saturation: Percentage of how many values in the quantized layer saturate

    Each metric measures the quantization error from the input to the layer.

    The computation is delegated to the ONNX or Keras implementation depending on the type of
    the given models.

    Example:
        >>> summary = measure_cumulative_quantization_error(fmodel, qmodel)
        >>> assert isinstance(summary[a_layer_name], dict)
        >>> assert "SMAPE" in summary[a_layer_name]

    Args:
        fmodel (onnx.ModelProto or tf.keras.Model): the float model.
        qmodel (onnx.ModelProto or tf.keras.Model): the quantized version of `fmodel`.
        target_layer (str, optional): error computation is performed only in the target layer/node,
            expanding the analysis to each output channel. Defaults to None.
        batch_size (int, optional): the batch size of the samples to be generated.
            It allows a better metrics generalization, but consumes more resources. Defaults to 16.
        seed (int, optional): a random seed. Defaults to None.

    Returns:
        dict: the quantization error for each layer

    Raises:
        TypeError: if `fmodel` and `qmodel` are not both ONNX models or both Keras models.

    Notes:
        * Layers/Nodes that do not produce quantization errors will not be taken into account
          (e.g. QuantizedReshape).
    """
    keras_model_types = (keras.Sequential, keras.Model)

    # ONNX path: the target layer is forwarded as the target node
    if isinstance(fmodel, onnx.ModelProto) and isinstance(qmodel, onnx.ModelProto):
        return onnx_cumulative_quantization_error(fmodel,
                                                  qmodel,
                                                  target_node=target_layer,
                                                  batch_size=batch_size,
                                                  seed=seed)

    # Keras path
    if isinstance(fmodel, keras_model_types) and isinstance(qmodel, keras_model_types):
        return keras_cumulative_quantization_error(fmodel,
                                                   qmodel,
                                                   target_layer=target_layer,
                                                   batch_size=batch_size,
                                                   seed=seed)

    # Mixed or unsupported types: report the actual type of *each* model so that
    # the caller can tell which one is wrong
    model_types = (onnx.ModelProto, *keras_model_types)
    raise TypeError(f"Both models should be the same type, one of {model_types}. "
                    f"Received: {type(fmodel)} and {type(qmodel)}.")