Source code for quantizeml.tensors.qfloat

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2022 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
import tensorflow as tf

from .qtensor import QTensor, slice_tensor_by_index
from .fixed_point import FixedPoint
from ..debugging import assert_equal



[docs]
class QFloat(QTensor):
    """A Tensor of FixedPoint values and scales representing float numbers

    The QFloat is a dual representation of a float Tensor combining FixedPoint
    values and float scales.

    The QFloat is typically used to represent float tensors whose quantization
    range is not 'optimal' for FixedPoint quantization: the original tensor is
    first divided by the scales to be aligned on optimal ranges, then quantized
    to FixedPoint values.

    When converting back to float, values are dequantized and multiplied by the
    scales to obtain the approximated float tensor.

    Args:
        fp (:obj:`FixedPoint`): a FixedPoint of values
        scales (tf.Tensor): a Tensor of scales (cast to float32 on construction)
    """
    # Field declarations with their class-level defaults; both are overwritten
    # by __init__ for every instance.
    fp: FixedPoint = FixedPoint(1.0, 32, 0)
    scales: tf.Tensor = 1.0

    def __init__(self, fp, scales):
        """Build a QFloat from FixedPoint values and their scales.

        Args:
            fp (:obj:`FixedPoint`): the inner FixedPoint values.
            scales (tf.Tensor): the scales, cast to float32 before being stored.
        """
        # Scales are always stored as float32, whatever the input type
        float_scales = tf.cast(scales, tf.float32)
        self.scales = float_scales
        self.fp = fp
        # The QFloat exposes the shape of its inner FixedPoint
        self.shape = self.fp.shape

    @property
    def values(self):
        """The values of the inner FixedPoint."""
        inner = self.fp
        return inner.values

    @property
    def frac_bits(self):
        """The fractional bits of the inner FixedPoint."""
        inner = self.fp
        return inner.frac_bits

    @property
    def value_bits(self):
        """The number of value bits of the inner FixedPoint."""
        inner = self.fp
        return inner.value_bits

    @property
    def name(self):
        """The name of the inner FixedPoint."""
        inner = self.fp
        return inner.name

    @property
    def per_tensor(self):
        """Whether both the inner FixedPoint and the scales are per-tensor.

        The scales are considered per-tensor when they are a scalar (rank 0).
        """
        fp_is_per_tensor = self.fp.per_tensor
        # Short-circuit: the scales rank is only inspected for a per-tensor FixedPoint
        return fp_is_per_tensor and self.scales.shape.ndims == 0


[docs]
    @staticmethod
    def max_frac_bits(value_bits, ranges, scales, clamp=True):
        """Evaluate the maximum fractional bit index for the quantization ranges.

        The quantization ranges are first divided by the scales, then the
        evaluation is delegated to the equivalent FixedPoint method, which
        receives the ``clamp`` flag unchanged.

        Args:
            value_bits (int): the number of value bits.
            ranges (tf.Tensor): a tensor of float quantization ranges.
            scales (tf.Tensor): the scales to apply to the quantization ranges.
            clamp (bool, optional): clamp the results to value_bits. Defaults to True.

        Returns:
            tf.Tensor: a tensor of fractional bits.
        """
        # Project the ranges into the rescaled domain before evaluating them
        rescaled_ranges = ranges / scales
        return FixedPoint.max_frac_bits(value_bits, rescaled_ranges, clamp)



[docs]
    @staticmethod
    def optimal_scales(ranges, value_bits):
        r"""Evaluates the optimal QFloat scales for quantization ranges.

        We choose the optimal quantization range for a given bitwidth as:

            [-int_max, int_max], with :math:`int\_max = 2^{bits} - 1`.

        This method evaluates the scales as the ratio that aligns the specified
        ranges on the optimal ranges.

        Args:
            ranges (tf.Tensor): a tensor of quantization ranges.
            value_bits (int): the number of value bits.

        Returns:
            tf.Tensor: the optimal scales.
        """
        # The optimal range bound for this bitwidth
        int_max = QTensor.int_max(value_bits)
        # The scales are the float32 ranges expressed as a ratio of that bound
        return tf.cast(ranges, tf.float32) / int_max



[docs]
    @staticmethod
    def quantize_scales(scales, scale_bits):
        """Quantizes the QFloat scales with the specified bitwidth.

        The fractional bits of the scales are evaluated from the scales
        themselves, without clamping them to ``scale_bits``.

        Args:
            scales (tf.Tensor): a tensor of float scales.
            scale_bits (int): the number of scales bits.

        Returns:
            :obj:`FixedPoint`: the FixedPoint scales.
        """
        # Unclamped fractional bits for the scales
        frac_bits = FixedPoint.max_frac_bits(scale_bits, scales, clamp=False)
        return FixedPoint.quantize(scales, scale_bits, frac_bits)



[docs]
    @staticmethod
    def quantize(x, value_bits, scales, frac_bits=0.):
        """Converts a float Tensor to a QFloat

        The inputs are divided by the provided scales, and the rescaled inputs
        are then quantized to a FixedPoint with the specified value bits and
        fractional bits. The scales are used as provided.

        The resulting integer values are clipped to [-int_max-1, int_max].

        Args:
            x (tf.Tensor): a tensor of float values.
            value_bits (int): the number of value bits.
            scales (tf.Tensor): a tensor of alignment scales.
            frac_bits (int): the inner FixedPoint fractional bits (defaults to 0).

        Returns:
            :obj:`QFloat`: the QFloat representation.
        """
        # Project the inputs in the quantization ranges, then quantize them
        rescaled_inputs = x / scales
        inner_fp = FixedPoint.quantize(rescaled_inputs, value_bits, frac_bits)
        return QFloat(inner_fp, scales)



[docs]
    def to_float(self):
        """Returns a float representation of the QFloat

        The inner FixedPoint is converted to float, then multiplied by the scales.

        Returns:
            tf.Tensor: the float representation.
        """
        dequantized = self.fp.to_float()
        return dequantized * self.scales



[docs]
    def to_fixed_point(self, scale_bits=8):
        """Returns a FixedPoint representation of the QFloat

        The scales are quantized with ``scale_bits``, then multiplied with the
        inner FixedPoint values.

        Args:
            scale_bits (int, optional): the scales quantization bitwidth. Defaults to 8.

        Returns:
            (:obj:`FixedPoint`, :obj:`FixedPoint`): the FixedPoint representation and scales.
        """
        fp_scales = QFloat.quantize_scales(self.scales, scale_bits)
        # Product of the inner values and the quantized scales
        scaled_values = tf.math.multiply(self.fp, fp_scales, name="apply_scales")
        return scaled_values, fp_scales



[docs]
    def upscale(self, frac_bits, value_bits=None):
        """Align a QFloat to a specified precision

        The target precision must be higher than the current one.

        Only the inner FixedPoint is upscaled: the scales are left untouched.

        Args:
            frac_bits (tf.Tensor): the target fractional bits
            value_bits (int, optional): the target value bits (defaults to current value bits)

        Returns:
            tuple(:obj:`QFloat`, tf.Tensor): the upscaled QFloat and the shift
            returned by the inner FixedPoint upscale.
        """
        # Fall back on the current bitwidth when no target is specified
        target_bits = self.value_bits if value_bits is None else value_bits
        upscaled_fp, shift = self.fp.upscale(frac_bits, target_bits)
        return QFloat(upscaled_fp, self.scales), shift



[docs]
    def expand(self, value_bits):
        """Expand the QFloat to the specified bitwidth

        This returns an equivalent QFloat with a higher or equal number of
        value bits and a scalar fractional bit corresponding to the maximum of
        the initial fractional bits on all channels.

        This is mostly used to recover a per-tensor QFloat that has been
        compressed to a lower number of value bits.

        Note that even though the frac_bits are aligned, the scales remain unchanged.

        Args:
            value_bits (int): the target value_bits

        Returns:
            tuple(:obj:`QFloat`, tf.Tensor): a new QFloat with expanded
            fractional bits and the shift that was applied.

        Raises:
            ValueError: if value_bits is lower than the current value bits.
        """
        if value_bits < self.value_bits:
            message = (
                f"Cannot reduce {self.name} bitwidth from {self.value_bits} to {value_bits}:"
                " use a quantizer instead.")
            raise ValueError(message)
        # Align on the highest fractional bits found in the QFloat
        return self.upscale(tf.reduce_max(self.frac_bits), value_bits)



[docs]
    def promote(self, bits):
        """Increase the number of value bits

        The inner FixedPoint is promoted and the scales are kept identical.

        Args:
            bits (int): the new number of value bits

        Returns:
            :obj:`QFloat`: a QFloat with increased value bits
        """
        promoted_fp = self.fp.promote(bits)
        return QFloat(promoted_fp, self.scales)


    def __add__(self, other):
        """Add two QFloat sharing the same scales.

        Args:
            other (:obj:`QFloat`): the QFloat to add.

        Returns:
            :obj:`QFloat`: the sum of the inner FixedPoint, with the scales of self.

        Raises:
            TypeError: if other is not a QFloat.
        """
        if not isinstance(other, QFloat):
            raise TypeError(
                f"Unsupported operand type(s) for +: 'QFloat' and '{type(other)}'")
        # The inner FixedPoint can only be added if both operands share their scales
        assert_equal(self.scales, other.scales,
                     message=f"{self.name} and {other.name} have different scales.")
        fp_sum = self.fp + other.fp
        return QFloat(fp_sum, self.scales)

    def __sub__(self, other):
        """Subtract a QFloat sharing the same scales.

        Args:
            other (:obj:`QFloat`): the QFloat to subtract.

        Returns:
            :obj:`QFloat`: the difference of the inner FixedPoint, with the scales of self.

        Raises:
            TypeError: if other is not a QFloat.
        """
        if not isinstance(other, QFloat):
            raise TypeError(
                f"Unsupported operand type(s) for -: 'QFloat' and '{type(other)}'")
        # The inner FixedPoint can only be subtracted if both operands share their scales
        assert_equal(self.scales, other.scales,
                     message=f"{self.name} and {other.name} have different scales.")
        fp_difference = self.fp - other.fp
        return QFloat(fp_difference, self.scales)

    def __truediv__(self, other):
        """Divide the QFloat by dividing its scales.

        The inner FixedPoint is left untouched: only the scales are divided,
        so ``other`` must be a type the scales can be divided by.

        Args:
            other: the divisor applied to the scales.

        Returns:
            :obj:`QFloat`: a QFloat with the same inner FixedPoint and divided scales.
        """
        return QFloat(self.fp, self.scales / other)

    def __getitem__(self, idx):
        """Retrieve a slice or element from the QFloat tensor.

        The inner FixedPoint is always indexed with ``idx``. The scales are
        reused unchanged for a per-tensor QFloat, and sliced with
        ``slice_tensor_by_index`` otherwise.

        Args:
            idx (int, slice, or tuple): The index or slice to retrieve. It can be an integer,
                a slice, or a tuple of integers/slices. The tuple can
                also include the Ellipsis (`...`) to denote a slice
                over remaining dimensions.

        Returns:
            QFloat: A new QFloat instance containing the sliced values and corresponding
                scales.
        """
        fp_slice = self.fp[idx]

        # Per-tensor scales apply to any slice of the values
        if self.per_tensor:
            return QFloat(fp_slice, self.scales)

        # Otherwise slice the scales too, given the rank of the values
        rank = len(self.shape)
        scales_slice = slice_tensor_by_index(idx, rank, self.scales)
        return QFloat(fp_slice, scales_slice)