#!/usr/bin/env python
# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Weight quantizers API"""
# -*- coding: utf-8 -*-
from __future__ import absolute_import
import keras.backend as K
import tensorflow as tf
from keras.layers import Layer


class WeightQuantizer(Layer):
    """The base class for all weight quantizers.

    This base class must be overloaded, and the two functions `quantize`
    and `scale_factor` must be implemented.

    Quantizers derived from this class must be symmetric uniform mid-tread
    quantizers, in order to be compatible with the conversion into an Akida
    model. Quantization is usually done in two steps:

    #. The weights must first be quantized to integer values in the range
       imposed by the bitwidth, e.g. from -7 to 7 for a 4-bit quantization.
    #. These integer weights are then reconstructed as float discretized
       values, in the range of the original weights. For example, 4-bit
       integer weights are reconstructed on a grid from -7*qstep to 7*qstep,
       where qstep is the quantization step size between two levels of the
       uniform grid.

    For a full explanation of mid-tread uniform quantization, one can take a
    look at `the Wikipedia page
    <https://en.wikipedia.org/wiki/Quantization_(signal_processing)#Example>`_.

    The `quantize` function takes the original weights as input and must
    return the reconstructed float values after quantization. The
    `scale_factor` function must return the factor used to transform the
    float reconstructed weights into the integer values obtained after
    step 1. In other words, given a set of float weights "w":

        quantize(w) * scale_factor(w) is a set of integer weights.

    The bitwidth defines the number of quantization levels on which the
    weights will be quantized. For instance, a 4-bit quantization gives
    integer values between -7 and 7. More generally, for an n-bit
    quantization, values range from -kmax to kmax, where kmax is
    (2^(n-1) - 1).

    Args:
        bitwidth (int): the quantization bitwidth.
    """

    def __init__(self, bitwidth, **kwargs):
        if bitwidth <= 1:
            raise ValueError("The quantization bitwidth should be higher "
                             f"than 1. Received bitwidth {bitwidth}")
        self.bitwidth_ = int(bitwidth)
        self.kmax_ = (2.**(bitwidth - 1) - 1)
        super().__init__(**kwargs)

    def quantize(self, w):
        """Quantizes the specified weights Tensor.

        This function must return a tf.Tensor containing float weights
        discretized on a uniform grid based on the scale factor "sf".
        In other words, the discretized weights must be values among:

            -kmax/sf, ..., -2/sf, -1/sf, 0, 1/sf, 2/sf, ..., kmax/sf

        Args:
            w (:obj:`tensorflow.Tensor`): the weights Tensor to quantize.

        Returns:
            :obj:`tensorflow.Tensor`: a Tensor of quantized weights.
        """
        raise NotImplementedError()

    def scale_factor(self, w):
        """Evaluates the scale factor for the specified weights tf.Tensor.

        This function returns the scale factor used to get the quantized
        integer weights from the reconstructed float weights. It is equal
        to the inverse of the quantization step size.

        The scale factor can be a scalar that is applied to the whole
        weights tensor. It can also be a vector whose length is the number
        of filters, where each value applies to the weights of the
        corresponding output filter. This is called per-axis quantization,
        as opposed to per-tensor quantization. The number of filters is
        usually the last dimension of the weights tensor. More details are
        given `here
        <https://www.tensorflow.org/lite/performance/quantization_spec?hl=sv#per-axis_vs_per-tensor>`__.

        Note that the `quantizer_dw` of a depthwise convolution in a
        QuantizedSeparableConv2D layer must imperatively return a scalar
        scale factor.

        Args:
            w (:obj:`tensorflow.Tensor`): the weights Tensor to quantize.

        Returns:
            :obj:`tensorflow.Tensor`: a Tensor containing one scale factor
            (per-tensor quantization) or several (per-axis quantization).
        """
        raise NotImplementedError()

    @property
    def bitwidth(self):
        """Returns the bitwidth of the quantizer."""
        return self.bitwidth_

    def get_config(self):
        config = super().get_config()
        config.update({'bitwidth': self.bitwidth_})
        return config
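

# A hedged, illustrative sketch (not part of the cnn2snn API): a concrete
# subclass only has to provide `quantize` and `scale_factor`. The class
# below uses a fixed, hypothetical scale factor of 8 to keep the example
# minimal; the real quantizers in this module derive it from the weight
# statistics instead.
class _FixedScaleQuantizerSketch(WeightQuantizer):
    """Illustrative only: quantizes weights on a fixed grid of step 1/8."""

    def scale_factor(self, w):
        # Hypothetical constant: 8 integer levels per unit of weight range.
        return tf.constant(8.0)

    def quantize(self, w):
        # Integer step, then float reconstruction, clipped to [-kmax, kmax],
        # so that quantize(w) * scale_factor(w) is a set of integer weights.
        sf = self.scale_factor(w)
        return tf.clip_by_value(round_through(w * sf), -self.kmax_,
                                self.kmax_) / sf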


class LinearWeightQuantizer(WeightQuantizer):
    """An abstract linear weight quantizer.

    This abstract class proposes a linear, symmetric and uniform
    quantization function. The "linear" term here means that there is no
    non-linear transformation of the weights before the uniform
    quantization.

    The `scale_factor` function must be overloaded.
    """

    def scale_factor(self, w):
        raise NotImplementedError()

    def quantize(self, w):
        """Linearly quantizes the input weights on a symmetric uniform grid
        based on the scale factor.

        The input weights are directly rounded to the closest discretized
        value, without any transformation of the input weights.

        The gradient is estimated using the Straight-Through Estimator
        (STE), i.e. the gradient is computed as if there were no
        quantization.
        """
        sf = self.scale_factor(w)
        return tf.clip_by_value(round_through(w * sf), -self.kmax_,
                                self.kmax_) / sf
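

# A hedged, illustrative sketch (not part of the cnn2snn API): the STE used
# by `LinearWeightQuantizer.quantize` means the rounding is invisible to the
# gradient. This can be observed directly on `round_through`, defined at the
# end of this module.
def _ste_gradient_sketch():
    """Illustrative only: the gradient of round_through(x) is 1 everywhere."""
    x = tf.constant([0.2, 0.5, 1.7])
    with tf.GradientTape() as tape:
        tape.watch(x)
        y = round_through(x)
    # y is [0., 0., 2.] (round-half-to-even), but the gradient is
    # [1., 1., 1.]: the rounding is bypassed in the backward pass.
    return y, tape.gradient(y, x)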


class StdWeightQuantizer(LinearWeightQuantizer):
    """A uniform quantizer based on the weights standard deviation.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, 15 quantization levels: from -7 * qstep
    to 7 * qstep, with qstep being the quantization step. The quantization
    step is defined by:

        qstep = threshold * std(W) / max_value

    with max_value being 2^(bitwidth-1) - 1. E.g. with bitwidth = 4,
    max_value = 7.

    All values below -threshold * std(W) (resp. above threshold * std(W))
    are clipped to the minimum (resp. maximum) quantized value.

    Args:
        threshold (float): the standard deviation multiplier used to
            exclude outliers.
        bitwidth (int): the quantizer bitwidth defining the number of
            quantized values.
    """

    def __init__(self, threshold=3, bitwidth=4, **kwargs):
        # Having a cast guarantees a check when the parameters are not
        # numbers (e.g. None)
        self.threshold_ = float(threshold)
        # Initialize parent to store the bitwidth
        super().__init__(bitwidth, **kwargs)

    @staticmethod
    def sigma_(w):
        """Returns the standard deviation(s) of a set of weights."""
        return tf.math.reduce_std(w)

    @property
    def threshold(self):
        """Returns the threshold of the std quantizer."""
        return self.threshold_

    def scale_factor(self, w):
        sigma_scaled = self.sigma_(w) * self.threshold_
        return clip_scale_factor(self.kmax_ / sigma_scaled)

    def get_config(self):
        config = super().get_config()
        config.update({'threshold': self.threshold_})
        return config
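

# A hedged, illustrative sketch (not part of the cnn2snn API): with the
# default threshold of 3 and bitwidth of 4, qstep = 3 * std(w) / 7 and the
# reconstructed weights all land on the grid {-7, ..., 7} * qstep.
def _std_quantizer_sketch():
    """Illustrative only: quantize(w) * scale_factor(w) gives integers."""
    quantizer = StdWeightQuantizer(threshold=3, bitwidth=4)
    w = tf.random.normal((3, 3, 8, 16))
    integers = quantizer.quantize(w) * quantizer.scale_factor(w)
    # All values are integers in [-7, 7], up to float rounding error.
    return tf.reduce_max(tf.abs(integers - tf.round(integers)))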


class StdPerAxisQuantizer(StdWeightQuantizer):
    """A quantizer that relies on the weights standard deviation per axis.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, 15 quantization levels: from -7 * qstep
    to 7 * qstep, with qstep being the quantization step. The quantization
    step is defined by:

        qstep = threshold * std(W) / max_value

    with max_value being 2^(bitwidth-1) - 1. E.g. with bitwidth = 4,
    max_value = 7.

    This is an evolution of the StdWeightQuantizer that computes the
    standard deviation per axis.

    The last dimension is used as axis, meaning that the scale factor is a
    vector with as many values as "filters", or "neurons".

    Note: for a DepthwiseConv2D layer that has a single filter, this
    quantizer is strictly equivalent to the StdWeightQuantizer.
    """

    def sigma_(self, w):
        # Reduce over all dimensions except the last one (the filters) to
        # get one standard deviation per output filter.
        red_range = tf.range(tf.rank(w) - 1)
        return tf.math.reduce_std(w, red_range)
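

# A hedged, illustrative sketch (not part of the cnn2snn API): per-axis
# quantizers return one scale factor per output filter, i.e. per entry of
# the last weight dimension.
def _per_axis_sketch():
    """Illustrative only: compares per-tensor and per-axis scale factors."""
    w = tf.random.normal((3, 3, 8, 16))  # 16 output filters
    per_tensor = StdWeightQuantizer().scale_factor(w)  # scalar, shape ()
    per_axis = StdPerAxisQuantizer().scale_factor(w)   # shape (16,)
    return per_tensor.shape, per_axis.shape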


class MaxQuantizer(LinearWeightQuantizer):
    """A quantizer that relies on the maximum range.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, 15 quantization levels: from -7 * qstep
    to 7 * qstep, with qstep being the quantization step. The quantization
    step is defined by:

        qstep = max_range / max_value

    with:

    - max_range = max(abs(W))
    - max_value = 2^(bitwidth-1) - 1. E.g. with bitwidth = 4, max_value = 7.

    Args:
        bitwidth (int): the quantizer bitwidth defining the number of
            quantized values.
    """

    def __init__(self, bitwidth=4, **kwargs):
        # Initialize parent to store the bitwidth
        super().__init__(bitwidth, **kwargs)

    @staticmethod
    def max_range_(w):
        """Gets the range on which the weights are quantized.

        This quantizer discretizes weights in the range:

            [-max(abs(weights)) ; max(abs(weights))]
        """
        return tf.math.reduce_max(tf.math.abs(w))

    def scale_factor(self, w):
        return clip_scale_factor(self.kmax_ / self.max_range_(w))
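

# A hedged, illustrative sketch (not part of the cnn2snn API): unlike the
# std-based quantizers, MaxQuantizer never clips any weight, since the
# largest absolute weight maps exactly to the integer kmax.
def _max_quantizer_sketch():
    """Illustrative only: max(abs(w)) is quantized to the integer kmax."""
    quantizer = MaxQuantizer(bitwidth=4)
    w = tf.constant([-0.5, 0.1, 0.35])
    sf = quantizer.scale_factor(w)  # kmax / max_range = 7 / 0.5 = 14
    # Integer weights: [-7., 1., 5.] (0.1 * 14 = 1.4 -> 1, 0.35 * 14 = 4.9 -> 5)
    return quantizer.quantize(w) * sf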


class MaxPerAxisQuantizer(MaxQuantizer):
    """A quantizer that relies on the maximum range per axis.

    Quantizes the specified weights into 2^bitwidth - 1 values centered on
    zero. E.g. with bitwidth = 4, 15 quantization levels: from -7 * qstep
    to 7 * qstep, with qstep being the quantization step. The quantization
    step is defined by:

        qstep = max_range / max_value

    with:

    - max_range = max(abs(W))
    - max_value = 2^(bitwidth-1) - 1. E.g. with bitwidth = 4, max_value = 7.

    This is an evolution of the MaxQuantizer that computes the max_range
    per axis.

    The last dimension is used as axis, meaning that the scale factor is a
    vector with as many values as "filters", or "neurons".

    Note: for a DepthwiseConv2D layer that has a single filter, this
    quantizer is strictly equivalent to the MaxQuantizer.
    """

    def max_range_(self, w):
        # Reduce over all dimensions except the last one (the filters), and
        # clip the range to a small positive value so that all-zero filters
        # do not lead to a division by zero in `scale_factor`.
        red_range = tf.range(tf.rank(w) - 1)
        low_limit_range = 1e-10
        return tf.clip_by_value(tf.math.reduce_max(tf.math.abs(w), red_range),
                                low_limit_range, tf.float32.max)


def get(identifier):
    """Returns the weight quantizer corresponding to the identifier.

    The 'identifier' input can take two types: either a weight quantizer
    instance, or a dictionary corresponding to the config serialization
    of a weight quantizer.

    Args:
        identifier (WeightQuantizer or dict): either a WeightQuantizer
            instance or a configuration dictionary to deserialize.

    Returns:
        :obj:`cnn2snn.WeightQuantizer`: a weight quantizer.
    """
if isinstance(identifier, WeightQuantizer):
return identifier
if isinstance(identifier, dict):
return tf.keras.utils.deserialize_keras_object(
identifier,
custom_objects={
'StdWeightQuantizer': StdWeightQuantizer,
'StdPerAxisQuantizer': StdPerAxisQuantizer,
'MaxQuantizer': MaxQuantizer,
'MaxPerAxisQuantizer': MaxPerAxisQuantizer
})
raise ValueError(f"Could not interpret identifier {identifier} "
"for a weight quantizer object")


def round_through(x):
    """Element-wise rounding to the closest integer, with full gradient
    propagation.

    A trick from `Sergey Ioffe <http://stackoverflow.com/a/36480182>`_.
    """
    rounded = K.round(x)
    return x + K.stop_gradient(rounded - x)


def ceil_through(x):
    """Element-wise ceiling operation (to the closest greater integer), with
    full gradient propagation.
    """
    ceiling_value = tf.math.ceil(x)
    return x + K.stop_gradient(ceiling_value - x)


def clip_scale_factor(scale_factor):
    """Clips the scale factor(s) to 1000, in order to avoid very large scale
    factors and thus very large Akida thresholds after conversion.
    """
    return tf.clip_by_value(scale_factor, 0, 1e3)
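

# A hedged, illustrative sketch (not part of the cnn2snn API): without the
# clipping above, near-constant weights would drive the scale factor towards
# infinity, since std(w) or max(abs(w)) appears in the denominator.
def _clip_scale_factor_sketch():
    """Illustrative only: tiny weight ranges saturate the scale factor."""
    w = tf.constant([1e-6, -1e-6])
    # 7 / 1e-6 = 7e6 would be returned unclipped; it is capped to 1e3.
    return MaxQuantizer(bitwidth=4).scale_factor(w)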