# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""Model quantization API"""
import tensorflow as tf
from keras.models import clone_model
from keras.layers import Conv2D, SeparableConv2D, Dense, ReLU, Layer
from .quantization_ops import MaxQuantizer, MaxPerAxisQuantizer
from .quantization_layers import (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense, ActivationDiscreteRelu,
QuantizedActivation, QuantizedReLU)
from .transforms import sequentialize, invert_batchnorm_pooling, fold_batchnorm
from .cnn2snn_objects import cnn2snn_objects
keras_neural_layers = (Conv2D, SeparableConv2D, Dense)
cnn2snn_neural_layers = (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense)
supported_neural_layers = keras_neural_layers + cnn2snn_neural_layers
def quantize(model,
weight_quantization=0,
activ_quantization=0,
input_weight_quantization=None,
fold_BN=True,
quantizer_function=None):
"""Converts a standard sequential Keras model to a CNN2SNN Keras quantized
model, compatible for Akida conversion.
This function returns a Keras model where the standard neural layers
(Conv2D, SeparableConv2D, Dense) and the ReLU activations are replaced with
CNN2SNN quantized layers (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense, QuantizedRelu).
Several transformations are applied to the model:
- the order of MaxPool and BatchNormalization layers are inverted so that
BatchNormalization always happens first,
- the batch normalization layers are folded into the previous layers.
This new model can be either directly converted to akida, or first
retrained for a few epochs to recover any accuracy loss.
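
    Examples:
        A minimal usage sketch: the small Sequential model below is purely
        illustrative, and ``my_quantizer_function`` is a hypothetical policy
        showing how ``quantizer_function`` can override the default
        quantizers.

        >>> model = tf.keras.Sequential([
        ...     tf.keras.layers.Dense(5, input_shape=(3,)),
        ...     tf.keras.layers.Softmax()])
        >>> model_quantized = cnn2snn.quantize(model,
        ...                                    weight_quantization=4,
        ...                                    activ_quantization=4)
        >>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)

        >>> # Hypothetical policy: plain MaxQuantizer for Dense weights,
        >>> # default quantizer everywhere else
        >>> def my_quantizer_function(layer, default_quantizer):
        ...     if isinstance(layer, tf.keras.layers.Dense):
        ...         return cnn2snn.quantization_ops.MaxQuantizer(bitwidth=4)
        ...     return default_quantizer
        >>> model_quantized = cnn2snn.quantize(
        ...     model,
        ...     weight_quantization=4,
        ...     activ_quantization=4,
        ...     quantizer_function=my_quantizer_function)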
Args:
model (tf.keras.Model): a standard Keras model
weight_quantization (int): sets all weights in the model to have
a particular quantization bitwidth except for the weights in the
first layer.
* '0' implements floating point 32-bit weights.
* '2' through '8' implements n-bit weights where n is from 2-8 bits.
activ_quantization (int): sets all activations in the model to have a
particular activation quantization bitwidth.
* '0' implements floating point 32-bit activations.
            * '1' through '8' implements n-bit activations where n is from 1-8 bits.
input_weight_quantization (int): sets weight quantization in the first
layer. Defaults to weight_quantization value.
* 'None' implements the same bitwidth as the other weights.
* '0' implements floating point 32-bit weights.
* '2' through '8' implements n-bit weights where n is from 2-8 bits.
fold_BN (bool): enable folding batch normalization layers with their
corresponding neural layer.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
tf.keras.Model: a quantized Keras model
"""
# Overrides input weight quantization if None
if input_weight_quantization is None:
input_weight_quantization = weight_quantization
if fold_BN:
# Identify sequential branches
seq_model = sequentialize(model)
# Invert batch normalization and pooling
model_t = invert_batchnorm_pooling(seq_model)
# Fold batch norm layers with corresponding neural layers
model_t = fold_batchnorm(model_t)
else:
# Keep original model without transformation
model_t = model
# Convert neural layers and ReLU to CNN2SNN quantized layers
first_neural_layer = True
def replace_layer(layer):
nonlocal first_neural_layer
if isinstance(layer, (ReLU, QuantizedActivation)):
return _quantize_activation_layer(layer, activ_quantization)
if type(layer) in supported_neural_layers:
if first_neural_layer:
bitwidth = input_weight_quantization
first_neural_layer = False
else:
bitwidth = weight_quantization
return _convert_to_quantized_layer(layer, bitwidth,
quantizer_function)
return layer.__class__.from_config(layer.get_config())
with tf.keras.utils.custom_object_scope(cnn2snn_objects):
new_model = clone_model(model_t, clone_function=replace_layer)
new_model.set_weights(model_t.get_weights())
return new_model
def quantize_layer(model, target_layer, bitwidth, quantizer_function=None):
"""Quantizes a specific layer with the given bitwidth.
This function returns a Keras model where the target layer is quantized.
All other layers are preserved.
    If the target layer is a native Keras layer (Conv2D, SeparableConv2D,
    Dense, ReLU), it is replaced by a CNN2SNN quantized layer
    (QuantizedConv2D, QuantizedSeparableConv2D, QuantizedDense,
    QuantizedReLU). If the target layer is an already quantized layer, only
    the bitwidth is modified.
Examples:
>>> # Quantize a layer of a native Keras model
>>> model = tf.keras.Sequential([
... tf.keras.layers.Dense(5, input_shape=(3,)),
... tf.keras.layers.Softmax()])
>>> model_quantized = cnn2snn.quantize_layer(model,
... target_layer=0,
... bitwidth=4)
>>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)
>>> print(model_quantized.layers[0].quantizer.bitwidth)
4
        >>> # Quantize a layer of an already quantized model
>>> model_quantized = cnn2snn.quantize_layer(model_quantized,
... target_layer=0, bitwidth=2)
>>> print(model_quantized.layers[0].quantizer.bitwidth)
2
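        >>> # The target layer can also be passed as a layer instance
        >>> # (a sketch, reusing the native model defined above)
        >>> model_quantized = cnn2snn.quantize_layer(model,
        ...                                          target_layer=model.layers[0],
        ...                                          bitwidth=4)
        >>> assert isinstance(model_quantized.layers[0], cnn2snn.QuantizedDense)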
Args:
model (tf.keras.Model): a standard Keras model
target_layer: a standard or quantized Keras layer to be
converted, or the index or name of the target layer.
        bitwidth (int): the desired quantization bitwidth. Must be an
            integer greater than zero.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
tf.keras.Model: a quantized Keras model
Raises:
ValueError: In case of invalid target layer
ValueError: If bitwidth is not greater than zero
"""
if not bitwidth > 0:
raise ValueError("Only bitwidth greater than zero is supported. "
f"Receives bitwidth {bitwidth}.")
if isinstance(target_layer, int):
layer_to_quantize = model.layers[target_layer]
elif isinstance(target_layer, str):
layer_to_quantize = model.get_layer(target_layer)
elif isinstance(target_layer, Layer):
layer_to_quantize = target_layer
else:
raise ValueError("Target layer argument is not recognized")
def replace_layer(layer):
if layer == layer_to_quantize:
if isinstance(layer, (ReLU, QuantizedActivation)):
return _quantize_activation_layer(layer, bitwidth)
if type(layer) in supported_neural_layers:
return _convert_to_quantized_layer(layer, bitwidth,
quantizer_function)
return layer.__class__.from_config(layer.get_config())
return layer.__class__.from_config(layer.get_config())
with tf.keras.utils.custom_object_scope(cnn2snn_objects):
new_model = clone_model(model, clone_function=replace_layer)
new_model.set_weights(model.get_weights())
return new_model
def _convert_to_quantized_layer(layer, bitwidth, quantizer_function=None):
"""Quantizes a standard Keras layer (Conv2D, SeparableConv2D, Dense) or a
CNN2SNN quantized layer (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense) to a CNN2SNN quantized layer with given bitwidth.
A native Keras layer will be converted to a quantized layer with a
MaxPerAxisQuantizer if quantizer_function is not specified.
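
    Examples:
        A minimal sketch of the expected behaviour (the layer name below is
        illustrative):

        >>> layer = Dense(10, name='dense_example')
        >>> qlayer = _convert_to_quantized_layer(layer, bitwidth=4)
        >>> assert isinstance(qlayer, QuantizedDense)
        >>> assert qlayer.quantizer.bitwidth == 4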
Args:
layer (tf.keras.Layer): a standard Keras (Conv2D, SeparableConv2D or
Dense) or quantized (QuantizedConv2D, QuantizedSeparableConv2D,
QuantizedDense) layer.
bitwidth (int): the desired weight quantization bitwidth. If zero, the
Keras neural layer will be returned as it is.
quantizer_function (function): callable that takes as argument the layer
instance to be quantized and the corresponding default quantizer and
returns the quantizer to use.
Returns:
:obj:`keras.Layer`: a CNN2SNN quantized Keras layer
Raises:
ValueError: if a quantized layer is quantized with bitwidth 0.
"""
config = layer.get_config()
# Handle case where bitwidth=0
if bitwidth == 0:
if isinstance(layer, cnn2snn_neural_layers):
raise ValueError(f"A quantized layer cannot be quantized with "
f"bitwidth 0. Receives layer {layer.name} of type "
f" {layer.__class__.__name__}.")
return layer.__class__.from_config(config)
# Set quantizer with expected bitwidth
if 'quantizer' not in config:
quantizer = MaxPerAxisQuantizer(bitwidth=bitwidth)
if quantizer_function is not None:
quantizer = quantizer_function(layer, quantizer)
config['quantizer'] = quantizer
else:
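        # The layer is already quantized: update the bitwidth of its
        # serialized quantizer configuration in place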
config['quantizer']['config']['bitwidth'] = bitwidth
# Function to handle unsupported arguments in config
def pop_unsupported_args(class_type):
for arg, default_value in class_type.unsupported_args.items():
if (arg in config and config[arg] != default_value):
raise RuntimeError(
f"Argument '{arg}' in layer '{layer.name}' is only "
f"supported with default value '{default_value}'. "
f"Receives '{config[arg]}'.")
config.pop(arg, None)
# Return quantized layer, based on the config
if isinstance(layer, Conv2D):
pop_unsupported_args(QuantizedConv2D)
return QuantizedConv2D.from_config(config)
if isinstance(layer, SeparableConv2D):
if 'quantizer_dw' not in config:
quantizer = MaxQuantizer(bitwidth=bitwidth)
if quantizer_function is not None:
quantizer = quantizer_function(layer, quantizer)
config['quantizer_dw'] = quantizer
else:
config['quantizer_dw']['config']['bitwidth'] = bitwidth
pop_unsupported_args(QuantizedSeparableConv2D)
return QuantizedSeparableConv2D.from_config(config)
if isinstance(layer, Dense):
pop_unsupported_args(QuantizedDense)
return QuantizedDense.from_config(config)
return None
def _quantize_activation_layer(layer, bitwidth):
"""Quantizes a Keras ReLU layer or a CNN2SNN quantized activation to the
given bitwidth. A ReLU layer is converted to an ActivationDiscreteRelu
layer.
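
    Examples:
        A minimal sketch (illustrative only): a standard ReLU capped at 6
        converted to a 4-bit QuantizedReLU.

        >>> relu = ReLU(max_value=6.0, name='relu_example')
        >>> qrelu = _quantize_activation_layer(relu, bitwidth=4)
        >>> assert isinstance(qrelu, QuantizedReLU)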
Args:
layer (tf.keras.Layer): an activation layer (ReLU or CNN2SNN quantized
activation)
bitwidth (int): the desired quantization bitwidth. If zero, the ReLU
layer will be returned as it is.
Returns:
:obj:`keras.Layer`: a CNN2SNN quantized Keras layer
Raises:
ValueError: if a quantized activation layer is quantized with
bitwidth 0.
"""
if bitwidth == 0:
if isinstance(layer, QuantizedActivation):
raise ValueError(f"A quantized activation cannot be quantized with "
f"bitwidth 0. Receives layer {layer.name} of type "
f" {layer.__class__.__name__}.")
return layer.__class__.from_config(layer.get_config())
if isinstance(layer, ReLU):
if layer.threshold != 0:
print(f"Skipping ReLU layer {layer.name} with non-zero threshold")
return layer.__class__.from_config(layer.get_config())
# Instantiate a QuantizedReLU using the original ReLU max_value
return QuantizedReLU(bitwidth, layer.max_value, name=layer.name)
if isinstance(layer, ActivationDiscreteRelu):
# Evaluate the ActivationDiscreteRelu maximum value
max_value = layer.step * layer.levels
# Convert it to a QuantizedReLU
return QuantizedReLU(bitwidth, max_value, name=layer.name)
config = layer.get_config()
config['bitwidth'] = bitwidth
return layer.__class__.from_config(config)