Source code for akida_models.centernet.centernet_batch_generator

#!/usr/bin/env python
# ******************************************************************************
# Copyright 2023 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""
Data generator for CenterNet training
"""

__all__ = ["BatchCenternetGenerator"]

import numpy as np
import tensorflow as tf
from imgaug import augmenters as iaa

from ..detection.batch_generator import BatchGenerator


class BatchCenternetGenerator(BatchGenerator):
    """ Data generator used for the CenterNet training process.

    Each batch is a ``(x_batch, y_batch)`` pair where, for every output cell,
    the last axis of ``y_batch`` is laid out as::

        [0:len(labels)]  per-class gaussian center heatmaps
        [-4], [-3]       box width and height, in output-grid units
        [-2], [-1]       x and y offsets of the box center inside its cell
    """

    @staticmethod
    def build_aug_pipeline():
        """Build the image augmentation pipeline.

        Returns:
            iaa.Sequential: the augmenters, applied in random order.
        """
        # augmentors by https://github.com/aleju/imgaug
        def sometimes(aug):
            return iaa.Sometimes(0.5, aug)

        # All augmenters with per_channel=0.5 will sample one value per
        # image in 50% of all cases. In all other cases they will sample new
        # values per channel.
        return iaa.Sequential(
            [
                # each of the following augmenters is applied to about half of the images
                sometimes(iaa.Affine()),
                sometimes(iaa.Cutout(nb_iterations=(0, 3), position="uniform", size=(0, 0.3))),
                # execute 0 to 5 of the following (less important) augmenters
                # per image. Don't execute all of them, as that would often be
                # way too strong
                iaa.SomeOf(
                    (0, 5),
                    [
                        iaa.OneOf([
                            # blur images with a sigma between 0 and 3.0
                            iaa.GaussianBlur((0, 3.0)),
                            # blur image using local means (kernel sizes between 2 and 7)
                            iaa.AverageBlur(k=(2, 7)),
                            # blur image using local medians (kernel sizes between 3 and 11)
                            iaa.MedianBlur(k=(3, 11)),
                        ]),
                        # sharpen images
                        iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
                        iaa.LinearContrast((0.5, 2.0), per_channel=0.5),
                        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05 * 255),
                                                  per_channel=0.5),
                        # randomly remove up to 10% of the pixels
                        iaa.OneOf([iaa.Dropout((0.01, 0.1), per_channel=0.5),
                                   iaa.CoarseDropout((0.01, 0.05), size_percent=0.5)]),
                        # change brightness of images
                        iaa.Add((-10, 10), per_channel=0.5),
                        iaa.Multiply((0.5, 1.5), per_channel=0.5),
                        iaa.OneOf([
                            iaa.pillike.Equalize(),
                            iaa.pillike.Autocontrast()
                        ])
                    ],
                    random_order=True)
            ],
            random_order=True)

    def __getitem__(self, idx):
        """Build the batch at position ``idx``.

        Args:
            idx (int): index of the batch.

        Returns:
            tuple(np.array, np.array): ``x_batch`` with shape
            ``(N, input_shape[1], input_shape[0], input_shape[2])`` and ``y_batch`` with shape
            ``(N, grid_size[1], grid_size[0], 4 + len(labels))``.
        """
        lower_bound = idx * self._batch_size
        upper_bound = (idx + 1) * self._batch_size

        # The last batch is shifted backwards so that it stays full.
        # NOTE(review): when the dataset holds fewer samples than one batch, lower_bound
        # becomes negative here - confirm that the parent class prevents this.
        if upper_bound > self._data_length:
            upper_bound = self._data_length
            lower_bound = upper_bound - self._batch_size

        num_classes = len(self._labels)
        # Index 0 is used with x coordinates and index 1 with y coordinates, both for
        # the input shape and for the output grid.
        grid_w, grid_h = self._grid_size[0], self._grid_size[1]
        # Number of input pixels covered by one output cell along each axis
        stride_x = float(self._input_shape[0]) / grid_w
        stride_y = float(self._input_shape[1]) / grid_h

        N = upper_bound - lower_bound
        x_batch = np.zeros((N, self._input_shape[1], self._input_shape[0],
                            self._input_shape[2]))
        y_batch = np.zeros((N, grid_h, grid_w, 2 + 2 + num_classes))

        for instance_count, train_instance in enumerate(self._data[lower_bound:upper_bound]):
            # augment input image and fix object's position and size
            img, all_objs = self.aug_image(train_instance, jitter=self._jitter)

            # View on the class heatmaps of this sample: writing to it updates y_batch
            class_heatmaps = y_batch[instance_count, ..., :num_classes]

            for obj in all_objs:
                # skip degenerate boxes and unknown labels
                if not (obj['x2'] > obj['x1'] and obj['y2'] > obj['y1']
                        and obj['label'] in self._labels):
                    continue

                # find the center and express it in output size
                center_x = .5 * (obj['x1'] + obj['x2']) / stride_x
                center_y = .5 * (obj['y1'] + obj['y2']) / stride_y
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))

                # A negative index would silently wrap around to the opposite border,
                # so both bounds are checked.
                if not (0 <= grid_x < grid_w and 0 <= grid_y < grid_h):
                    continue

                obj_indx = self._labels.index(obj['label'])
                scale_box_w = (obj['x2'] - obj['x1']) / stride_x
                scale_box_h = (obj['y2'] - obj['y1']) / stride_y

                # get the center point and use a gaussian kernel as the target
                radius = self._gaussian_radius([scale_box_h, scale_box_w], min_overlap=0.3)
                # check that the radius is positive. The builtin max is required here:
                # the second positional argument of np.max is the axis, not a lower bound.
                radius = max(int(radius), 0)

                # Draw the gaussian directly in the targets. Overlapping objects of a same
                # class are merged with an element-wise maximum, so targets stay in [0, 1]
                # and every object center keeps a value of exactly 1.
                self._gen_gaussian_target(class_heatmaps, [grid_y, grid_x], obj_indx, radius)

                # assign ground truth to y_batch
                y_batch[instance_count, grid_y, grid_x, -4] = scale_box_w
                y_batch[instance_count, grid_y, grid_x, -3] = scale_box_h
                y_batch[instance_count, grid_y, grid_x, -2] = center_x - grid_x
                y_batch[instance_count, grid_y, grid_x, -1] = center_y - grid_y

            # assign input image to x_batch
            x_batch[instance_count] = img

        return x_batch, y_batch

    @staticmethod
    def _gaussian2D(radius, sigma=1, eps=tf.keras.backend.epsilon()):
        """Generate 2D gaussian kernel.

        Args:
            radius (int): Radius of gaussian kernel.
            sigma (int, optional): Sigma of gaussian function. Defaults to 1.
            eps (float, optional): Epsilon value. Defaults to the Keras backend epsilon
                (1e-7 unless it was reconfigured).

        Returns:
            np.array: Gaussian kernel with a ``(2 * radius + 1) * (2 * radius + 1)`` shape.
        """
        x = np.reshape(np.arange(-radius, radius + 1), [1, -1])
        y = np.reshape(np.arange(-radius, radius + 1), [-1, 1])

        h = np.exp(-(x * x + y * y) / (2 * sigma * sigma))

        # Clamp smaller values to zero
        h[h < (eps * h.max())] = 0
        return h

    @staticmethod
    def _gen_gaussian_target(heatmap, center, obj_idx, radius):
        """Generate 2D gaussian heatmap.

        Args:
            heatmap (np.array): Input heatmap, the gaussian kernel will cover on it and
                maintain the max value. It is updated in place.
            center (list[int]): Coordinates of gaussian kernel's center, given as indices
                on the first and second axis of ``heatmap`` (``[row, column]``).
            obj_idx (int): The class index for the center point.
            radius (int): Radius of gaussian kernel.

        Returns:
            out_heatmap (np.array): Updated heatmap covered by gaussian kernel.

        Note:
            Adapted from a PyTorch implementation.
        """
        diameter = 2 * radius + 1
        gaussian_kernel = BatchCenternetGenerator._gaussian2D(radius, sigma=diameter / 6)

        row, col = center
        n_rows, n_cols = heatmap.shape[:2]

        # Nothing can be drawn for a center located outside of the heatmap
        if not (0 <= row < n_rows and 0 <= col < n_cols):
            return heatmap

        # Find the smallest value so that if the point is near the edge we don't end outside
        # (e.g. col = 3 and radius is 10, then we go from col-3 to col+10)
        up = min(row, radius)
        down = min(n_rows - row, radius + 1)
        left = min(col, radius)
        right = min(n_cols - col, radius + 1)

        # The kernel is cropped with the same offsets as the heatmap window, so that its
        # peak always lands on (row, col), even when the window is clipped by a border.
        window = heatmap[row - up:row + down, col - left:col + right, obj_idx]
        cropped_kernel = gaussian_kernel[radius - up:radius + down,
                                         radius - left:radius + right]

        # Compare the gaussian kernel to the heatmap (in case there's already a point of
        # interest there) and keep the max value. 'window' is a view, so this writes
        # straight into 'heatmap'.
        np.maximum(window, cropped_kernel, out=window)
        return heatmap

    @staticmethod
    def _gaussian_radius(det_size, min_overlap):
        r"""Generate 2D gaussian radius.

        This function is modified from the `official github repo
        <https://github.com/princeton-vl/CornerNet-Lite/blob/master/core/sample/
        utils.py#L65>`_.

        Given ``min_overlap``, radius could computed by a quadratic equation
        according to Vieta's formulas.

        There are 3 cases for computing gaussian radius, details are following:

        - Case 1: one corner is inside the gt box and the other is outside.

        .. code:: text

            |<   width   >|

            lt-+----------+         -
            |  |          |         ^
            +--x----------+--+
            |  |          |  |
            |  |          |  |    height
            |  | overlap  |  |
            |  |          |  |
            |  |          |  |      v
            +--+---------br--+      -
               |          |  |
               +----------+--x

        To ensure IoU of generated box and gt box is larger than ``min_overlap``:

        .. math::
            \cfrac{(w-r)*(h-r)}{w*h+(w+h)r-r^2} \ge {iou} \quad\Rightarrow\quad
            {r^2-(w+h)r+\cfrac{1-iou}{1+iou}*w*h} \ge 0 \\
            {a} = 1,\quad{b} = {-(w+h)},\quad{c} = {\cfrac{1-iou}{1+iou}*w*h}
            {r} \le \cfrac{-b-\sqrt{b^2-4*a*c}}{2*a}

        - Case 2: both two corners are inside the gt box.

        .. code:: text

            |<   width   >|

            lt-+----------+         -
            |  |          |         ^
            +--x-------+  |
            |  |       |  |
            |  |overlap|  |       height
            |  |       |  |
            |  +-------x--+
            |          |  |         v
            +----------+-br         -

        To ensure IoU of generated box and gt box is larger than ``min_overlap``:

        .. math::
            \cfrac{(w-2*r)*(h-2*r)}{w*h} \ge {iou} \quad\Rightarrow\quad
            {4r^2-2(w+h)r+(1-iou)*w*h} \ge 0 \\
            {a} = 4,\quad {b} = {-2(w+h)},\quad {c} = {(1-iou)*w*h}
            {r} \le \cfrac{-b-\sqrt{b^2-4*a*c}}{2*a}

        - Case 3: both two corners are outside the gt box.

        .. code:: text

               |<   width   >|

            x--+----------------+
            |  |                |
            +-lt-------------+  |   -
            |  |             |  |   ^
            |  |             |  |
            |  |   overlap   |  | height
            |  |             |  |
            |  |             |  |   v
            |  +------------br--+   -
            |                |  |
            +----------------+--x

        To ensure IoU of generated box and gt box is larger than ``min_overlap``:

        .. math::
            \cfrac{w*h}{(w+2*r)*(h+2*r)} \ge {iou} \quad\Rightarrow\quad
            {4*iou*r^2+2*iou*(w+h)r+(iou-1)*w*h} \le 0 \\
            {a} = {4*iou},\quad {b} = {2*iou*(w+h)},\quad {c} = {(iou-1)*w*h} \\
            {r} \le \cfrac{-b+\sqrt{b^2-4*a*c}}{2*a}

        Args:
            det_size (list[int]): Shape of object, as ``[height, width]``.
            min_overlap (float): Min IoU with ground truth for boxes generated by
                keypoints inside the gaussian kernel.

        Returns:
            radius (float): Radius of gaussian kernel, the smallest of the three cases.
            The value is not rounded here.

        Notes:
            Explanation of figure: ``lt`` and ``br`` indicates the left-top and
            bottom-right corner of ground truth box. ``x`` indicates the generated
            corner at the limited position when ``radius=r``.
        """
        height, width = det_size

        # Case 1: one corner inside the ground truth box, the other outside
        a1 = 1
        b1 = (height + width)
        c1 = width * height * (1 - min_overlap) / (1 + min_overlap)
        sq1 = np.sqrt(b1**2 - 4 * a1 * c1)
        r1 = (b1 - sq1) / (2 * a1)

        # Case 2: both corners inside the ground truth box
        a2 = 4
        b2 = 2 * (height + width)
        c2 = (1 - min_overlap) * width * height
        sq2 = np.sqrt(b2**2 - 4 * a2 * c2)
        r2 = (b2 - sq2) / (2 * a2)

        # Case 3: both corners outside the ground truth box
        a3 = 4 * min_overlap
        b3 = -2 * min_overlap * (height + width)
        c3 = (min_overlap - 1) * width * height
        sq3 = np.sqrt(b3**2 - 4 * a3 * c3)
        r3 = (b3 + sq3) / (2 * a3)

        return np.min([r1, r2, r3])