#!/usr/bin/env python
# ******************************************************************************
# Copyright 2024 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""
Data augmentation for object detection
"""
__all__ = ['augment_sample', 'build_yolo_aug_pipeline', 'init_random_vars', 'resize_image']
import cv2
import numpy as np
from imgaug import augmenters as iaa
from imgaug.augmentables.bbs import BoundingBoxesOnImage, BoundingBox as BoundingBoxAug
from .data_utils import Coord


def augment_sample(image, objects, aug_pipe, labels, flip, scale, offx, offy):
    """
    Applies data augmentation to an image and its associated objects.

    Args:
        image (np.ndarray): the input image as a NumPy array.
        objects (dict): dictionary containing information about objects in the image,
            including labels and bounding boxes.
        aug_pipe (iaa.Augmenter): the augmentation pipeline.
        labels (list): list of labels of interest.
        flip (bool): whether to horizontally flip the image.
        scale (float): scaling factor for the image.
        offx (int): horizontal translation offset for the image.
        offy (int): vertical translation offset for the image.

    Returns:
        np.ndarray, dict: the augmented image and objects.
    """
h, w, _ = image.shape
# Scale the image
image = cv2.resize(image, (0, 0), fx=scale, fy=scale)
# Translate the image
image = image[offy:(offy + h), offx:(offx + w)]
if flip:
image = cv2.flip(image, 1)
bbs = _objects_to_bbox(objects, labels, image.shape)
image, bbs = aug_pipe(image=image, bounding_boxes=bbs)
    # Both methods return new objects rather than modifying in place, so the
    # result must be reassigned for the removal/clipping to take effect
    bbs = bbs.remove_out_of_image().clip_out_of_image()
objects = _bbox_to_objects(bbs, labels)
return image, objects
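

# A minimal usage sketch (illustrative only, not part of the module API): it wires
# init_random_vars and build_yolo_aug_pipeline into augment_sample for a single
# sample. The image, objects and labels values below are assumptions made for the
# example; boxes are in (y1, x1, y2, x2) order.
#
#   image = np.zeros((416, 416, 3), dtype=np.uint8)
#   objects = {'bbox': [[50, 60, 150, 160]], 'label': [0]}
#   labels = ['car']
#   flip, scale, offx, offy = init_random_vars(*image.shape[:2])
#   aug_pipe = build_yolo_aug_pipeline()
#   image, objects = augment_sample(image, objects, aug_pipe, labels,
#                                   flip, scale, offx, offy)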


def _objects_to_bbox(objects, labels, image_shape):
    """
    Transforms object bounding boxes from NumPy arrays to the imgaug BoundingBox format.

    Args:
        objects (dict): dictionary containing information about objects in the image,
            including labels and bounding boxes.
        labels (list): list of labels of interest.
        image_shape (tuple): the shape of the image on which the objects are placed.

    Returns:
        BoundingBoxesOnImage: BoundingBoxesOnImage object from the imgaug library
            containing the bounding box coordinates with their labels.
    """
    boxes = objects['bbox']
    formatted_boxes = [BoundingBoxAug(x1=box[Coord.x1],
                                      y1=box[Coord.y1],
                                      x2=box[Coord.x2],
                                      y2=box[Coord.y2],
                                      label=labels[objects['label'][idx]])
                       for idx, box in enumerate(boxes)]
    return BoundingBoxesOnImage(formatted_boxes, shape=image_shape)


def _bbox_to_objects(boxes, labels):
    """
    Extracts bounding boxes from imgaug objects into a dictionary containing
    bounding box coordinates and the corresponding labels.

    Args:
        boxes (list): list of imgaug bounding boxes.
        labels (list): list of labels of interest.

    Returns:
        dict: dictionary containing information about objects in the image,
            including labels and bounding boxes.
    """
    # Boxes are stored in (y1, x1, y2, x2) order, matching the Coord indices
    objects_boxes = [
        [
            bbox.y1,
            bbox.x1,
            bbox.y2,
            bbox.x2,
        ] for bbox in boxes]
objects_labels = [labels.index(box.label) for box in boxes]
objects = {
'bbox': objects_boxes,
'label': objects_labels
}
return objects
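

# A round-trip sketch for the two helpers above (illustrative values): an objects
# dictionary with boxes in (y1, x1, y2, x2) order converts to imgaug bounding
# boxes and back unchanged, assuming Coord indexes that same order.
#
#   objects = {'bbox': [[10, 20, 110, 120]], 'label': [0]}
#   bbs = _objects_to_bbox(objects, ['person'], image_shape=(416, 416, 3))
#   assert _bbox_to_objects(bbs, ['person']) == objects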


def init_random_vars(h, w):
    """
    Initialize random variables for data augmentation.

    Args:
        h (int): height of the input image.
        w (int): width of the input image.

    Returns:
        (bool, float, int, int): flip, scale, offx, offy.
    """
    rng = np.random.default_rng()
    flip = rng.choice(a=[False, True])
    # Random zoom-in factor in [1.0, 1.1)
    scale = rng.uniform() / 10. + 1.
    # Offsets are bounded so the scaled image can still be cropped back to (h, w)
    max_offx = (scale - 1.) * w
    max_offy = (scale - 1.) * h
    offx = int(rng.uniform() * max_offx)
    offy = int(rng.uniform() * max_offy)
return flip, scale, offx, offy
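

# Example draw (illustrative): for a 416x416 image, scale falls in [1.0, 1.1),
# so each offset stays below 10% of the corresponding dimension.
#
#   flip, scale, offx, offy = init_random_vars(416, 416)
#   assert 1.0 <= scale < 1.1 and 0 <= offx <= 41 and 0 <= offy <= 41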


def resize_image(image, input_shape):
    """
    Resizes an image to the given dimensions.

    Args:
        image (np.ndarray): input image with size represented as (h, w, c).
        input_shape (tuple): tuple containing the desired image
            dimensions as (h, w, c).

    Returns:
        np.ndarray: the resized image.
    """
    # cv2.resize expects the target size as (width, height)
    image = cv2.resize(image, (input_shape[1], input_shape[0]))
    return image
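

# For example (illustrative shape), to match a 224x224 RGB model input:
#
#   resized = resize_image(image, (224, 224, 3))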


def fix_obj_position_and_size(objects, h, w, input_shape, scale, offx, offy, training, flip):
    """
    Adjusts object positions and sizes based on the augmentation parameters.

    Args:
        objects (dict): dictionary containing information about objects in the image,
            including labels and bounding boxes.
        h (int): height of the input image.
        w (int): width of the input image.
        input_shape (tuple): the desired input shape for the image.
        scale (float): scaling factor for the image.
        offx (int): horizontal offset for translation.
        offy (int): vertical offset for translation.
        training (bool): True to augment training data, False for validation.
        flip (bool): whether the image was horizontally flipped.

    Returns:
        dict: updated objects information.
    """
    boxes = np.array(objects['bbox'])
    # Per-coordinate resize ratios in (y1, x1, y2, x2) order
    aspect_ratios = np.array([float(input_shape[0]) / h,
                              float(input_shape[1]) / w,
                              float(input_shape[0]) / h,
                              float(input_shape[1]) / w])
    offset = np.array([offy, offx, offy, offx])
    if training:
        boxes = (boxes * scale - offset).astype(int)
    # input_shape[:2] + input_shape[:2] concatenates the (h, w) tuple into the
    # per-coordinate upper bounds (h, w, h, w)
    boxes = np.clip((boxes * aspect_ratios).astype(int), 0, input_shape[:2] + input_shape[:2])
if training and flip:
xmin = boxes[:, Coord.x1].copy()
boxes[:, Coord.x1] = input_shape[1] - boxes[:, Coord.x2]
boxes[:, Coord.x2] = input_shape[1] - xmin
objects['bbox'] = boxes
return objects
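

# A worked example (illustrative numbers): a (200, 300) image scaled by 1.05 and
# cropped at (offy=5, offx=10), resized to a (416, 416, 3) input, without flipping.
# A box (y1, x1, y2, x2) = (20, 30, 120, 130) becomes (20*1.05-5, 30*1.05-10, ...)
# = (16, 21, 121, 126), then stretched by (416/200, 416/300) to (33, 29, 251, 174).
#
#   objects = {'bbox': [[20, 30, 120, 130]], 'label': [0]}
#   fix_obj_position_and_size(objects, h=200, w=300, input_shape=(416, 416, 3),
#                             scale=1.05, offx=10, offy=5, training=True, flip=False)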


def build_yolo_aug_pipeline():
    """ Defines a sequence of augmentation steps for YOLO training
    that will be applied to every image.

    Returns:
        iaa.Sequential: the sequence of augmentations.
    """
    # augmenters from https://github.com/aleju/imgaug
    def sometimes(aug):
        # Applies the given augmenter to 50% of images
        return iaa.Sometimes(0.5, aug)
# All augmenters with per_channel=0.5 will sample one value per
# image in 50% of all cases. In all other cases they will sample new
# values per channel.
return iaa.Sequential(
[
            # Affine with default arguments is an identity transform; geometric
            # changes (scale/offset/flip) are applied in augment_sample instead
            sometimes(iaa.Affine()),
# execute 0 to 5 of the following (less important) augmenters
# per image. Don't execute all of them, as that would often be
# way too strong
iaa.SomeOf(
(0, 5),
[
iaa.OneOf([
# blur images with a sigma between 0 and 3.0
iaa.GaussianBlur((0, 3.0)),
# blur image using local means (kernel sizes between
# 2 and 7)
iaa.AverageBlur(k=(2, 7)),
# blur image using local medians (kernel sizes
# between 3 and 11)
iaa.MedianBlur(k=(3, 11)),
]),
# sharpen images
iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5)),
# add gaussian noise
iaa.AdditiveGaussianNoise(
loc=0, scale=(0.0, 0.05 * 255), per_channel=0.5),
# randomly remove up to 10% of the pixels
                    iaa.Dropout((0.01, 0.1), per_channel=0.5),
                    # shift the brightness of images
                    iaa.Add((-10, 10), per_channel=0.5),
                    # scale the brightness of images
                    iaa.Multiply((0.5, 1.5), per_channel=0.5),
# improve or worsen the contrast
iaa.LinearContrast((0.5, 2.0), per_channel=0.5),
],
random_order=True)
],
random_order=True)
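

# A minimal usage sketch (illustrative): the pipeline can be applied to an image
# alone, or to an image together with imgaug bounding boxes as in augment_sample.
#
#   aug_pipe = build_yolo_aug_pipeline()
#   aug_image = aug_pipe(image=image)
#   aug_image, aug_bbs = aug_pipe(image=image, bounding_boxes=bbs)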