#!/usr/bin/env python
# ******************************************************************************
# Copyright 2020 Brainchip Holdings Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
"""
Processing tools for YOLO data handling.
"""
__all__ = ["BoundingBox", "load_image", "preprocess_image", "decode_output",
"parse_voc_annotations", "parse_widerface_annotations"]
import os
import xml.etree.ElementTree as et
import numpy as np
import tensorflow as tf
[docs]class BoundingBox:
""" Utility class to represent a bounding box.
The box is defined by its top left corner (x1, y1), bottom right corner
(x2, y2), label, score and classes.
"""
def __init__(self, x1, y1, x2, y2, score=-1, classes=None):
self.x1 = x1
self.y1 = y1
self.x2 = x2
self.y2 = y2
self.label = -1
self.score = score
self.classes = classes
def __repr__(self):
return "<BoundingBox({}, {}, {}, {}, {}, {}, {})>\n".format(
self.x1, self.x2, self.y1, self.y2, self.get_label(),
self.get_score(), self.classes)
[docs] def get_label(self):
""" Returns the label for this bounding box.
Returns:
Index of the label as an integer.
"""
if self.label == -1:
self.label = np.argmax(self.classes)
return self.label
[docs] def get_score(self):
""" Returns the score for this bounding box.
Returns:
Confidence as a float.
"""
if self.score == -1:
self.score = self.classes[self.get_label()]
return self.score
[docs] def iou(self, other):
""" Computes intersection over union ratio between this bounding box and
another one.
Args:
other (BoundingBox): the other bounding box for IOU computation
Returns:
IOU value as a float
"""
def _interval_overlap(interval_1, interval_2):
x1, x2 = interval_1
x3, x4 = interval_2
if x3 < x1:
if x4 < x1:
return 0
return min(x2, x4) - x1
if x2 < x3:
return 0
return min(x2, x4) - x3
intersect_w = _interval_overlap([self.x1, self.x2],
[other.x1, other.x2])
intersect_h = _interval_overlap([self.y1, self.y2],
[other.y1, other.y2])
intersect = intersect_w * intersect_h
w1, h1 = self.x2 - self.x1, self.y2 - self.y1
w2, h2 = other.x2 - other.x1, other.y2 - other.y1
union = w1 * h1 + w2 * h2 - intersect
return float(intersect) / union
[docs]def load_image(image_path):
""" Loads an image from a path.
Args:
image_path (string): full path of the image to load
Returns:
a Tensorflow image Tensor
"""
raw_image = tf.io.read_file(image_path)
return tf.image.decode_jpeg(raw_image, channels=3)
[docs]def preprocess_image(image_buffer, output_size):
""" Preprocess an image for YOLO inference.
Args:
image_buffer (tf.Tensor): image to preprocess
output_size (tuple): shape of the image after preprocessing
Returns:
A resized and normalized image as a Numpy array.
"""
# Resize
width = tf.constant(output_size[0])
height = tf.constant(output_size[1])
image = tf.compat.v1.image.resize(image_buffer, [height, width],
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
return image.numpy()
[docs]def decode_output(output,
anchors,
nb_classes,
obj_threshold=0.5,
nms_threshold=0.5):
""" Decodes a YOLO model output.
Args:
output (tf.Tensor): model output to decode
anchors (list): list of anchors boxes
nb_classes (int): number of classes
obj_threshold (float): confidence threshold for a box
nms_threshold (float): non-maximal supression threshold
Returns:
List of `BoundingBox` objects
"""
def _sigmoid(x):
return 1. / (1. + np.exp(-x))
def _softmax(x, axis=-1, t=-100.):
x = x - np.max(x)
if np.min(x) < t:
x = x / np.min(x) * t
e_x = np.exp(x)
return e_x / e_x.sum(axis, keepdims=True)
grid_h, grid_w, nb_box = output.shape[:3]
boxes = []
# decode the output by the network
output[..., 4] = _sigmoid(output[..., 4])
output[...,
5:] = output[..., 4][..., np.newaxis] * _softmax(output[..., 5:])
output[..., 5:] *= output[..., 5:] > obj_threshold
for row in range(grid_h):
for col in range(grid_w):
for b in range(nb_box):
# from 5th element onwards are class classes
classes = output[row, col, b, 5:]
if np.sum(classes) > 0:
# first 4 elements are x, y, w, and h
x, y, w, h = output[row, col, b, :4]
x = (col + _sigmoid(x)
) / grid_w # center position, unit: image width
y = (row + _sigmoid(y)
) / grid_h # center position, unit: image height
w = anchors[b][0] * np.exp(w) / grid_w # unit: image width
h = anchors[b][1] * np.exp(h) / grid_h # unit: image height
confidence = output[row, col, b, 4]
x1 = max(x - w / 2, 0)
y1 = max(y - h / 2, 0)
x2 = min(x + w / 2, grid_w)
y2 = min(y + h / 2, grid_h)
box = BoundingBox(x1, y1, x2, y2, confidence, classes)
boxes.append(box)
# suppress non-maximal boxes
for c in range(nb_classes):
sorted_indices = list(
reversed(np.argsort([box.classes[c] for box in boxes])))
for ind, index_i in enumerate(sorted_indices):
if boxes[index_i].classes[c] == 0:
continue
for j in range(ind + 1, len(sorted_indices)):
index_j = sorted_indices[j]
# filter out redundant boxes (same class and overlapping too
# much)
if (boxes[index_i].iou(boxes[index_j]) >= nms_threshold) and (
c == boxes[index_i].get_label()) and (
c == boxes[index_j].get_label()):
boxes[index_j].score = 0
# remove the boxes which are less likely than a obj_threshold
boxes = [box for box in boxes if box.get_score() > obj_threshold]
return boxes
[docs]def parse_voc_annotations(gt_folder, image_folder, file_path, labels):
""" Loads PASCAL-VOC data.
Data is loaded using the groundtruth informations and stored in a
dictionary.
Args:
gt_folder (str): path to the folder containing ground truth files
image_folder (str): path to the folder containing the images
file_path (str): file containing the list of files to parse
labels (list): list of labels of interest
Returns:
dict: a dictionnary containing all data present in the ground truth file
"""
def _build_list(file_path):
"""
Builds a list of file names from a text file path.
Args:
file_path (string): path of the file containing the names
Returns:
list: list of file names
"""
file_list = []
with open(file_path, "r", encoding='UTF-8') as f:
for line in f:
file_list.append(line.rstrip())
return file_list
file_list = _build_list(file_path)
all_data = []
for f in file_list:
tree = et.parse(os.path.join(gt_folder, f) + ".xml")
data = {"boxes": []}
for elem in tree.iter():
if 'filename' in elem.tag:
data['image_path'] = os.path.join(image_folder, elem.text)
if 'size' in elem.tag:
for attr in list(elem):
if 'width' in attr.tag:
width = int(attr.text)
if 'height' in attr.tag:
height = int(attr.text)
if 'depth' in attr.tag:
depth = int(attr.text)
data['image_shape'] = (width, height, depth)
if 'object' in elem.tag:
box = {}
for attr in list(elem):
if 'difficult' in attr.tag:
if int(attr.text) != 0:
box.clear()
break
if 'name' in attr.tag:
box['label'] = attr.text
if box['label'] not in labels:
box.clear()
break
if 'bndbox' in attr.tag:
for dim in list(attr):
if 'xmin' in dim.tag:
box['x1'] = int(round(float(dim.text)))
if 'ymin' in dim.tag:
box['y1'] = int(round(float(dim.text)))
if 'xmax' in dim.tag:
box['x2'] = int(round(float(dim.text)))
if 'ymax' in dim.tag:
box['y2'] = int(round(float(dim.text)))
if len(box) == 5:
data["boxes"].append(box)
if len(data["boxes"]) != 0:
all_data.append(data)
return all_data
[docs]def parse_widerface_annotations(gt_file, image_folder):
""" Loads WiderFace data.
Data is loaded using the groundtruth informations and stored in a
dictionary.
Args:
gt_file (str): path to the ground truth file
image_folder (str): path to the directory containing the images
Returns:
dict: a dictionnary containing all data present in the ground truth file
"""
def _is_valid_box(w_box, h_box, w_img, h_img):
box_area = w_box * h_box
img_area = w_img * h_img
return box_area >= img_area / 60.0
all_data = []
with open(gt_file, "r", encoding='UTF-8') as f:
for line in f:
file_path = line.rstrip()
full_file_path = os.path.join(image_folder, str(file_path))
if not os.path.isfile(full_file_path):
raise RuntimeError(f"Not a file path: {full_file_path}")
image = load_image(full_file_path)
shape = (image.shape[1], image.shape[0], image.shape[2])
data = {
"image_path": full_file_path,
"image_shape": shape,
"boxes": []
}
nb_boxes = int(f.readline())
if nb_boxes == 0:
f.readline()
for _ in range(nb_boxes):
annotation = f.readline()
values = annotation.split(' ')
x1 = int(values[0])
y1 = int(values[1])
w = int(values[2])
h = int(values[3])
if _is_valid_box(w, h, shape[0], shape[1]):
box = {
"x1": x1,
"y1": y1,
"x2": x1 + w,
"y2": y1 + h,
"label": "face"
}
data["boxes"].append(box)
if len(data["boxes"]) != 0:
all_data.append(data)
return all_data