"""
Copyright (c) 2025 Sima Technologies, Inc.
SPDX-License-Identifier: Apache-2.0
"""
from typing import List
import cv2
import json
import math
import numpy as np
from pathlib import Path
class YoloHelpers:
    """
    Static helper routines for post-processing YOLO segmentation outputs.

    Every method is a ``@staticmethod``; the class is only a namespace and
    holds no state.
    """

    @staticmethod
    def nms(boxes, scores, iou_threshold):
        """
        Greedy non-maximum suppression.

        Args:
            boxes: 2-D array indexed as ``boxes[i, :]``, one
                ``(x1, y1, x2, y2)`` box per row.
            scores: One score per box.
            iou_threshold: A remaining box is kept only while its IoU with
                the picked box is strictly below this value.

        Returns:
            List of indices into ``boxes``, ordered by decreasing score.
        """
        # Candidate indices ordered from highest to lowest score.
        sorted_indices = np.argsort(scores)[::-1]

        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the highest-scoring box that is still a candidate.
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)

            # IoU of the picked box against every other remaining candidate.
            ious = YoloHelpers.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

            # Positions in ``ious`` are relative to sorted_indices[1:], hence
            # the +1 when mapping them back onto sorted_indices.
            keep_indices = np.where(ious < iou_threshold)[0]
            sorted_indices = sorted_indices[keep_indices + 1]

        return keep_boxes

    @staticmethod
    def bridge(image: np.ndarray, mask: np.ndarray, **kwds) -> tuple:
        """
        Crop ``image`` to the extent of the positive pixels of ``mask``.

        The crop is converted to grayscale and back to three channels, the
        pixels outside the mask are zeroed, and the result is mirrored
        left-right when ``flip_image`` reports the left half as heavier.

        NOTE: ``mask`` is modified in place (every value > 0 becomes 1), and
        the returned ``cropped_mask`` is a slice of that same array.
        NOTE: the crop uses end-exclusive slices, so the row ``y_max`` and
        the column ``x_max`` themselves are not part of the crop.

        Args:
            image: Image indexed as ``image[y, x]``; passed to
                ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``.
            mask: Array whose first two axes are unpacked as ``(y, x)``.
            **kwds: Accepted and ignored.

        Returns:
            Tuple ``(seg_seat, cropped_mask, (x_min, y_min), (x_max, y_max))``.

        Raises:
            ValueError: If ``mask`` has no positive entries.
        """
        mask[mask > 0] = 1

        coords = np.column_stack(np.where(mask > 0))
        if coords.size == 0:
            # Same exception type numpy would raise from the reductions
            # below, but with an actionable message.
            raise ValueError("mask contains no positive pixels; nothing to crop")
        y_min, x_min = coords.min(axis=0)
        y_max, x_max = coords.max(axis=0)

        cropped_image = image[y_min:y_max, x_min:x_max]
        # Round-trip through grayscale: the crop keeps three channels but
        # all of them carry the same intensity.
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2GRAY)
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_GRAY2RGB)

        cropped_mask = mask[y_min:y_max, x_min:x_max]
        seg_seat = (cropped_image * cropped_mask[:, :, None]).astype(np.uint8)

        if YoloHelpers.flip_image(seg_seat):
            seg_seat = np.fliplr(seg_seat)

        return seg_seat, cropped_mask, (x_min, y_min), (x_max, y_max)

    @staticmethod
    def flip_image(image: np.ndarray) -> bool:
        """
        Tell whether the left half of ``image`` has more non-zero entries
        than the right half.

        Args:
            image: 3-D array; the second axis is split at its midpoint.

        Returns:
            True when the left half holds strictly more non-zero values.
        """
        _, width, _ = image.shape
        mid = width // 2
        left_count = np.count_nonzero(image[:, :mid, :])
        right_count = np.count_nonzero(image[:, mid:, :])
        return left_count > right_count

    @staticmethod
    def compute_iou(box, boxes):
        """
        Intersection-over-union of one box against many.

        Args:
            box: Sequence ``(x1, y1, x2, y2)``.
            boxes: 2-D array, one ``(x1, y1, x2, y2)`` box per row.

        Returns:
            Array with one IoU value per row of ``boxes``.

        NOTE: the union is used as a divisor without a zero check, so a pair
        of zero-area boxes produces NaN.
        """
        # Corners of the intersection rectangle.
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Negative extents mean no overlap; clamp them to zero.
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area

        return intersection_area / union_area

    @staticmethod
    def xywh2xyxy(x):
        """
        Convert boxes from ``(center_x, center_y, w, h)`` to
        ``(x1, y1, x2, y2)``.

        Args:
            x: Array whose last axis holds at least four values.

        Returns:
            A copy of ``x`` (same dtype) with the first four entries of the
            last axis replaced by the corner coordinates; ``x`` is untouched.
        """
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    @staticmethod
    def sigmoid(x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def rescale_boxes(boxes, input_shape, image_shape):
        """
        Rescale ``(x1, y1, x2, y2)`` boxes from one resolution to another.

        Args:
            boxes: Boxes expressed in ``input_shape`` coordinates.
            input_shape: ``(height, width)`` the boxes currently refer to.
            image_shape: ``(height, width)`` to rescale the boxes to.

        Returns:
            New float32 array of rescaled boxes; ``boxes`` is not modified.
        """
        # x coordinates scale with the width, y coordinates with the height.
        source_scale = np.array([input_shape[1], input_shape[0], input_shape[1], input_shape[0]])
        target_scale = np.array([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
        boxes = np.divide(boxes, source_scale, dtype=np.float32)
        boxes *= target_scale
        return boxes

    @staticmethod
    def process_box_output(box_output, img_height=3024, img_width=4032, conf_thres=0.5, iou_thres=0.5, num_masks=32):
        """
        Filter the detection tensor by confidence and apply NMS.

        After squeezing and transposing, each prediction row is read as
        4 box values, then ``num_classes`` class scores, then ``num_masks``
        mask coefficients, where
        ``num_classes = box_output.shape[1] - num_masks - 4``.

        Args:
            box_output: Detection tensor; axis 1 is used as the channel
                axis (presumably ``(1, channels, candidates)`` — confirm
                against the model export).
            img_height: Forwarded to ``extract_boxes``.
            img_width: Forwarded to ``extract_boxes``.
            conf_thres: Rows whose best class score is not above this value
                are dropped.
            iou_thres: IoU threshold handed to ``nms``.
            num_masks: Number of trailing mask coefficients per row.

        Returns:
            Tuple ``(boxes, scores, class_ids, mask_predictions)`` for the
            detections that survive NMS, or ``([], [], [], np.array([]))``
            when no row passes ``conf_thres``.
        """
        predictions = np.squeeze(box_output).T
        num_classes = box_output.shape[1] - num_masks - 4

        # Best class score per candidate, then drop the weak candidates.
        scores = np.max(predictions[:, 4 : 4 + num_classes], axis=1)
        confident = scores > conf_thres
        predictions = predictions[confident, :]
        scores = scores[confident]

        if len(scores) == 0:
            return [], [], [], np.array([])

        box_predictions = predictions[..., : num_classes + 4]
        mask_predictions = predictions[..., num_classes + 4 :]

        # Class with the highest confidence for every surviving candidate.
        class_ids = np.argmax(box_predictions[:, 4:], axis=1)

        # NOTE(review): ``extract_boxes`` is not defined in the visible part
        # of this class — confirm it exists before relying on this path.
        boxes = YoloHelpers.extract_boxes(box_predictions, img_height, img_width)

        # Suppress weak, overlapping bounding boxes.
        indices = YoloHelpers.nms(boxes, scores, iou_thres)

        return boxes[indices], scores[indices], class_ids[indices], mask_predictions[indices]

    @staticmethod
    def process_mask_output(mask_predictions, mask_output, boxes, img_height, img_width):
        """
        Build one binary image-sized mask per detected box.

        Args:
            mask_predictions: Per-detection mask coefficients, one row per
                box.
            mask_output: Prototype masks; after squeezing it is unpacked as
                ``(num_mask, mask_height, mask_width)``.
            boxes: ``(x1, y1, x2, y2)`` boxes in image coordinates.
            img_height: Height of the produced mask maps.
            img_width: Width of the produced mask maps.

        Returns:
            Array of shape ``(len(boxes), img_height, img_width)`` holding
            0/1 values, or ``[]`` when ``mask_predictions`` has no rows.
        """
        if mask_predictions.shape[0] == 0:
            return []

        mask_output = np.squeeze(mask_output)

        # Combine the prototypes with each detection's coefficients.
        num_mask, mask_height, mask_width = mask_output.shape  # CHW
        masks = YoloHelpers.sigmoid(mask_predictions @ mask_output.reshape((num_mask, -1)))
        masks = masks.reshape((-1, mask_height, mask_width))

        # Boxes expressed at the resolution of the prototype masks.
        scale_boxes = YoloHelpers.rescale_boxes(boxes, (img_height, img_width), (mask_height, mask_width))

        mask_maps = np.zeros((len(scale_boxes), img_height, img_width))
        blur_size = (int(img_width / mask_width), int(img_height / mask_height))

        for i, (box, scale_box) in enumerate(zip(boxes, scale_boxes)):
            # Round outwards so the crop never loses part of the box.
            scale_x1 = int(math.floor(scale_box[0]))
            scale_y1 = int(math.floor(scale_box[1]))
            scale_x2 = int(math.ceil(scale_box[2]))
            scale_y2 = int(math.ceil(scale_box[3]))

            x1 = int(math.floor(box[0]))
            y1 = int(math.floor(box[1]))
            x2 = int(math.ceil(box[2]))
            y2 = int(math.ceil(box[3]))

            # Upsample the low-resolution crop to the box size, smooth it,
            # then threshold it into a binary mask.
            scale_crop_mask = masks[i][scale_y1:scale_y2, scale_x1:scale_x2]
            crop_mask = cv2.resize(scale_crop_mask, (x2 - x1, y2 - y1), interpolation=cv2.INTER_CUBIC)
            crop_mask = cv2.blur(crop_mask, blur_size)
            crop_mask = (crop_mask > 0.5).astype(np.uint8)
            mask_maps[i, y1:y2, x1:x2] = crop_mask

        return mask_maps
def postprocess(outputs: List[np.ndarray], input_height, input_width) -> tuple:
    """
    Run box and mask post-processing on the raw model outputs.

    Args:
        outputs: Model outputs; ``outputs[0]`` is passed to
            ``YoloHelpers.process_box_output`` and ``outputs[1]`` to
            ``YoloHelpers.process_mask_output``.
        input_height: Height handed to both helpers as the image height.
        input_width: Width handed to both helpers as the image width.

    Returns:
        Tuple ``(boxes, scores, class_ids, mask_maps)``.
    """
    # Thresholds and the mask count are left at process_box_output's defaults.
    boxes, scores, class_ids, mask_pred = YoloHelpers.process_box_output(outputs[0], input_height, input_width)
    mask_maps = YoloHelpers.process_mask_output(mask_pred, outputs[1], boxes, input_height, input_width)
    return boxes, scores, class_ids, mask_maps
def load_threshold(path: Path):
    """
    Helper function to load the threshold

    Args:
        path: Location of a JSON file holding the keys ``cls_thresh``,
            ``seg_thresh``, ``max_score`` and ``min_score``. A plain string
            path is accepted as well as a ``Path``.

    Returns:
        Tuple ``(cls_thresh, seg_thresh, max_score, min_score)``.

    Raises:
        KeyError: If one of the four keys is missing from the file.
    """
    # Path(...) lets string paths work too; JSON is read explicitly as UTF-8
    # instead of relying on the platform's default encoding.
    with Path(path).open('r', encoding='utf-8') as f:
        threshold = json.load(f)

    cls_thres = threshold['cls_thresh']
    seg_thres = threshold['seg_thresh']
    max_score = threshold['max_score']
    min_score = threshold['min_score']
    return cls_thres, seg_thres, max_score, min_score
def load_quantile(path: Path):
    """
    Helper function to load the quantile

    Args:
        path: Location of a JSON file holding the keys ``q_st_start``,
            ``q_st_end``, ``q_ae_start`` and ``q_ae_end``. A plain string
            path is accepted as well as a ``Path``.

    Returns:
        Tuple of four numpy arrays, in the order
        ``(q_st_start, q_st_end, q_ae_start, q_ae_end)``.

    Raises:
        KeyError: If one of the four keys is missing from the file.
    """
    # Path(...) lets string paths work too; JSON is read explicitly as UTF-8
    # instead of relying on the platform's default encoding.
    with Path(path).open('r', encoding='utf-8') as f:
        quantiles = json.load(f)

    return (
        np.array(quantiles['q_st_start']),
        np.array(quantiles['q_st_end']),
        np.array(quantiles['q_ae_start']),
        np.array(quantiles['q_ae_end']),
    )
def load_mean_std(path: Path):
    """
    Helper function to load the mean and standard deviation

    Args:
        path: Location of a JSON file holding the keys ``mean`` and ``std``.
            A plain string path is accepted as well as a ``Path``.

    Returns:
        Tuple ``(mean, std)`` of numpy arrays.

    Raises:
        KeyError: If ``mean`` or ``std`` is missing from the file.
    """
    # Opened the same way as the other loaders in this file, with an
    # explicit UTF-8 encoding instead of the platform default.
    with Path(path).open('r', encoding='utf-8') as f:
        mean_std_dict = json.load(f)

    mean = np.array(mean_std_dict['mean'])
    std = np.array(mean_std_dict['std'])
    return mean, std
def merge_levels(preds: List[np.ndarray]) -> np.ndarray:
    """
    Flatten every prediction level and join them along the last axis.

    The first two dimensions of ``preds[0]`` are kept; each level is
    reshaped to ``(dim0, dim1, -1)`` before the concatenation.

    Args:
        preds: Prediction arrays, one per level.

    Returns:
        Array of shape ``(dim0, dim1, total)`` where ``total`` is the sum of
        the flattened trailing sizes of all levels.
    """
    batch, depth = preds[0].shape[:2]

    flattened_levels = []
    for level in preds:
        flattened_levels.append(level.reshape(batch, depth, -1))

    return np.concatenate(flattened_levels, axis=-1)
def effad_postproc(yolo_output: dict, effad_output: np.ndarray, cls_threshold: float) -> dict:
    """
    Turn the raw anomaly map into a full-size, mask-restricted prediction.

    The map at ``effad_output[0, 0]`` is resized to the crop size, min-max
    normalised, zeroed outside ``cropped_mask`` and pasted into an all-zero
    array shaped like ``mask`` at the crop location.

    Args:
        yolo_output: Dictionary providing ``seg_seat`` (its first two
            dimensions give the crop height and width), ``cropped_mask``,
            ``mask``, ``x_y_min`` as ``(x_min, y_min)`` and ``x_y_max`` as
            ``(x_max, y_max)``.
        effad_output: Array indexed as ``effad_output[0, 0]`` to obtain the
            2-D map (presumably batch and channel axes — confirm against
            the model output).
        cls_threshold: Decision threshold applied to the resized map.

    Returns:
        Dictionary with ``pred`` (the pasted map) and ``res`` (True when the
        maximum of the resized, un-normalised map reaches ``cls_threshold``).
    """
    eps = 1e-7  # keeps the normalisation finite for a constant map

    crop_height, crop_width = yolo_output["seg_seat"].shape[:2]
    anomaly_map = cv2.resize(effad_output[0, 0], (crop_width, crop_height))

    map_min = np.min(anomaly_map)
    map_max = np.max(anomaly_map)

    # The decision uses the raw maximum, before normalisation.
    res = map_max >= cls_threshold

    pred = (anomaly_map - map_min) / (map_max - map_min + eps)
    pred[yolo_output["cropped_mask"] == 0] = 0

    x_min, y_min = yolo_output["x_y_min"]
    x_max, y_max = yolo_output["x_y_max"]

    # NOTE(review): ``output`` takes the dtype of ``mask``; with an integer
    # mask the normalised values would be truncated on assignment — confirm
    # the mask dtype used by callers.
    output = np.zeros_like(yolo_output["mask"])
    output[y_min:y_max, x_min:x_max] = pred

    return {
        "pred": output,
        "res": res
    }