Source code for helper

"""
Copyright (c) 2025 Sima Technologies, Inc.

SPDX-License-Identifier: Apache-2.0
"""
from typing import List
import cv2
import json
import math
import numpy as np
from pathlib import Path

class YoloHelpers:
    """Yolo Helper class."""

    @staticmethod
    def nms(boxes, scores, iou_threshold):
        # Sort box indices by descending score
        sorted_indices = np.argsort(scores)[::-1]

        keep_boxes = []
        while sorted_indices.size > 0:
            # Pick the box with the highest remaining score
            box_id = sorted_indices[0]
            keep_boxes.append(box_id)

            # Compute IoU of the picked box with the rest
            ious = YoloHelpers.compute_iou(boxes[box_id, :], boxes[sorted_indices[1:], :])

            # Remove boxes with IoU over the threshold
            keep_indices = np.where(ious < iou_threshold)[0]
            sorted_indices = sorted_indices[keep_indices + 1]

        return keep_boxes

    @staticmethod
    def bridge(image: np.ndarray, mask: np.ndarray, **kwds) -> tuple:
        # Binarize the mask and find the bounding box of the foreground
        mask[mask > 0] = 1
        coords = np.column_stack(np.where(mask > 0))
        y_min, x_min = coords.min(axis=0)
        y_max, x_max = coords.max(axis=0)

        # Crop the image to the mask extent and convert it to 3-channel grayscale
        cropped_image = image[y_min:y_max, x_min:x_max]
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2GRAY)
        cropped_image = cv2.cvtColor(cropped_image, cv2.COLOR_GRAY2RGB)

        # Zero out everything outside the mask
        cropped_mask = mask[y_min:y_max, x_min:x_max]
        seg_seat = (cropped_image * cropped_mask[:, :, None]).astype(np.uint8)

        # Mirror the crop when its content is left-heavy
        if YoloHelpers.flip_image(seg_seat):
            seg_seat = np.fliplr(seg_seat)
        return seg_seat, cropped_mask, (x_min, y_min), (x_max, y_max)

    @staticmethod
    def flip_image(image: np.ndarray) -> bool:
        # True when the left half of the image has more non-zero pixels than the right half
        _, mid, _ = image.shape
        mid = mid // 2
        return np.count_nonzero(image[:, :mid, :]) > np.count_nonzero(image[:, mid:, :])

    @staticmethod
    def compute_iou(box, boxes):
        # Compute xmin, ymin, xmax, ymax of the intersection rectangles
        xmin = np.maximum(box[0], boxes[:, 0])
        ymin = np.maximum(box[1], boxes[:, 1])
        xmax = np.minimum(box[2], boxes[:, 2])
        ymax = np.minimum(box[3], boxes[:, 3])

        # Compute intersection area
        intersection_area = np.maximum(0, xmax - xmin) * np.maximum(0, ymax - ymin)

        # Compute union area
        box_area = (box[2] - box[0]) * (box[3] - box[1])
        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        union_area = box_area + boxes_area - intersection_area

        # Compute IoU
        iou = intersection_area / union_area
        return iou

    @staticmethod
    def xywh2xyxy(x):
        # Convert bounding box (x, y, w, h) to bounding box (x1, y1, x2, y2)
        y = np.copy(x)
        y[..., 0] = x[..., 0] - x[..., 2] / 2
        y[..., 1] = x[..., 1] - x[..., 3] / 2
        y[..., 2] = x[..., 0] + x[..., 2] / 2
        y[..., 3] = x[..., 1] + x[..., 3] / 2
        return y

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def rescale_boxes(boxes, input_shape, image_shape):
        # Rescale boxes from input dimensions to original image dimensions
        input_shape = np.array([input_shape[1], input_shape[0], input_shape[1], input_shape[0]])
        boxes = np.divide(boxes, input_shape, dtype=np.float32)
        boxes *= np.array([image_shape[1], image_shape[0], image_shape[1], image_shape[0]])
        return boxes

    @staticmethod
    def extract_boxes(box_predictions, img_height, img_width):
        # Extract boxes from predictions
        boxes = box_predictions[:, :4]

        # Scale boxes to original image dimensions
        boxes = YoloHelpers.rescale_boxes(boxes, (640, 640), (img_height, img_width))

        # Convert boxes to xyxy format
        boxes = YoloHelpers.xywh2xyxy(boxes)

        # Clip the boxes to the image bounds
        boxes[:, 0] = np.clip(boxes[:, 0], 0, img_width)
        boxes[:, 1] = np.clip(boxes[:, 1], 0, img_height)
        boxes[:, 2] = np.clip(boxes[:, 2], 0, img_width)
        boxes[:, 3] = np.clip(boxes[:, 3], 0, img_height)
        return boxes

    @staticmethod
    def process_box_output(box_output, img_height=3024, img_width=4032, conf_thres=0.5, iou_thres=0.5, num_masks=32):
        predictions = np.squeeze(box_output).T
        num_classes = box_output.shape[1] - num_masks - 4

        # Filter out object confidence scores below threshold
        scores = np.max(predictions[:, 4 : 4 + num_classes], axis=1)
        predictions = predictions[scores > conf_thres, :]
        scores = scores[scores > conf_thres]

        if len(scores) == 0:
            return [], [], [], np.array([])

        box_predictions = predictions[..., : num_classes + 4]
        mask_predictions = predictions[..., num_classes + 4 :]

        # Get the class with the highest confidence
        class_ids = np.argmax(box_predictions[:, 4:], axis=1)

        # Get bounding boxes for each object
        boxes = YoloHelpers.extract_boxes(box_predictions, img_height, img_width)

        # Apply non-maximum suppression to suppress weak, overlapping bounding boxes
        indices = YoloHelpers.nms(boxes, scores, iou_thres)

        return boxes[indices], scores[indices], class_ids[indices], mask_predictions[indices]

    @staticmethod
    def process_mask_output(mask_predictions, mask_output, boxes, img_height, img_width):
        if mask_predictions.shape[0] == 0:
            return []

        mask_output = np.squeeze(mask_output)

        # Calculate the mask maps for each box
        num_mask, mask_height, mask_width = mask_output.shape  # CHW
        masks = YoloHelpers.sigmoid(mask_predictions @ mask_output.reshape((num_mask, -1)))
        masks = masks.reshape((-1, mask_height, mask_width))

        # Downscale the boxes to match the mask size
        scale_boxes = YoloHelpers.rescale_boxes(boxes, (img_height, img_width), (mask_height, mask_width))

        # For every box/mask pair, get the mask map
        mask_maps = np.zeros((len(scale_boxes), img_height, img_width))
        blur_size = (int(img_width / mask_width), int(img_height / mask_height))
        for i in range(len(scale_boxes)):
            scale_x1 = int(math.floor(scale_boxes[i][0]))
            scale_y1 = int(math.floor(scale_boxes[i][1]))
            scale_x2 = int(math.ceil(scale_boxes[i][2]))
            scale_y2 = int(math.ceil(scale_boxes[i][3]))

            x1 = int(math.floor(boxes[i][0]))
            y1 = int(math.floor(boxes[i][1]))
            x2 = int(math.ceil(boxes[i][2]))
            y2 = int(math.ceil(boxes[i][3]))

            # Upscale the per-box crop of the prototype mask to image resolution,
            # smooth it, and binarize at 0.5
            scale_crop_mask = masks[i][scale_y1:scale_y2, scale_x1:scale_x2]
            crop_mask = cv2.resize(scale_crop_mask, (x2 - x1, y2 - y1), interpolation=cv2.INTER_CUBIC)
            crop_mask = cv2.blur(crop_mask, blur_size)
            crop_mask = (crop_mask > 0.5).astype(np.uint8)
            mask_maps[i, y1:y2, x1:x2] = crop_mask

        return mask_maps

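# Illustrative usage sketch for the box utilities (toy values only):
#
#     boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], dtype=np.float32)
#     scores = np.array([0.9, 0.8, 0.7])
#     keep = YoloHelpers.nms(boxes, scores, iou_threshold=0.5)
#     # keep == [0, 2]; box 1 overlaps box 0 with IoU ~= 0.68 and is suppressed
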
def postprocess(outputs: List[np.ndarray], input_height, input_width) -> tuple:
    boxes, scores, class_ids, mask_pred = YoloHelpers.process_box_output(outputs[0], input_height, input_width)
    mask_maps = YoloHelpers.process_mask_output(mask_pred, outputs[1], boxes, input_height, input_width)
    return boxes, scores, class_ids, mask_maps

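# Illustrative end-to-end sketch. `run_model` is hypothetical, standing in for
# whatever produces the two network outputs (detection head, mask prototypes):
#
#     outputs = run_model(preprocessed_frame)
#     boxes, scores, class_ids, mask_maps = postprocess(outputs, input_height=3024, input_width=4032)
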
def load_threshold(path: Path):
    """Helper function to load the threshold."""
    with path.open('r') as f:
        threshold = json.load(f)
    cls_thres = threshold['cls_thresh']
    seg_thres = threshold['seg_thresh']
    max_score = threshold['max_score']
    min_score = threshold['min_score']
    return cls_thres, seg_thres, max_score, min_score

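# The threshold JSON is expected to carry these four keys (values below are
# placeholders, not calibrated thresholds):
#
#     {"cls_thresh": 0.5, "seg_thresh": 0.5, "max_score": 1.0, "min_score": 0.0}
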
def load_quantile(path: Path):
    """Helper function to load the quantiles."""
    with path.open('r') as f:
        quantiles = json.load(f)
    return (
        np.array(quantiles['q_st_start']),
        np.array(quantiles['q_st_end']),
        np.array(quantiles['q_ae_start']),
        np.array(quantiles['q_ae_end']),
    )

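# The quantile JSON is expected to carry four arrays (shapes depend on the model):
#
#     {"q_st_start": [...], "q_st_end": [...], "q_ae_start": [...], "q_ae_end": [...]}
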
def load_mean_std(path: Path):
    """Helper function to load the mean and standard deviation."""
    with path.open('r') as f:
        mean_std_dict = json.load(f)
    mean = np.array(mean_std_dict['mean'])
    std = np.array(mean_std_dict['std'])
    return mean, std

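# The mean/std JSON is expected to look like the following (per-channel
# placeholder values shown):
#
#     {"mean": [0.485, 0.456, 0.406], "std": [0.229, 0.224, 0.225]}
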
def merge_levels(preds: List[np.ndarray]) -> np.ndarray:
    """Flatten each level's spatial dimensions and concatenate along the last axis."""
    b, d = preds[0].shape[:2]
    return np.concatenate([pred.reshape(b, d, -1) for pred in preds], -1)

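# Shape sketch with toy inputs: each (B, D, H, W) level is flattened to
# (B, D, H*W) and the levels are concatenated along the last axis:
#
#     a = np.zeros((1, 8, 4, 4))       # -> (1, 8, 16)
#     b = np.zeros((1, 8, 2, 2))       # -> (1, 8, 4)
#     merge_levels([a, b]).shape       # (1, 8, 20)
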
def effad_postproc(yolo_output: dict, effad_output: np.ndarray, cls_threshold: float) -> dict:
    eps = 1e-7
    crop_height, crop_width, _ = yolo_output["seg_seat"].shape

    # Resize the anomaly map to the crop size and min-max normalize it
    x = effad_output
    x = x[0, 0]
    x = cv2.resize(x, (crop_width, crop_height))
    x_norm = (x - np.min(x)) / (np.max(x) - np.min(x) + eps)
    pred = x_norm

    # Classification decision on the raw (un-normalized) map
    res = np.max(x) >= cls_threshold

    # Paste the masked prediction back into a full-size map
    cropped_mask = yolo_output["cropped_mask"]
    mask = yolo_output["mask"]
    x_min, y_min = yolo_output["x_y_min"]
    x_max, y_max = yolo_output["x_y_max"]
    pred[cropped_mask == 0] = 0
    output = np.zeros_like(mask)
    output[y_min:y_max, x_min:x_max] = pred
    return {
        "pred": output,
        "res": res,
    }

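# Illustrative wiring of the two stages. The dict keys mirror what
# effad_postproc reads; `anomaly_map` is a hypothetical (1, 1, H, W) array
# produced by the EfficientAD-style model:
#
#     seg_seat, cropped_mask, x_y_min, x_y_max = YoloHelpers.bridge(image, mask)
#     yolo_output = {
#         "seg_seat": seg_seat,
#         "cropped_mask": cropped_mask,
#         "mask": mask,
#         "x_y_min": x_y_min,
#         "x_y_max": x_y_max,
#     }
#     result = effad_postproc(yolo_output, anomaly_map, cls_threshold=0.5)
#     result["pred"]  # full-size anomaly map, zeroed outside the segmented crop
#     result["res"]   # True when the peak anomaly score crosses cls_threshold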