diff --git a/unstructured/metrics/object_detection.py b/unstructured/metrics/object_detection.py
index 7c28721518..0320214155 100644
--- a/unstructured/metrics/object_detection.py
+++ b/unstructured/metrics/object_detection.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 import torch
+from numba import njit
 
 IOU_THRESHOLDS = torch.tensor(
     [0.5000, 0.5500, 0.6000, 0.6500, 0.7000, 0.7500, 0.8000, 0.8500, 0.9000, 0.9500]
@@ -322,21 +323,18 @@ def _box_iou(box1: torch.Tensor, box2: torch.Tensor) -> torch.Tensor:
             iou: Tensor of shape [N, M]: the NxM matrix containing the pairwise IoU values for every
             element in boxes1 and boxes2
         """
-
-        def box_area(box):
-            # box = 4xn
-            return (box[2] - box[0]) * (box[3] - box[1])
-
-        area1 = box_area(box1.T)
-        area2 = box_area(box2.T)
-
-        # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
-        inter = (
-            (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2]))
-            .clamp(0)
-            .prod(2)
-        )
-        return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
+        # Run the numba kernel on float64 CPU copies so that one compiled signature serves
+        # every input dtype (ints, half, bfloat16) and nothing is rounded before dividing.
+        box1_np = box1.detach().cpu().to(torch.float64).numpy()
+        box2_np = box2.detach().cpu().to(torch.float64).numpy()
+        iou_np = _box_iou_np(box1_np, box2_np)
+
+        # torch.from_numpy always yields a CPU tensor; hand the result back on the inputs'
+        # device and in their promoted floating-point dtype so callers see no change.
+        out_dtype = torch.promote_types(box1.dtype, box2.dtype)
+        if not out_dtype.is_floating_point:
+            out_dtype = torch.get_default_dtype()
+        return torch.from_numpy(iou_np).to(device=box1.device, dtype=out_dtype)
 
     def _compute_targets(
         self,
@@ -697,6 +695,46 @@ def _compute_detection_metrics_per_cls(
         return ap, precision, recall
 
 
+# Numba-accelerated IoU calculation for numpy arrays.
+# fastmath is deliberately left off so results follow strict IEEE-754 arithmetic.
+@njit(cache=True)
+def _box_iou_np(box1: np.ndarray, box2: np.ndarray) -> np.ndarray:
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+
+    Pairs whose union area is not positive (e.g. two zero-area boxes) get an IoU of 0
+    instead of the NaN a plain 0/0 division would give.
+
+    Args:
+        box1: ndarray of shape [N, 4]
+        box2: ndarray of shape [M, 4]
+
+    Returns:
+        iou: float64 ndarray of shape [N, M]: Pairwise IoU values
+    """
+    N = box1.shape[0]
+    M = box2.shape[0]
+    # float64 output: a hard-coded float32 buffer would silently round float64 inputs
+    iou = np.zeros((N, M), dtype=np.float64)
+
+    area1 = (box1[:, 2] - box1[:, 0]) * (box1[:, 3] - box1[:, 1])  # [N]
+    area2 = (box2[:, 2] - box2[:, 0]) * (box2[:, 3] - box2[:, 1])  # [M]
+
+    for i in range(N):
+        for j in range(M):
+            xA = max(box1[i, 0], box2[j, 0])
+            yA = max(box1[i, 1], box2[j, 1])
+            xB = min(box1[i, 2], box2[j, 2])
+            yB = min(box1[i, 3], box2[j, 3])
+            inter_w = max(0.0, xB - xA)
+            inter_h = max(0.0, yB - yA)
+            inter = inter_w * inter_h
+            union = area1[i] + area2[j] - inter
+            iou[i, j] = inter / union if union > 0.0 else 0.0
+    return iou
+
+
 if __name__ == "__main__":
     from dataclasses import asdict
 