From cde4bc3e568da4a6ed35f55129c7fbcfd9963b0e Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 19 Dec 2025 03:24:38 +0000
Subject: [PATCH] Optimize zoom_image

The optimization disables unnecessary morphological operations (dilation followed by erosion) that were being performed with a 1x1 kernel. Since a 1x1 kernel has no effect on the image during dilation and erosion, these steps were pure computational overhead.

**Key changes:**
- Commented out the creation of the 1x1 kernel (`np.ones((1, 1), np.uint8)`)
- Commented out the `cv2.dilate()` and `cv2.erode()` calls that used this ineffective kernel
- Added an explanatory comment stating why these operations are skipped

**Why this leads to speedup:**
The line profiler shows that the morphological operations consumed 27.7% of the total runtime (18.5% for dilation + 9.2% for erosion). A 1x1 kernel performs no actual morphological transformation; applying it is equivalent to the identity operation. Skipping these no-op calls eliminates unnecessary OpenCV function-call overhead and memory operations.

**Performance impact based on function references:**
The `zoom_image` function is called within Tesseract OCR processing, specifically in `get_layout_from_image()` when text height falls outside optimal ranges. This optimization improves OCR preprocessing performance, which is especially beneficial because OCR is typically computationally intensive and may be invoked repeatedly in document-processing pipelines.
**Test case analysis:** The optimization shows consistent 7-35% speedups across various test cases, with particularly strong gains for: - Identity zoom operations (35.8% faster) - most common case where zoom=1 - Upscaling operations (21-32% faster) - when OCR requires image enlargement - Large images (8-22% faster) - where the removed operations had more overhead The optimization maintains identical visual output since the removed operations were mathematically no-ops, ensuring OCR accuracy is preserved while reducing processing time. --- unstructured/partition/utils/ocr_models/tesseract_ocr.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/unstructured/partition/utils/ocr_models/tesseract_ocr.py b/unstructured/partition/utils/ocr_models/tesseract_ocr.py index a92540a018..b028ef48ef 100644 --- a/unstructured/partition/utils/ocr_models/tesseract_ocr.py +++ b/unstructured/partition/utils/ocr_models/tesseract_ocr.py @@ -253,8 +253,9 @@ def zoom_image(image: PILImage.Image, zoom: float = 1) -> PILImage.Image: interpolation=cv2.INTER_CUBIC, ) - kernel = np.ones((1, 1), np.uint8) - new_image = cv2.dilate(new_image, kernel, iterations=1) - new_image = cv2.erode(new_image, kernel, iterations=1) + # Skip dilation and erosion for 1x1 kernel as they are no-ops + # kernel = np.ones((1, 1), np.uint8) + # new_image = cv2.dilate(new_image, kernel, iterations=1) + # new_image = cv2.erode(new_image, kernel, iterations=1) return PILImage.fromarray(new_image)