diff --git a/invokeai/backend/image_util/imwatermark/vendor.py b/invokeai/backend/image_util/imwatermark/vendor.py
index ef06274ff73..ed840cbfa97 100644
--- a/invokeai/backend/image_util/imwatermark/vendor.py
+++ b/invokeai/backend/image_util/imwatermark/vendor.py
@@ -7,9 +7,10 @@
 # `opencv-contrib-python`. It's easier to copy the code over than complicate the installation process by
 # requiring an extra post-install step of removing `opencv-python` and installing `opencv-contrib-python`.
 
+import base64
 import struct
 import uuid
-import base64
+
 import cv2
 import numpy as np
 import pywt
@@ -188,14 +189,18 @@ def encode(self, bgr):
 
         yuv = cv2.cvtColor(bgr, cv2.COLOR_BGR2YUV)
 
+        block_size = self._block
+        rows4 = row // 4 * 4
+        cols4 = col // 4 * 4
+
+        # rows4/cols4 crop each channel to a multiple of 4 in both dimensions before the DWT
         for channel in range(2):
             if self._scales[channel] <= 0:
                 continue
 
-            ca1, (h1, v1, d1) = pywt.dwt2(yuv[: row // 4 * 4, : col // 4 * 4, channel], "haar")
+            ca1, (h1, v1, d1) = pywt.dwt2(yuv[:rows4, :cols4, channel], "haar")
             self.encode_frame(ca1, self._scales[channel])
-
-            yuv[: row // 4 * 4, : col // 4 * 4, channel] = pywt.idwt2((ca1, (v1, h1, d1)), "haar")
+            yuv[:rows4, :cols4, channel] = pywt.idwt2((ca1, (v1, h1, d1)), "haar")
 
         bgr_encoded = cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR)
         return bgr_encoded
@@ -286,19 +291,30 @@ def encode_frame(self, frame, scale):
 
         For i-th block, we encode watermark[i] bit into it
         """
-        (row, col) = frame.shape
-        num = 0
-        for i in range(row // self._block):
-            for j in range(col // self._block):
-                block = frame[
-                    i * self._block : i * self._block + self._block, j * self._block : j * self._block + self._block
-                ]
-                wmBit = self._watermarks[(num % self._wmLen)]
+        block = self._block
+        wmLen = self._wmLen
+        watermarks = self._watermarks
 
-                diffusedBlock = self.diffuse_dct_matrix(block, wmBit, scale)
-                # diffusedBlock = self.diffuse_dct_svd(block, wmBit, scale)
-                frame[
-                    i * self._block : i * self._block + self._block, j * self._block : j * self._block + self._block
-                ] = diffusedBlock
+        row, col = frame.shape
 
-                num = num + 1
+        # Only whole blocks are encoded; trailing rows/columns that do not fill a block are left untouched
+
+        num_blocks_row = row // block
+        num_blocks_col = col // block
+
+        # Visit blocks in row-major order; the flat index i * num_blocks_col + j picks the watermark bit
+        # Slicing frame (assumed to be a NumPy array) yields a view, not a copy, so block edits land in frame
+        for i in range(num_blocks_row):
+            i_start = i * block
+            i_end = i_start + block
+            for j in range(num_blocks_col):
+                j_start = j * block
+                j_end = j_start + block
+
+                block_data = frame[i_start:i_end, j_start:j_end]
+                wmBit = watermarks[(i * num_blocks_col + j) % wmLen]
+                # Avoid an extra variable for 'num', just use the fast modulo calculation
+
+                # The return value is ignored: this relies on diffuse_dct_matrix modifying block_data in place
+                self.diffuse_dct_matrix(block_data, wmBit, scale)
+                # block_data is a view, so frame is mutated as intended