@@ -81,17 +81,18 @@ namespace coder::HWY_NAMESPACE {
8181 void UnpremultiplyRGBA_HWY (const uint8_t *src, int srcStride,
8282 uint8_t *dst, int dstStride, int width,
8383 int height) {
84- const FixedTag<uint8_t , 16 > du8x16;
85- const FixedTag<uint16_t , 8 > du16x8;
86- const FixedTag<uint8_t , 8 > du8x8;
84+ #pragma omp parallel for num_threads(4) schedule(dynamic)
85+ for (int y = 0 ; y < height; ++y) {
8786
88- using VU8x16 = Vec<decltype (du8x16)>;
89- using VU16x8 = Vec<decltype (du16x8)>;
87+ const FixedTag<uint8_t , 16 > du8x16;
88+ const FixedTag<uint16_t , 8 > du16x8;
89+ const FixedTag<uint8_t , 8 > du8x8;
9090
91- VU16x8 mult255 = Set (du16x8, 255 );
91+ using VU8x16 = Vec<decltype (du8x16)>;
92+ using VU16x8 = Vec<decltype (du16x8)>;
93+
94+ VU16x8 mult255 = Set (du16x8, 255 );
9295
93- #pragma omp parallel for num_threads(4) schedule(dynamic)
94- for (int y = 0 ; y < height; ++y) {
9596 auto mSrc = reinterpret_cast <const uint8_t *>(src + y * srcStride);
9697 auto mDst = reinterpret_cast <uint8_t *>(dst + y * dstStride);
9798
@@ -191,17 +192,18 @@ namespace coder::HWY_NAMESPACE {
191192 void PremultiplyRGBA_HWY (const uint8_t *src, int srcStride,
192193 uint8_t *dst, int dstStride, int width,
193194 int height) {
194- const FixedTag<uint8_t , 16 > du8x16;
195- const FixedTag<uint16_t , 8 > du16x8;
196- const FixedTag<uint8_t , 8 > du8x8;
195+ #pragma omp parallel for num_threads(4) schedule(dynamic)
196+ for (int y = 0 ; y < height; ++y) {
197197
198- using VU8x16 = Vec<decltype (du8x16)>;
199- using VU16x8 = Vec<decltype (du16x8)>;
198+ const FixedTag<uint8_t , 16 > du8x16;
199+ const FixedTag<uint16_t , 8 > du16x8;
200+ const FixedTag<uint8_t , 8 > du8x8;
200201
201- VU16x8 mult255d2 = Set (du16x8, 255 / 2 );
202+ using VU8x16 = Vec<decltype (du8x16)>;
203+ using VU16x8 = Vec<decltype (du16x8)>;
204+
205+ VU16x8 mult255d2 = Set (du16x8, 255 / 2 );
202206
203- #pragma omp parallel for num_threads(4) schedule(dynamic)
204- for (int y = 0 ; y < height; ++y) {
205207 auto mSrc = reinterpret_cast <const uint8_t *>(src + y * srcStride);
206208 auto mDst = reinterpret_cast <uint8_t *>(dst + y * dstStride);
207209
0 commit comments