|
43 | 43 | #if !defined CUDA_DISABLER |
44 | 44 |
|
45 | 45 | #include <cfloat> |
| 46 | +#include <cmath> |
46 | 47 | #include "opencv2/core/cuda/common.hpp" |
47 | 48 | #include "opencv2/core/cuda/border_interpolate.hpp" |
48 | 49 | #include "opencv2/core/cuda/vec_traits.hpp" |
|
53 | 54 |
|
54 | 55 | namespace cv { namespace cuda { namespace device |
55 | 56 | { |
// Lanczos reconstruction weight with a support of 4 samples on each side:
//     w(x) = sinc(x) * sinc(x / 4)   for |x| < 4
//     w(x) = 0                       otherwise
// where sinc(t) = sin(pi * t) / (pi * t). The function is even in x.
//
// x_ : signed distance (in source pixels) between the sampling position and a tap.
// Returns the unnormalized weight of that tap.
__device__ float lanczos_weight(float x_)
{
    // Single-precision pi: M_PI is a double (and is not guaranteed to be
    // provided by <cmath>), so using it here would promote the multiply to
    // double precision inside device code.
    const float pi = 3.14159265358979323846f;

    const float x = fabsf(x_);

    // sinc(x) * sinc(x / 4) -> 1 as x -> 0. A threshold (instead of an exact
    // comparison with zero) also covers tiny inputs for which pi_x * pi_x
    // would underflow to zero and turn the quotient below into 0 / 0.
    if (x < FLT_EPSILON)
        return 1.0f;

    // Outside the kernel support.
    if (x >= 4.0f)
        return 0.0f;

    const float pi_x = pi * x;

    // sin(pi x) / (pi x) * sin(pi x / 4) / (pi x / 4), with the two
    // denominators folded into (pi x)^2 / 4.
    return sinf(pi_x) * sinf(pi_x * 0.25f) / (pi_x * pi_x * 0.25f);
}
| 67 | + |
56 | 68 | // kernels |
// Lanczos-4 resize kernel: each thread produces one destination pixel.
//
// Launch layout: 2D grid of 2D blocks; thread (x, y) computed from
// blockIdx/blockDim/threadIdx writes dst(y, x). Threads that fall outside
// dst.cols x dst.rows exit immediately, so the grid may overshoot the image.
//
// src    : source image.
// dst    : destination image; its cols/rows define the output size.
// fy, fx : factors that map a destination coordinate to a source coordinate
//          (src_y = y * fy, src_x = x * fx) -- presumably the src/dst size
//          ratios; confirm against the host caller.
//
// The 8x8 footprint around (src_x, src_y) is weighted with the separable
// lanczos_weight() kernel. Taps outside the source are clamped to the nearest
// valid row/column, and the sum is normalized by the total weight before being
// saturated to the element type.
template <typename T>
__global__ void resize_lanczos4(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= dst.cols || y >= dst.rows)
        return;

    typedef typename VecTraits<T>::elem_type elem_type;
    constexpr int cn = VecTraits<T>::cn;
    constexpr int taps = 8; // offsets -3 .. +4 around floor(src coordinate)

    const float src_x = static_cast<float>(x) * fx;
    const float src_y = static_cast<float>(y) * fy;

    const int xmin = static_cast<int>(floorf(src_x)) - 3;
    const int ymin = static_cast<int>(floorf(src_y)) - 3;

    // The kernel is separable and the weights do not depend on the channel,
    // so the 8 horizontal and 8 vertical weights are evaluated once per
    // thread rather than once per tap per channel.
    float wx[taps];
    float wy[taps];
    #pragma unroll
    for (int k = 0; k < taps; ++k)
    {
        wx[k] = lanczos_weight(src_x - static_cast<float>(xmin + k));
        wy[k] = lanczos_weight(src_y - static_cast<float>(ymin + k));
    }

    float acc_val[cn];
    #pragma unroll
    for (int c = 0; c < cn; ++c)
        acc_val[c] = 0.0f;

    // The weight sum is the same for every channel.
    float acc_weight = 0.0f;

    #pragma unroll
    for (int j = 0; j < taps; ++j)
    {
        // Zero-weight taps are skipped (not multiplied) so that a non-finite
        // source value under a zero weight cannot poison the sum.
        if (wy[j] == 0.0f)
            continue;

        // Clamp to the valid row range (border replication).
        const int iy = ::max(0, ::min(ymin + j, src.rows - 1));

        #pragma unroll
        for (int i = 0; i < taps; ++i)
        {
            if (wx[i] == 0.0f)
                continue;

            // Clamp to the valid column range (border replication).
            const int ix = ::max(0, ::min(xmin + i, src.cols - 1));

            const float w = wy[j] * wx[i];

            // Read the pixel once and view it as an array of cn elements so
            // every channel is accumulated from the same load.
            const T val = src(iy, ix);
            const elem_type* val_ptr = reinterpret_cast<const elem_type*>(&val);

            #pragma unroll
            for (int c = 0; c < cn; ++c)
                acc_val[c] += static_cast<float>(val_ptr[c]) * w;

            acc_weight += w;
        }
    }

    T result_vec;
    elem_type* result_ptr = reinterpret_cast<elem_type*>(&result_vec);
    #pragma unroll
    for (int c = 0; c < cn; ++c)
    {
        // Normalize by the accumulated weight; a non-positive sum yields 0.
        const float result = acc_weight > 0.0f ? (acc_val[c] / acc_weight) : 0.0f;
        result_ptr[c] = saturate_cast<elem_type>(result);
    }
    dst(y, x) = result_vec;
}
| 139 | + |
57 | 140 |
|
58 | 141 | template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx) |
59 | 142 | { |
@@ -243,6 +326,21 @@ namespace cv { namespace cuda { namespace device |
243 | 326 | cudaSafeCall( cudaDeviceSynchronize() ); |
244 | 327 | } |
245 | 328 |
|
| 329 | + // callers for lanczos interpolation |
| 330 | + |
// Host-side launcher for the resize_lanczos4 kernel.
//
// Covers the destination image with 32x8 thread blocks, enqueues the kernel
// on `stream`, and reports launch errors through cudaSafeCall. When the
// default stream (0) is used, the call also waits for the device to finish.
template <typename T>
void call_resize_lanczos4_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

    resize_lanczos4<T><<<blocks, threads, 0, stream>>>(src, dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    // A caller-provided stream stays asynchronous.
    if (stream != 0)
        return;

    cudaSafeCall( cudaDeviceSynchronize() );
}
| 343 | + |
246 | 344 | // ResizeNearestDispatcher |
247 | 345 |
|
248 | 346 | template <typename T> struct ResizeNearestDispatcher |
@@ -352,6 +450,16 @@ namespace cv { namespace cuda { namespace device |
352 | 450 | template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {}; |
353 | 451 | template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {}; |
354 | 452 |
|
| 453 | + // ResizeLanczosDispatcher |
| 454 | + |
// Dispatcher entry for Lanczos interpolation. It exposes the same call()
// signature as the other Resize*Dispatcher types so it can sit in the same
// function table; the whole-image view and the ROI offsets are accepted but
// not used, and the work is forwarded to the global-memory launcher.
template <typename T> struct ResizeLanczosDispatcher
{
    static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/,
                     const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        call_resize_lanczos4_glob<T>(src, dst, fy, fx, stream);
    }
};
| 462 | + |
355 | 463 | // ResizeAreaDispatcher |
356 | 464 |
|
357 | 465 | template <typename T> struct ResizeAreaDispatcher |
@@ -393,18 +501,23 @@ namespace cv { namespace cuda { namespace device |
// Type-erased entry point for resize: picks the dispatcher that matches the
// integer `interpolation` code and invokes it with the typed image views.
//
// src, srcWhole, dst : byte views that are reinterpreted as PtrStepSz<T>.
// yoff, xoff         : forwarded unchanged to the selected dispatcher.
// fy, fx             : scale factors forwarded unchanged to the dispatcher.
// interpolation      : index into the table below
//                      (0 nearest, 1 linear, 2 cubic, 3 area, 4 lanczos).
//                      Out-of-range values fall back to linear.
// stream             : CUDA stream forwarded to the dispatcher.
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
{
    typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);

    // Single dispatch table; its length is derived below so the range check
    // cannot drift out of sync when an entry is added or removed.
    static const func_t funcs[] =
    {
        ResizeNearestDispatcher<T>::call,
        ResizeLinearDispatcher<T>::call,
        ResizeCubicDispatcher<T>::call,
        ResizeAreaDispatcher<T>::call,
        ResizeLanczosDispatcher<T>::call
    };
    const int funcs_count = static_cast<int>(sizeof(funcs) / sizeof(funcs[0]));

    // change to linear if area interpolation upscaling
    if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
        interpolation = 1;

    // Guard the table lookup: unknown interpolation codes default to linear.
    if (interpolation < 0 || interpolation >= funcs_count)
        interpolation = 1;

    funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
}
410 | 523 |
|
|
0 commit comments