Skip to content

Commit 595cde3

Browse files
committed
base lanczos4
1 parent 5ed0a37 commit 595cde3

File tree

4 files changed

+398
-3
lines changed

4 files changed

+398
-3
lines changed

modules/cudawarping/src/cuda/resize.cu

Lines changed: 115 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#if !defined CUDA_DISABLER
4444

4545
#include <cfloat>
46+
#include <cmath>
4647
#include "opencv2/core/cuda/common.hpp"
4748
#include "opencv2/core/cuda/border_interpolate.hpp"
4849
#include "opencv2/core/cuda/vec_traits.hpp"
@@ -53,7 +54,89 @@
5354

5455
namespace cv { namespace cuda { namespace device
5556
{
57+
// Lanczos window of half-width 4, evaluated at signed offset x_.
//
// Returns 1 at x_ == 0, 0 for |x_| >= 4, and otherwise
//     sin(pi*x) * sin(pi*x / 4) / (pi*x * pi*x / 4)   with x = |x_|.
//
// Everything is kept in single precision: pi is a float constant rather
// than the double-precision M_PI macro, so no double arithmetic is emitted
// in device code and the function does not depend on M_PI being defined.
__device__ float lanczos_weight(float x_)
{
    const float pi = 3.14159265358979323846f;

    const float x = fabsf(x_);
    if (x == 0.0f)
        return 1.0f;   // limit of the expression below; avoids 0/0
    if (x >= 4.0f)
        return 0.0f;   // outside the window support

    const float pi_x = pi * x;
    return sinf(pi_x) * sinf(pi_x / 4.0f) / (pi_x * pi_x / 4.0f);
}
67+
5668
// kernels
69+
// Lanczos-4 resize kernel.
//
// Launch layout: 2D grid, one thread per destination pixel; the thread's x
// index selects the dst column and its y index the dst row. Threads that
// fall outside dst return immediately.
//
// src    - source image
// dst    - destination image (written at (y, x))
// fy, fx - factors multiplied with the destination row / column to obtain
//          the source coordinate
//
// For every destination pixel an 8x8 neighbourhood of source pixels
// (floor(coord) - 3 .. floor(coord) + 4 on each axis) is weighted by the
// separable product of lanczos_weight() and normalised by the sum of the
// weights actually used. Tap coordinates outside the source are clamped
// to the nearest valid row / column.
//
// Weights and source reads do not depend on the channel, so they are
// computed once per tap and shared by all channels.
template <typename T>
__global__ void resize_lanczos4(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= dst.cols || y >= dst.rows)
        return;

    typedef typename VecTraits<T>::elem_type elem_type;
    constexpr int cn = VecTraits<T>::cn;
    constexpr int taps = 8; // floor - 3 .. floor + 4 inclusive

    const float src_x = static_cast<float>(x) * fx;
    const float src_y = static_cast<float>(y) * fy;

    const int in_height = src.rows;
    const int in_width = src.cols;

    const int xmin = int(floorf(src_x)) - 3;
    const int ymin = int(floorf(src_y)) - 3;

    // Horizontal weights are identical for every row of the window.
    float wx[taps];
    for (int i = 0; i < taps; ++i)
        wx[i] = lanczos_weight(src_x - static_cast<float>(xmin + i));

    float acc_val[cn];
    for (int c = 0; c < cn; ++c)
        acc_val[c] = 0.0f;
    float acc_weight = 0.0f; // same for all channels

    for (int j = 0; j < taps; ++j)
    {
        const int cy = ymin + j;
        const float wy = lanczos_weight(src_y - static_cast<float>(cy));
        if (wy == 0.0f)
            continue;

        const int iy = ::max(0, ::min(cy, in_height - 1));

        for (int i = 0; i < taps; ++i)
        {
            if (wx[i] == 0.0f)
                continue;

            const float w = wy * wx[i];
            const int ix = ::max(0, ::min(xmin + i, in_width - 1));

            // Read the pixel once and view it as an array of cn scalars.
            const T val = src(iy, ix);
            const elem_type* val_ptr = reinterpret_cast<const elem_type*>(&val);

            for (int c = 0; c < cn; ++c)
                acc_val[c] += static_cast<float>(val_ptr[c]) * w;

            acc_weight += w;
        }
    }

    T result_vec;
    elem_type* result_ptr = reinterpret_cast<elem_type*>(&result_vec);
    for (int c = 0; c < cn; ++c)
    {
        // Guard the normalisation against a non-positive weight sum.
        const float result = acc_weight > 0.0f ? (acc_val[c] / acc_weight) : 0.0f;
        result_ptr[c] = saturate_cast<elem_type>(result);
    }
    dst(y, x) = result_vec;
}
139+
57140

58141
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
59142
{
@@ -243,6 +326,21 @@ namespace cv { namespace cuda { namespace device
243326
cudaSafeCall( cudaDeviceSynchronize() );
244327
}
245328

329+
// callers for lanczos interpolation
330+
331+
// Host-side launcher for resize_lanczos4.
//
// Uses 32x8 thread blocks and enough blocks to cover every dst pixel,
// enqueues the kernel on `stream`, checks for a launch error, and blocks
// until the device is idle when the stream handle is 0.
template <typename T>
void call_resize_lanczos4_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

    resize_lanczos4<<<blocks, threads, 0, stream>>>(src, dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    const bool waitForDevice = (stream == 0);
    if (waitForDevice)
    {
        cudaSafeCall( cudaDeviceSynchronize() );
    }
}
343+
246344
// ResizeNearestDispatcher
247345

248346
template <typename T> struct ResizeNearestDispatcher
@@ -352,6 +450,16 @@ namespace cv { namespace cuda { namespace device
352450
// Explicit specialization for float: adds no members of its own, everything is inherited from SelectImplForCubic<float>.
template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
353451
// Explicit specialization for float4: adds no members of its own, everything is inherited from SelectImplForCubic<float4>.
template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
354452

453+
// ResizeLanczosDispatcher
454+
455+
// Adapter giving the Lanczos launcher the dispatcher `call` signature.
// The whole-image view and the x/y offsets are accepted but not used;
// only src, dst, the two factors and the stream are forwarded.
template <typename T> struct ResizeLanczosDispatcher
{
    static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& /*srcWhole*/, int /*yoff*/, int /*xoff*/, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        call_resize_lanczos4_glob<T>(src, dst, fy, fx, stream);
    }
};
462+
355463
// ResizeAreaDispatcher
356464

357465
template <typename T> struct ResizeAreaDispatcher
@@ -393,18 +501,23 @@ namespace cv { namespace cuda { namespace device
393501
// Type-erased entry point: reinterprets the byte-typed images as
// PtrStepSz<T> and forwards to the dispatcher selected by `interpolation`.
//
// src, srcWhole, yoff, xoff, dst, fy, fx, stream are passed through to the
// dispatcher unchanged (apart from the PtrStepSz<T> conversion).
// interpolation indexes the table below: 0 nearest, 1 linear, 2 cubic,
// 3 area, 4 Lanczos. Any other value falls back to linear.
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
{
    typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
    static const func_t funcs[] =
    {
        ResizeNearestDispatcher<T>::call,
        ResizeLinearDispatcher<T>::call,
        ResizeCubicDispatcher<T>::call,
        ResizeAreaDispatcher<T>::call,
        ResizeLanczosDispatcher<T>::call
    };
    // Derived from the table so the range check cannot drift out of sync
    // when an entry is added or removed.
    const int funcs_count = static_cast<int>(sizeof(funcs) / sizeof(funcs[0]));

    // change to linear if area interpolation upscaling
    if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
        interpolation = 1;

    // Bounds check for interpolation mode
    if (interpolation < 0 || interpolation >= funcs_count)
        interpolation = 1; // Default to linear

    funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
}
410523

modules/cudawarping/src/resize.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ void cv::cuda::resize(InputArray _src, OutputArray _dst, Size dsize, double fx,
7070
};
7171

7272
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
73-
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
73+
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA || interpolation == INTER_LANCZOS4 );
7474
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
7575

7676
if (dsize == Size())

modules/cudawarping/test/interpolation.hpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,4 +128,56 @@ template <typename T> struct CubicInterpolator
128128
}
129129
};
130130

131+
template <typename T> struct LanczosInterpolator
132+
{
133+
static constexpr int A = 4;
134+
135+
static float lanczosCoeff(float x_)
136+
{
137+
float x = fabsf(x_);
138+
if (x == 0.0f)
139+
return 1.0f;
140+
if (x >= A)
141+
return 0.0f;
142+
143+
const float pi = 3.14159265358979323846f;
144+
float pi_x = pi * x;
145+
return sinf(pi_x) * sinf(pi_x / A) / (pi_x * pi_x / A);
146+
}
147+
148+
static T getValue(const cv::Mat& src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar())
149+
{
150+
const int xmin = (int) floorf(x) - A + 1;
151+
const int xmax = (int) floorf(x) + A;
152+
153+
const int ymin = (int) floorf(y) - A + 1;
154+
const int ymax = (int) floorf(y) + A;
155+
156+
float sum = 0.0f;
157+
float wsum = 0.0f;
158+
159+
for (int cy = ymin; cy <= ymax; ++cy)
160+
{
161+
float wy = lanczosCoeff(y - cy);
162+
if (wy == 0.0f)
163+
continue;
164+
165+
for (int cx = xmin; cx <= xmax; ++cx)
166+
{
167+
float wx = lanczosCoeff(x - cx);
168+
if (wx == 0.0f)
169+
continue;
170+
171+
const float w = wy * wx;
172+
sum += w * readVal<T>(src, cy, cx, c, border_type, borderVal);
173+
wsum += w;
174+
}
175+
}
176+
177+
float res = (!wsum)? 0 : sum / wsum;
178+
179+
return cv::saturate_cast<T>(res);
180+
}
181+
};
182+
131183
#endif // __OPENCV_TEST_INTERPOLATION_HPP__

0 commit comments

Comments
 (0)