Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ if(CMAKE_CUDA_ARCHITECTURES)
endif()

project(cvcuda
LANGUAGES C CXX
VERSION 0.16.0
DESCRIPTION "CUDA-accelerated Computer Vision algorithms"
LANGUAGES C CXX
VERSION 0.15.0
DESCRIPTION "CUDA-accelerated Computer Vision algorithms"
)

# Make sure the cuda host compiler agrees with what we're using,
Expand Down
19 changes: 10 additions & 9 deletions src/cvcuda/priv/OpResize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -354,16 +354,14 @@ __global__ void CubicResize(SrcWrapper src, DstWrapper dst, int2 srcSize, int2 d

if (dstCoord.y < dstSize.y && dstCoord.x < dstSize.x)
{
float2 srcCoord = (cuda::DropCast<2>(dstCoord) + .5f) * scaleRatio - .5f;
int3 iSrcCoord{(int)floor(srcCoord.x), (int)floor(srcCoord.y), dstCoord.z};

float fx = srcCoord.x - iSrcCoord.x;
float fy = srcCoord.y - iSrcCoord.y;
const float2 srcCoord = (cuda::DropCast<2>(dstCoord) + .5f) * scaleRatio - .5f;
int3 baseCoord{(int)floor(srcCoord.x), (int)floor(srcCoord.y), dstCoord.z};

fx = (iSrcCoord.x < 1 || iSrcCoord.x >= srcSize.x - 3) ? 0 : fx;
const float fx = srcCoord.x - baseCoord.x;
const float fy = srcCoord.y - baseCoord.y;

iSrcCoord.y = cuda::max(1, cuda::min(iSrcCoord.y, srcSize.y - 3));
iSrcCoord.x = cuda::max(1, cuda::min(iSrcCoord.x, srcSize.x - 3));
const int xMax = srcSize.x - 1;
const int yMax = srcSize.y - 1;

float wx[4];
float wy[4];
Expand All @@ -376,10 +374,13 @@ __global__ void CubicResize(SrcWrapper src, DstWrapper dst, int2 srcSize, int2 d
#pragma unroll
for (int cy = -1; cy <= 2; cy++)
{
const int sy = cuda::min(cuda::max(baseCoord.y + cy, 0), yMax);
#pragma unroll
for (int cx = -1; cx <= 2; cx++)
{
sum += src[int3{iSrcCoord.x + cx, iSrcCoord.y + cy, iSrcCoord.z}] * (wx[cx + 1] * wy[cy + 1]);
const int sx = cuda::min(cuda::max(baseCoord.x + cx, 0), xMax);

sum += src[int3{sx, sy, baseCoord.z}] * (wx[cx + 1] * wy[cy + 1]);
}
}

Expand Down
9 changes: 6 additions & 3 deletions src/cvcuda/priv/legacy/random_resized_crop.cu
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, int2 srcSi
float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f + top);
int sy = cuda::round<cuda::RoundMode::DOWN, int>(fy);
fy -= sy;
sy = cuda::max(1, cuda::min(sy, height - 3));
const int syClamped = cuda::max(1, cuda::min(sy, height - 3));
fy += static_cast<float>(sy - syClamped);
sy = syClamped;

const float A = -0.75f;

Expand All @@ -196,8 +198,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, int2 srcSi
float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f + left);
int sx = cuda::round<cuda::RoundMode::DOWN, int>(fx);
fx -= sx;
fx *= ((sx >= 1) && (sx < width - 3));
sx = cuda::max(1, cuda::min(sx, width - 3));
const int sxClamped = cuda::max(1, cuda::min(sx, width - 3));
fx += static_cast<float>(sx - sxClamped);
sx = sxClamped;

float cX[4];
cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A;
Expand Down
9 changes: 6 additions & 3 deletions src/cvcuda/priv/legacy/random_resized_crop_var_shape.cu
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, const int
float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f + top);
int sy = cuda::round<cuda::RoundMode::DOWN, int>(fy);
fy -= sy;
sy = cuda::max(1, cuda::min(sy, height - 3));
const int syClamped = cuda::max(1, cuda::min(sy, height - 3));
fy += static_cast<float>(sy - syClamped);
sy = syClamped;

const float A = -0.75f;

Expand All @@ -208,8 +210,9 @@ __global__ void resize_cubic_v1(const SrcWrapper src, DstWrapper dst, const int
float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f + left);
int sx = cuda::round<cuda::RoundMode::DOWN, int>(fx);
fx -= sx;
fx *= ((sx >= 1) && (sx < width - 3));
sx = cuda::max(1, cuda::min(sx, width - 3));
const int sxClamped = cuda::max(1, cuda::min(sx, width - 3));
fx += static_cast<float>(sx - sxClamped);
sx = sxClamped;

float cX[4];
cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A;
Expand Down
9 changes: 6 additions & 3 deletions src/cvcuda/priv/legacy/resize_var_shape.cu
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,9 @@ __global__ void resize_bicubic(cuda::ImageBatchVarShapeWrap<const T> src, cuda::
float fy = (float)((dst_y + 0.5f) * scale_y - 0.5f);
int sy = cuda::round<cuda::RoundMode::DOWN, int>(fy);
fy -= sy;
sy = cuda::max(1, cuda::min(sy, height - 3));
const int syClamped = cuda::max(1, cuda::min(sy, height - 3));
fy += static_cast<float>(sy - syClamped); // rebase fractional offset after clamp
sy = syClamped;

const float A = -0.75f;

Expand All @@ -203,8 +205,9 @@ __global__ void resize_bicubic(cuda::ImageBatchVarShapeWrap<const T> src, cuda::
float fx = (float)((dst_x + 0.5f) * scale_x - 0.5f);
int sx = cuda::round<cuda::RoundMode::DOWN, int>(fx);
fx -= sx;
fx *= ((sx >= 1) && (sx < width - 3));
sx = cuda::max(1, cuda::min(sx, width - 3));
const int sxClamped = cuda::max(1, cuda::min(sx, width - 3));
fx += static_cast<float>(sx - sxClamped);
sx = sxClamped;

float cX[4];
cX[0] = ((A * (fx + 1.0f) - 5.0f * A) * (fx + 1.0f) + 8.0f * A) * (fx + 1.0f) - 4.0f * A;
Expand Down