Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions python/mod_cvcuda/nvcv/DataType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ namespace nvcvpy::priv {

namespace {

// Float16 type wrapper for numpy.float16 support
struct Float16
{
uint16_t data; // raw IEEE-754 binary16 bits; tag type for dtype dispatch only — no arithmetic is performed on it
};

template<class T>
struct IsComplex : std::false_type
{
Expand Down Expand Up @@ -84,6 +90,47 @@ bool FindDataType(const py::dtype &dt, nvcv::DataType *dtype)

int itemsize = dtbase.itemsize();

// Special handling for Float16
if constexpr (std::is_same_v<T, Float16>)
{
if (dtbase.kind() == 'f' && itemsize == 2)
{
nvcv::PackingParams pp = {};
pp.byteOrder = nvcv::ByteOrder::MSB;

switch (nchannels)
{
case 1:
pp.swizzle = nvcv::Swizzle::S_X000;
break;
case 2:
pp.swizzle = nvcv::Swizzle::S_XY00;
break;
case 3:
pp.swizzle = nvcv::Swizzle::S_XYZ0;
break;
case 4:
pp.swizzle = nvcv::Swizzle::S_XYZW;
break;
default:
NVCV_ASSERT(!"Invalid number of channels");
}
for (int i = 0; i < nchannels; ++i)
{
pp.bits[i] = 16;
}
nvcv::Packing packing = MakePacking(pp);

NVCV_ASSERT(dtype != nullptr);
*dtype = nvcv::DataType{nvcv::DataKind::FLOAT, packing};
return true;
}
else
{
return false;
}
}

if (dtbase.equal(py::dtype::of<T>()))
{
nvcv::DataKind dataKind;
Expand Down Expand Up @@ -151,6 +198,7 @@ bool FindDataType(const py::dtype &dt, nvcv::DataType *dtype)
using SupportedBaseTypes = std::tuple<
std::complex<float>,
std::complex<double>,
Float16,
float, double,
uint8_t, int8_t,
uint16_t, int16_t,
Expand Down Expand Up @@ -181,6 +229,26 @@ bool FindDType(T *, const nvcv::DataType &dtype, py::dtype *dt)
int nchannels = dtype.numChannels();
int itemsize = dtype.bitsPerPixel() / 8;

// Special handling for Float16
if constexpr (std::is_same_v<T, Float16>)
{
if (dtype.dataKind() == nvcv::DataKind::FLOAT && itemsize / nchannels == 2)
{
NVCV_ASSERT(dt != nullptr);
*dt = py::dtype("float16");

if (nchannels > 1)
{
*dt = py::dtype(util::FormatString("%de", nchannels));
}
return true;
}
else
{
return false;
}
}

if (sizeof(T) != itemsize / nchannels)
{
return false;
Expand Down
14 changes: 10 additions & 4 deletions src/cvcuda/priv/legacy/convert_to.cu
Original file line number Diff line number Diff line change
Expand Up @@ -173,20 +173,26 @@ ErrorCode ConvertTo::infer(const TensorDataStridedCuda &inData, const TensorData
}

if (!(input_datatype == kCV_8U || input_datatype == kCV_8S || input_datatype == kCV_16U || input_datatype == kCV_16S
|| input_datatype == kCV_32S || input_datatype == kCV_32F || input_datatype == kCV_64F))
|| input_datatype == kCV_16F || input_datatype == kCV_32S || input_datatype == kCV_32F
|| input_datatype == kCV_64F))
{
LOG_ERROR("Invalid DataType " << input_datatype);
return ErrorCode::INVALID_DATA_TYPE;
}

if (!(output_datatype == kCV_8U || output_datatype == kCV_8S || output_datatype == kCV_16U
|| output_datatype == kCV_16S || output_datatype == kCV_32S || output_datatype == kCV_32F
|| output_datatype == kCV_64F))
|| output_datatype == kCV_16S || output_datatype == kCV_16F || output_datatype == kCV_32S
|| output_datatype == kCV_32F || output_datatype == kCV_64F))
{
LOG_ERROR("Invalid Converted DataType " << output_datatype);
return ErrorCode::INVALID_DATA_TYPE;
}

// Treat kCV_16F (float16) as kCV_16U (ushort) for conversion dispatch.
// NOTE(review): this reinterprets float16 bit patterns as ushort. That is only
// bit-exact for a plain same-type copy (alpha == 1, beta == 0); any scale/offset
// or cross-type conversion would operate on the raw bit patterns rather than the
// float16 values -- confirm the intended semantics.
cuda_op::DataType input_dispatch = (input_datatype == kCV_16F) ? kCV_16U : input_datatype;
cuda_op::DataType output_dispatch = (output_datatype == kCV_16F) ? kCV_16U : output_datatype;

typedef ErrorCode (*func_t)(const nvcv::TensorDataStridedCuda &inData, const nvcv::TensorDataStridedCuda &outData,
int numChannels, const double alpha, const double beta, cudaStream_t stream);

Expand All @@ -202,7 +208,7 @@ ErrorCode ConvertTo::infer(const TensorDataStridedCuda &inData, const TensorData
};

// clang-format on
const func_t func = funcs[input_datatype][output_datatype];
const func_t func = funcs[input_dispatch][output_dispatch];
return func(inData, outData, channels, alpha, beta, stream);
}

Expand Down
142 changes: 142 additions & 0 deletions tests/cvcuda/python/test_float16_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Additional test for float16 support in CV-CUDA operations
Add these test cases to existing test files after float16 support is merged
"""

import cvcuda
import pytest as t
import numpy as np

try:
import cupy as cp

HAS_CUPY = True
except ImportError:
HAS_CUPY = False


@t.mark.parametrize(
    "input_args,dtype,scale,offset",
    [
        # Float16 output test cases
        (((5, 16, 23, 4), np.uint8, "NHWC"), np.float16, 1.0 / 255.0, 0.0),
        (((16, 23, 3), np.uint8, "HWC"), np.float16, 1.0 / 255.0, 0.0),
        (((1, 224, 224, 3), np.uint8, "NHWC"), np.float16, 0.5, -0.5),
        # Float16 input to other types
        (((5, 16, 23, 4), np.float16, "NHWC"), np.float32, 1.0, 0.0),
        (((16, 23, 3), np.float16, "HWC"), np.uint8, 255.0, 0.0),
        # Float16 to float16 conversion
        (((1, 224, 224, 3), np.float16, "NHWC"), np.float16, 2.0, 0.5),
    ],
)
def test_op_convertto_float16(input_args, dtype, scale, offset):
    """Exercise cvcuda.convertto with float16 as source and/or destination dtype.

    Covers three call forms — operator (allocating), ``_into`` (pre-allocated
    output), and keyword form on an explicit stream. Each must preserve the
    source layout and shape while producing the requested dtype.
    """
    # Named `src` rather than `input` to avoid shadowing the builtin.
    src = cvcuda.Tensor(*input_args)

    # Operator form: the output tensor is allocated by the operator.
    out = cvcuda.convertto(src, dtype, scale, offset)
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == dtype

    # `_into` form: writes into a caller-provided tensor and returns it.
    out = cvcuda.Tensor(src.shape, dtype, src.layout)
    tmp = cvcuda.convertto_into(out, src, scale, offset)
    assert tmp is out
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == dtype

    # Keyword form on a non-default stream must behave identically.
    stream = cvcuda.Stream()
    out = cvcuda.convertto(
        src=src, dtype=dtype, scale=scale, offset=offset, stream=stream
    )
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == dtype


@t.mark.parametrize(
    "input_args,base_args,scale_args",
    [
        # Float16 base/scale tensors with various input types
        (
            ((5, 16, 23, 4), np.float32, "NHWC"),
            ((1, 1), np.float16, "HW"),
            ((1, 1), np.float16, "HW"),
        ),
        (
            ((5, 16, 23, 4), np.float32, "NHWC"),
            ((1, 1, 4), np.float16, "HWC"),
            ((1, 1, 4), np.float16, "HWC"),
        ),
        (
            ((1, 224, 224, 3), np.float32, "NHWC"),
            ((1, 1, 3), np.float16, "HWC"),
            ((1, 1, 3), np.float16, "HWC"),
        ),
    ],
)
def test_op_normalize_float16(input_args, base_args, scale_args):
    """Exercise cvcuda.normalize with float16 base/scale parameter tensors.

    The output must keep the source tensor's layout, shape and dtype for the
    operator form, the ``_into`` form, and the keyword/stream form (the last
    with SCALE_IS_STDDEV semantics).
    """
    # Named `src` rather than `input` to avoid shadowing the builtin.
    src = cvcuda.Tensor(*input_args)
    base = cvcuda.Tensor(*base_args)
    scale = cvcuda.Tensor(*scale_args)

    # Operator form: the output tensor is allocated by the operator.
    out = cvcuda.normalize(src, base, scale)
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == src.dtype

    # `_into` form: writes into a caller-provided tensor and returns it.
    out = cvcuda.Tensor(src.shape, src.dtype, src.layout)
    tmp = cvcuda.normalize_into(out, src, base, scale)
    assert tmp is out
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == src.dtype

    # Keyword form with stddev scaling on a non-default stream.
    stream = cvcuda.Stream()
    out = cvcuda.normalize(
        src=src,
        base=base,
        scale=scale,
        flags=cvcuda.NormalizeFlags.SCALE_IS_STDDEV,
        stream=stream,
    )
    assert out.layout == src.layout
    assert out.shape == src.shape
    assert out.dtype == src.dtype


@t.mark.skipif(not HAS_CUPY, reason="CuPy not available")
@t.mark.parametrize(
    "shape,layout",
    [
        ((224, 224, 3), "HWC"),
        ((1, 224, 224, 3), "NHWC"),
        ((16, 16, 4), "HWC"),
    ],
)
def test_as_tensor_float16(shape, layout):
    """Wrap a device-resident float16 array as a CV-CUDA tensor.

    Shape, layout and dtype must all round-trip through as_tensor.
    """
    # CuPy gives us CUDA-accessible memory holding float16 values.
    device_arr = cp.random.randn(*shape).astype(np.float16)

    # Zero-copy wrap into a CV-CUDA tensor with an explicit layout.
    wrapped = cvcuda.as_tensor(device_arr, layout=layout)

    assert wrapped.shape == shape
    assert wrapped.layout == layout
    assert wrapped.dtype == np.float16