Fix 4bit tensor unpacking (#118)

justinchuby · web-flow · commit 29c1875b15c8 · 2025-07-03T22:34:41.000Z
Unpacking 4-bit tensors into NumPy arrays was buggy before this fix. I
updated the logic so that the packed bytes are handled correctly when
converting to NumPy.

Added unit tests for all numeric dtypes.

---------

Signed-off-by: Justin Chu <justinchuby@users.noreply.github.com>
diff --git a/src/onnx_ir/_core.py b/src/onnx_ir/_core.py
@@ -657,15 +657,13 @@ def _load(self):
             self._array = np.empty(self.shape.numpy(), dtype=self.dtype.numpy())
             return
         # Map the whole file into the memory
-        # TODO(justinchuby): Verify if this would exhaust the memory address space
         with open(self.path, "rb") as f:
             self.raw = mmap.mmap(
                 f.fileno(),
                 0,
                 access=mmap.ACCESS_READ,
             )
-        # Handle the byte order correctly by always using little endian
-        dt = np.dtype(self.dtype.numpy()).newbyteorder("<")
+
         if self.dtype in {
             _enums.DataType.INT4,
             _enums.DataType.UINT4,
@@ -675,16 +673,18 @@ def _load(self):
             dt = np.dtype(np.uint8).newbyteorder("<")
             count = self.size // 2 + self.size % 2
         else:
+            # Handle the byte order correctly by always using little endian
+            dt = np.dtype(self.dtype.numpy()).newbyteorder("<")
             count = self.size
+
         self._array = np.frombuffer(self.raw, dtype=dt, offset=self.offset or 0, count=count)
         shape = self.shape.numpy()
-        if self.dtype == _enums.DataType.INT4:
-            # Unpack the int4 arrays
-            self._array = _type_casting.unpack_int4(self._array, shape)
-        elif self.dtype == _enums.DataType.UINT4:
-            self._array = _type_casting.unpack_uint4(self._array, shape)
-        elif self.dtype == _enums.DataType.FLOAT4E2M1:
-            self._array = _type_casting.unpack_float4e2m1(self._array, shape)
+
+        if self.dtype.bitwidth == 4:
+            # Unpack the 4bit arrays
+            self._array = _type_casting.unpack_4bitx2(self._array, shape).view(
+                self.dtype.numpy()
+            )
         else:
             self._array = self._array.reshape(shape)
 
@@ -1071,15 +1071,7 @@ def numpy(self) -> np.ndarray:
         """
         array = self.numpy_packed()
         # ONNX IR returns the unpacked arrays
-        if self.dtype == _enums.DataType.INT4:
-            return _type_casting.unpack_int4(array, self.shape.numpy())
-        if self.dtype == _enums.DataType.UINT4:
-            return _type_casting.unpack_uint4(array, self.shape.numpy())
-        if self.dtype == _enums.DataType.FLOAT4E2M1:
-            return _type_casting.unpack_float4e2m1(array, self.shape.numpy())
-        raise TypeError(
-            f"PackedTensor only supports INT4, UINT4, FLOAT4E2M1, but got {self.dtype}"
-        )
+        return _type_casting.unpack_4bitx2(array, self.shape.numpy()).view(self.dtype.numpy())
 
     def numpy_packed(self) -> npt.NDArray[np.uint8]:
         """Return the tensor as a packed array."""
diff --git a/src/onnx_ir/_core_test.py b/src/onnx_ir/_core_test.py
@@ -2082,7 +2082,7 @@ def test_initialize_with_torch_tensor(self, _: str, dtype: ir.DataType):
         )
         np.testing.assert_array_equal(
             tensor.numpy(),
-            _type_casting._unpack_uint4_as_uint8(
+            _type_casting.unpack_4bitx2(
                 packed_data.numpy(force=True).view(np.uint8), dims=[2, 4]
             ).view(dtype.numpy()),
         )
diff --git a/src/onnx_ir/_type_casting.py b/src/onnx_ir/_type_casting.py
@@ -1,14 +1,12 @@
 # Copyright (c) ONNX Project Contributors
 # SPDX-License-Identifier: Apache-2.0
 """Numpy utilities for non-native type operation."""
-# TODO(justinchuby): Upstream the logic to onnx
 
 from __future__ import annotations
 
 import typing
 from collections.abc import Sequence
 
-import ml_dtypes
 import numpy as np
 
 if typing.TYPE_CHECKING:
@@ -28,9 +26,7 @@ def pack_4bitx2(array: np.ndarray) -> npt.NDArray[np.uint8]:
     return array_flat[0::2] | array_flat[1::2]  # type: ignore[return-type]
 
 
-def _unpack_uint4_as_uint8(
-    data: npt.NDArray[np.uint8], dims: Sequence[int]
-) -> npt.NDArray[np.uint8]:
+def unpack_4bitx2(data: npt.NDArray[np.uint8], dims: Sequence[int]) -> npt.NDArray[np.uint8]:
     """Convert a packed uint4 array to unpacked uint4 array represented as uint8.
 
     Args:
@@ -52,56 +48,3 @@ def _unpack_uint4_as_uint8(
         result = result[:-1]
     result.resize(dims, refcheck=False)
     return result
-
-
-def unpack_uint4(
-    data: npt.NDArray[np.uint8], dims: Sequence[int]
-) -> npt.NDArray[ml_dtypes.uint4]:
-    """Convert a packed uint4 array to unpacked uint4 array represented as uint8.
-
-    Args:
-        data: A numpy array.
-        dims: The dimensions are used to reshape the unpacked buffer.
-
-    Returns:
-        A numpy array of int8/uint8 reshaped to dims.
-    """
-    return _unpack_uint4_as_uint8(data, dims).view(ml_dtypes.uint4)
-
-
-def _extend_int4_sign_bits(x: npt.NDArray[np.uint8]) -> npt.NDArray[np.int8]:
-    """Extend 4-bit signed integer to 8-bit signed integer."""
-    return np.where((x >> 3) == 0, x, x | 0xF0).astype(np.int8)
-
-
-def unpack_int4(
-    data: npt.NDArray[np.uint8], dims: Sequence[int]
-) -> npt.NDArray[ml_dtypes.int4]:
-    """Convert a packed (signed) int4 array to unpacked int4 array represented as int8.
-
-    The sign bit is extended to the most significant bit of the int8.
-
-    Args:
-        data: A numpy array.
-        dims: The dimensions are used to reshape the unpacked buffer.
-
-    Returns:
-        A numpy array of int8 reshaped to dims.
-    """
-    unpacked = _unpack_uint4_as_uint8(data, dims)
-    return _extend_int4_sign_bits(unpacked).view(ml_dtypes.int4)
-
-
-def unpack_float4e2m1(
-    data: npt.NDArray[np.uint8], dims: Sequence[int]
-) -> npt.NDArray[ml_dtypes.float4_e2m1fn]:
-    """Convert a packed float4e2m1 array to unpacked float4e2m1 array.
-
-    Args:
-        data: A numpy array.
-        dims: The dimensions are used to reshape the unpacked buffer.
-
-    Returns:
-        A numpy array of float32 reshaped to dims.
-    """
-    return _unpack_uint4_as_uint8(data, dims).view(ml_dtypes.float4_e2m1fn)
diff --git a/src/onnx_ir/serde.py b/src/onnx_ir/serde.py
@@ -74,7 +74,6 @@
 
 if typing.TYPE_CHECKING:
     import google.protobuf.internal.containers as proto_containers
-    import numpy.typing as npt
 
 logger = logging.getLogger(__name__)
 
@@ -117,13 +116,6 @@ def _little_endian_dtype(dtype) -> np.dtype:
     return np.dtype(dtype).newbyteorder("<")
 
 
-def _unflatten_complex(
-    array: npt.NDArray[np.float32 | np.float64],
-) -> npt.NDArray[np.complex64 | np.complex128]:
-    """Convert the real representation of a complex dtype to the complex dtype."""
-    return array[::2] + 1j * array[1::2]
-
-
 @typing.overload
 def from_proto(proto: onnx.ModelProto) -> _core.Model: ...  # type: ignore[overload-overlap]
 @typing.overload
@@ -391,54 +383,88 @@ def numpy(self) -> np.ndarray:
                 "Cannot convert external tensor to numpy array. Use ir.ExternalTensor instead."
             )
 
+        shape = self._proto.dims
+
         if self._proto.HasField("raw_data"):
-            array = np.frombuffer(self._proto.raw_data, dtype=dtype.numpy().newbyteorder("<"))
-            # Cannot return now, because we may need to unpack 4bit tensors
-        elif dtype == _enums.DataType.STRING:
-            return np.array(self._proto.string_data).reshape(self._proto.dims)
-        elif self._proto.int32_data:
-            array = np.array(self._proto.int32_data, dtype=_little_endian_dtype(np.int32))
-            if dtype in {_enums.DataType.FLOAT16, _enums.DataType.BFLOAT16}:
-                # Reinterpret the int32 as float16 or bfloat16
-                array = array.astype(np.uint16).view(dtype.numpy())
-            elif dtype in {
+            if dtype.bitwidth == 4:
+                return _type_casting.unpack_4bitx2(
+                    np.frombuffer(self._proto.raw_data, dtype=np.uint8), shape
+                ).view(dtype.numpy())
+            return np.frombuffer(
+                self._proto.raw_data, dtype=dtype.numpy().newbyteorder("<")
+            ).reshape(shape)
+        if dtype == _enums.DataType.STRING:
+            return np.array(self._proto.string_data).reshape(shape)
+        if self._proto.int32_data:
+            assert dtype in {
+                _enums.DataType.BFLOAT16,
+                _enums.DataType.BOOL,
+                _enums.DataType.FLOAT16,
+                _enums.DataType.FLOAT4E2M1,
                 _enums.DataType.FLOAT8E4M3FN,
                 _enums.DataType.FLOAT8E4M3FNUZ,
                 _enums.DataType.FLOAT8E5M2,
                 _enums.DataType.FLOAT8E5M2FNUZ,
-            }:
-                array = array.astype(np.uint8).view(dtype.numpy())
-        elif self._proto.int64_data:
-            array = np.array(self._proto.int64_data, dtype=_little_endian_dtype(np.int64))
-        elif self._proto.uint64_data:
+                _enums.DataType.INT16,
+                _enums.DataType.INT32,
+                _enums.DataType.INT4,
+                _enums.DataType.INT8,
+                _enums.DataType.UINT16,
+                _enums.DataType.UINT4,
+                _enums.DataType.UINT8,
+            }, f"Unsupported dtype {dtype} for int32_data"
+            array = np.array(self._proto.int32_data, dtype=_little_endian_dtype(np.int32))
+            if dtype.bitwidth == 32:
+                return array.reshape(shape)
+            if dtype.bitwidth == 16:
+                # Reinterpret the int32 as float16 or bfloat16
+                return array.astype(np.uint16).view(dtype.numpy()).reshape(shape)
+            if dtype.bitwidth == 8:
+                return array.astype(np.uint8).view(dtype.numpy()).reshape(shape)
+            if dtype.bitwidth == 4:
+                return _type_casting.unpack_4bitx2(array.astype(np.uint8), shape).view(
+                    dtype.numpy()
+                )
+            raise ValueError(
+                f"Unsupported dtype {dtype} for int32_data with bitwidth {dtype.bitwidth}"
+            )
+        if self._proto.int64_data:
+            assert dtype in {
+                _enums.DataType.INT64,
+            }, f"Unsupported dtype {dtype} for int64_data"
+            return np.array(
+                self._proto.int64_data, dtype=_little_endian_dtype(np.int64)
+            ).reshape(shape)
+        if self._proto.uint64_data:
+            assert dtype in {
+                _enums.DataType.UINT64,
+                _enums.DataType.UINT32,
+            }, f"Unsupported dtype {dtype} for uint64_data"
             array = np.array(self._proto.uint64_data, dtype=_little_endian_dtype(np.uint64))
-        elif self._proto.float_data:
+            if dtype == _enums.DataType.UINT32:
+                return array.astype(np.uint32).reshape(shape)
+            return array.reshape(shape)
+        if self._proto.float_data:
+            assert dtype in {
+                _enums.DataType.FLOAT,
+                _enums.DataType.COMPLEX64,
+            }, f"Unsupported dtype {dtype} for float_data"
             array = np.array(self._proto.float_data, dtype=_little_endian_dtype(np.float32))
             if dtype == _enums.DataType.COMPLEX64:
-                array = _unflatten_complex(array)
-        elif self._proto.double_data:
+                return array.view(np.complex64).reshape(shape)
+            return array.reshape(shape)
+        if self._proto.double_data:
+            assert dtype in {
+                _enums.DataType.DOUBLE,
+                _enums.DataType.COMPLEX128,
+            }, f"Unsupported dtype {dtype} for double_data"
             array = np.array(self._proto.double_data, dtype=_little_endian_dtype(np.float64))
             if dtype == _enums.DataType.COMPLEX128:
-                array = _unflatten_complex(array)
-        else:
-            # Empty tensor
-            if not self._proto.dims:
-                # When dims not precent and there is no data, we return an empty array
-                return np.array([], dtype=dtype.numpy())
-            else:
-                # Otherwise we return a size 0 array with the correct shape
-                return np.zeros(self._proto.dims, dtype=dtype.numpy())
-
-        if dtype == _enums.DataType.INT4:
-            return _type_casting.unpack_int4(array.astype(np.uint8), self._proto.dims)
-        elif dtype == _enums.DataType.UINT4:
-            return _type_casting.unpack_uint4(array.astype(np.uint8), self._proto.dims)
-        elif dtype == _enums.DataType.FLOAT4E2M1:
-            return _type_casting.unpack_float4e2m1(array.astype(np.uint8), self._proto.dims)
-        else:
-            # Otherwise convert to the correct dtype and reshape
-            # Note we cannot use view() here because the storage dtype may not be the same size as the target
-            return array.astype(dtype.numpy()).reshape(self._proto.dims)
+                return array.view(np.complex128).reshape(shape)
+            return array.reshape(shape)
+
+        # Empty tensor. We return a size 0 array with the correct shape
+        return np.zeros(shape, dtype=dtype.numpy())
 
     def tobytes(self) -> bytes:
         """Return the tensor as a byte string conformed to the ONNX specification, in little endian.
diff --git a/src/onnx_ir/serde_test.py b/src/onnx_ir/serde_test.py
@@ -1,5 +1,6 @@
 # Copyright (c) ONNX Project Contributors
 # SPDX-License-Identifier: Apache-2.0
+import itertools
 import unittest
 
 import google.protobuf.text_format
@@ -346,6 +347,74 @@ def test_tensor_proto_tensor_empty_tensor(self):
         # Test dlpack
         np.testing.assert_array_equal(np.from_dlpack(tensor), tensor.numpy())
 
+    @parameterized.parameterized.expand(
+        [
+            (name, dtype, array)
+            for (name, dtype), array in itertools.product(
+                [
+                    ("FLOAT", ir.DataType.FLOAT),
+                    ("UINT8", ir.DataType.UINT8),
+                    ("INT8", ir.DataType.INT8),
+                    ("UINT16", ir.DataType.UINT16),
+                    ("INT16", ir.DataType.INT16),
+                    ("INT32", ir.DataType.INT32),
+                    ("INT64", ir.DataType.INT64),
+                    ("BOOL", ir.DataType.BOOL),
+                    ("FLOAT16", ir.DataType.FLOAT16),
+                    ("DOUBLE", ir.DataType.DOUBLE),
+                    ("UINT32", ir.DataType.UINT32),
+                    ("UINT64", ir.DataType.UINT64),
+                    ("COMPLEX64", ir.DataType.COMPLEX64),
+                    ("COMPLEX128", ir.DataType.COMPLEX128),
+                    ("BFLOAT16", ir.DataType.BFLOAT16),
+                    ("FLOAT8E4M3FN", ir.DataType.FLOAT8E4M3FN),
+                    ("FLOAT8E4M3FNUZ", ir.DataType.FLOAT8E4M3FNUZ),
+                    ("FLOAT8E5M2", ir.DataType.FLOAT8E5M2),
+                    ("FLOAT8E5M2FNUZ", ir.DataType.FLOAT8E5M2FNUZ),
+                    ("UINT4", ir.DataType.UINT4),
+                    ("INT4", ir.DataType.INT4),
+                    ("FLOAT4E2M1", ir.DataType.FLOAT4E2M1),
+                ],
+                [
+                    np.array(
+                        [
+                            [-1000, -6, -1, -0.0, +0.0],
+                            [0.1, 0.25, 1, float("inf"), -float("inf")],
+                            [float("NaN"), -float("NaN"), 1000, 6.0, 0.001],
+                        ],
+                    ),
+                    np.array(42),
+                    np.array([]),
+                    np.array([[[], [], []]]),
+                ],
+            )
+        ]
+    )
+    def test_round_trip_numpy_conversion_from_raw_data(
+        self, _: str, onnx_dtype: ir.DataType, original_array: np.ndarray
+    ):
+        original_array = original_array.astype(onnx_dtype.numpy())
+        ir_tensor = ir.Tensor(original_array, name="test_tensor")
+        proto = serde.to_proto(ir_tensor)
+        if original_array.size > 0:
+            self.assertGreater(len(proto.raw_data), 0)
+        # tensor_proto_tensor from raw_data
+        tensor_proto_tensor = serde.from_proto(proto)
+        roundtrip_array = tensor_proto_tensor.numpy()
+        if onnx_dtype in {
+            ir.DataType.FLOAT8E5M2FNUZ,
+            ir.DataType.FLOAT8E5M2,
+            ir.DataType.FLOAT8E4M3FN,
+            ir.DataType.BFLOAT16,
+        }:
+            # There is a bug in ml_dtypes that causes equality checks to fail for these dtypes
+            # See https://github.com/jax-ml/ml_dtypes/issues/301
+            self.assertEqual(roundtrip_array.shape, original_array.shape)
+            self.assertEqual(roundtrip_array.dtype, original_array.dtype)
+            self.assertEqual(roundtrip_array.tobytes(), original_array.tobytes())
+        else:
+            np.testing.assert_equal(roundtrip_array, original_array, strict=True)
+
 
 class DeserializeGraphTest(unittest.TestCase):
     def test_deserialize_graph_handles_unsorted_graph(self):

Original file line number	Diff line number	Diff line change
`@@ -2082,7 +2082,7 @@ def test_initialize_with_torch_tensor(self, _: str, dtype: ir.DataType):`
`2082`	`2082`	`)`
`2083`	`2083`	`np.testing.assert_array_equal(`
`2084`	`2084`	`tensor.numpy(),`
`2085`		`- _type_casting._unpack_uint4_as_uint8(`
	`2085`	`+ _type_casting.unpack_4bitx2(`
`2086`	`2086`	`packed_data.numpy(force=True).view(np.uint8), dims=[2, 4]`
`2087`	`2087`	`).view(dtype.numpy()),`
`2088`	`2088`	`)`