166 changes: 154 additions & 12 deletions cuda_core/cuda/core/system/_device.pyx
@@ -15,6 +15,7 @@ from ._nvml_context cimport initialize
include "_device_utils.pxi"


BrandType = nvml.BrandType
FieldId = nvml.FieldId


@@ -174,6 +175,77 @@ cdef class PciInfo:
return self._pci_info.pci_device_id >> 16


cdef class DeviceAttributes:
"""
Various device attributes.
"""
def __init__(self, attributes: nvml.DeviceAttributes):
self._attributes = attributes

@property
def multiprocessor_count(self) -> int:
"""
The streaming multiprocessor count
"""
return self._attributes.multiprocessor_count

@property
def shared_copy_engine_count(self) -> int:
"""
The shared copy engine count
"""
return self._attributes.shared_copy_engine_count

@property
def shared_decoder_count(self) -> int:
"""
The shared decoder engine count
"""
return self._attributes.shared_decoder_count

@property
def shared_encoder_count(self) -> int:
"""
The shared encoder engine count
"""
return self._attributes.shared_encoder_count

@property
def shared_jpeg_count(self) -> int:
"""
The shared JPEG engine count
"""
return self._attributes.shared_jpeg_count

@property
def shared_ofa_count(self) -> int:
"""
The shared optical flow accelerator (OFA) engine count
"""
return self._attributes.shared_ofa_count

@property
def gpu_instance_slice_count(self) -> int:
"""
The GPU instance slice count
"""
return self._attributes.gpu_instance_slice_count

@property
def compute_instance_slice_count(self) -> int:
"""
The compute instance slice count
"""
return self._attributes.compute_instance_slice_count

@property
def memory_size_mb(self) -> int:
"""
Device memory size in MiB
"""
return self._attributes.memory_size_mb

Review thread on "Device memory size in MiB":

Copilot AI Jan 9, 2026
The property name is memory_size_mb, which suggests megabytes (MB), but the docstring says "MiB", which is mebibytes. These are different units: 1 MB = 1,000,000 bytes, while 1 MiB = 1,048,576 bytes. The naming should be consistent with the actual unit returned by the underlying NVML API to avoid confusion.
Suggested change: "Device memory size in MiB" -> "Device memory size in MB"

Contributor Author
This would be a deviation from the naming in NVML itself. What do you think, @leofang?

Collaborator
Is it difficult to push a change to NVML? To correct the function so we can be consistent.

Contributor Author
It would be a breaking API change to NVML. We could ask them to add a second API and deprecate the old one, I suppose. But I suspect this is a pretty common inconsistency across CUDA APIs (MB vs. MiB) and unlikely to be worth the effort. (But I'm assuming lots there.)


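For reference, a minimal usage sketch of the new attributes API (not part of this diff; assumes a Linux system and a device supported by the attributes property added below):

from cuda.core import system

device = system.Device(index=0)
attrs = device.attributes  # DeviceAttributes; may raise NotSupportedError on older GPUs
print(attrs.multiprocessor_count)  # streaming multiprocessor count
print(attrs.shared_decoder_count, attrs.shared_encoder_count)
print(attrs.memory_size_mb)  # memory size as reported by NVML (see the MB/MiB discussion above)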
cdef class FieldValue:
"""
Represents the data from a single field value.
@@ -317,39 +389,56 @@ cdef class Device:
about devices and their topology, as provided by the NVIDIA Management
Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.

Creating a device instance causes NVML to initialize the target GPU.
NVML may initialize additional GPUs if the target GPU is an SLI slave.

Parameters
----------
index: int, optional
Integer representing the CUDA device index to get a handle to.
Integer representing the CUDA device index to get a handle to. Valid
values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.

The order in which devices are enumerated has no guarantees of
consistency between reboots. For that reason, it is recommended that
devices be looked up by their PCI IDs or UUIDs.

uuid: bytes or str, optional
UUID of a CUDA device to get a handle to.

pci_bus_id: bytes or str, optional
PCI bus ID of a CUDA device to get a handle to.

Raises
------
ValueError
If neither `index` nor `uuid` are specified or if both are specified.
If anything other than exactly one of `index`, `uuid`, or `pci_bus_id` is specified.
"""

cdef intptr_t _handle

def __init__(self, index: int | None = None, uuid: bytes | str | None = None):
def __init__(self, index: int | None = None, uuid: bytes | str | None = None, pci_bus_id: bytes | str | None = None):
initialize()

if index is not None and uuid is not None:
raise ValueError("Handle requires only one of either device `index` or `uuid`.")
if index is None and uuid is None:
raise ValueError("Handle requires either a device `index` or `uuid`.")
args = [index, uuid, pci_bus_id]
arg_count = sum(x is not None for x in args)

if arg_count > 1:
raise ValueError("Handle requires only one of either device `index`, `uuid` or `pci_bus_id`.")
if arg_count == 0:
raise ValueError("Handle requires either a device `index`, `uuid` or `pci_bus_id`.")

if index is not None:
self._handle = nvml.device_get_handle_by_index_v2(index)
else:
elif uuid is not None:
if isinstance(uuid, bytes):
uuid = uuid.decode("ascii")
self._handle = nvml.device_get_handle_by_uuid(uuid)

@property
def handle(self) -> int:
return self._handle
elif pci_bus_id is not None:
if isinstance(pci_bus_id, bytes):
pci_bus_id = pci_bus_id.decode("ascii")
self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
else:
raise ValueError("Error parsing arguments")

@classmethod
def get_all_devices(cls) -> Iterable[Device]:
Expand Down Expand Up @@ -427,6 +516,24 @@ cdef class Device:
"""
return nvml.device_get_name(self._handle)

@property
def brand(self) -> BrandType:
"""
Brand of the device
"""
return BrandType(nvml.device_get_brand(self._handle))

@property
def index(self) -> int:
"""
The NVML index of this device.

The order in which NVML enumerates devices has no guarantees of
consistency between reboots. For that reason, it is recommended that
devices be looked up by their PCI IDs or GPU UUID.
"""
return nvml.device_get_index(self._handle)
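A short sketch (not part of this diff) showing the new brand and index properties during enumeration:

from cuda.core import system

for device in system.Device.get_all_devices():
    # brand is a system.BrandType enum member; index is the NVML enumeration index
    print(device.index, device.name, device.brand.name)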

@property
def pci_info(self) -> PciInfo:
"""
@@ -451,6 +558,39 @@ cdef class Device:
"""
return nvml.device_get_uuid(self._handle)

@property
def attributes(self) -> DeviceAttributes:
"""
Get various device attributes.

For Ampere™ or newer fully supported devices. Only available on Linux
systems.
"""
return DeviceAttributes(nvml.device_get_attributes_v2(self._handle))

@property
def is_c2c_mode_enabled(self) -> bool:
"""
Whether the C2C (Chip-to-Chip) mode is enabled for this device.
"""
return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled)
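Both attributes and is_c2c_mode_enabled can raise NotSupportedError on older GPUs or unsupported platforms; a defensive sketch (not part of this diff, mirroring the tests added below):

from cuda.core import system

device = system.Device(index=0)
try:
    print("C2C enabled:", device.is_c2c_mode_enabled)
except system.NotSupportedError:
    # system.NotSupportedError follows the attributes test below; the C2C test
    # catches the same error via the nvml module.
    print("C2C mode info not supported on this device")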

@property
def persistence_mode_enabled(self) -> bool:
"""
Whether persistence mode is enabled for this device.

For Linux only.
"""
return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED

@persistence_mode_enabled.setter
def persistence_mode_enabled(self, enabled: bool) -> None:
nvml.device_set_persistence_mode(
self._handle,
nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED
)
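A sketch (not part of this diff) of toggling persistence mode and restoring the original setting; changing the mode typically requires elevated privileges on Linux (an assumption, not stated in this diff):

from cuda.core import system

device = system.Device(index=0)
original = device.persistence_mode_enabled
try:
    device.persistence_mode_enabled = True   # may require root privileges
finally:
    device.persistence_mode_enabled = original  # restore the previous setting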

def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
"""
Get multiple field values from the device.
@@ -494,8 +634,10 @@ cdef class Device:

__all__ = [
"BAR1MemoryInfo",
"BrandType",
"Device",
"DeviceArchitecture",
"DeviceAttributes",
"FieldId",
"FieldValue",
"FieldValues",
6 changes: 4 additions & 2 deletions cuda_core/docs/source/api.rst
@@ -75,22 +75,24 @@ CUDA system information and NVIDIA Management Library (NVML)

system.get_driver_version
system.get_driver_version_full
system.get_driver_branch
system.get_num_devices
system.get_nvml_version
system.get_process_name

:template: autosummary/cyclass.rst

system.Device
system.BAR1MemoryInfo
system.BrandType
system.DeviceArchitecture
system.DeviceAttributes
system.FieldId
system.FieldValue
system.FieldValues
system.MemoryInfo
system.BAR1MemoryInfo
system.PciInfo


.. module:: cuda.core.utils

Utility functions
77 changes: 72 additions & 5 deletions cuda_core/tests/system/test_system_device.py
@@ -13,6 +13,7 @@
import re
import sys

import helpers
import pytest
from cuda.core import system

@@ -27,11 +28,6 @@ def check_gpu_available():
pytest.skip("No GPUs available to run device tests", allow_module_level=True)


def test_device_index_handle():
for device in system.Device.get_all_devices():
assert isinstance(device.handle, int)


def test_device_architecture():
for device in system.Device.get_all_devices():
device_arch = device.architecture
@@ -191,6 +187,77 @@ def test_unpack_bitmask_single_value():
_device._unpack_bitmask(1)


def test_device_brand():
for device in system.Device.get_all_devices():
brand = device.brand
assert isinstance(brand, system.BrandType)
assert isinstance(brand.name, str)
assert isinstance(brand.value, int)


def test_device_pci_bus_id():
for device in system.Device.get_all_devices():
pci_bus_id = device.pci_info.bus_id
assert isinstance(pci_bus_id, str)

new_device = system.Device(pci_bus_id=device.pci_info.bus_id)
assert new_device.index == device.index


@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows")
def test_device_attributes():
skip_reasons = []

for device in system.Device.get_all_devices():
try:
attributes = device.attributes
except system.NotSupportedError:
skip_reasons.append(f"Device attributes not supported on '{device.name}'")
continue
assert isinstance(attributes, system.DeviceAttributes)

assert isinstance(attributes.multiprocessor_count, int)
assert attributes.multiprocessor_count > 0

assert isinstance(attributes.shared_copy_engine_count, int)
assert isinstance(attributes.shared_decoder_count, int)
assert isinstance(attributes.shared_encoder_count, int)
assert isinstance(attributes.shared_jpeg_count, int)
assert isinstance(attributes.shared_ofa_count, int)
assert isinstance(attributes.gpu_instance_slice_count, int)
assert isinstance(attributes.compute_instance_slice_count, int)
assert isinstance(attributes.memory_size_mb, int)
assert attributes.memory_size_mb > 0

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))


def test_c2c_mode_enabled():
skip_reasons = set()
for device in system.Device.get_all_devices():
try:
is_enabled = device.is_c2c_mode_enabled
except nvml.NotSupportedError:
skip_reasons.add(f"C2C mode info not supported on {device}")
else:
assert isinstance(is_enabled, bool)
if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))


@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Persistence mode not supported on WSL or Windows")
def test_persistence_mode_enabled():
for device in system.Device.get_all_devices():
is_enabled = device.persistence_mode_enabled
assert isinstance(is_enabled, bool)
try:
device.persistence_mode_enabled = False
assert device.persistence_mode_enabled is False
finally:
device.persistence_mode_enabled = is_enabled


def test_field_values():
for device in system.Device.get_all_devices():
# TODO: Are there any fields that return double's? It would be good to