From 19db1286578f1619505ebc7c237ce21d3d8293ad Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Jan 2026 09:24:18 -0500 Subject: [PATCH 1/5] cuda.core.system: Add device attributes, and other simple device-related APIs --- cuda_core/cuda/core/system/_device.pyx | 167 +++++++++++++++++-- cuda_core/docs/source/api.rst | 6 +- cuda_core/tests/system/test_system_device.py | 63 ++++++- 3 files changed, 217 insertions(+), 19 deletions(-) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 2b5ec242e8..7da611af45 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -15,6 +15,9 @@ from ._nvml_context cimport initialize include "_device_utils.pxi" +BrandType = nvml.BrandType + + class DeviceArchitecture: """ Device architecture enumeration. @@ -171,6 +174,77 @@ cdef class PciInfo: return self._pci_info.pci_device_id >> 16 +cdef class DeviceAttributes: + """ + Various device attributes. + """ + def __init__(self, attributes: nvml.DeviceAttributes): + self._attributes = attributes + + @property + def multiprocessor_count(self) -> int: + """ + The streaming multiprocessor count. 
+ """ + return self._attributes.multiprocessor_count + + @property + def shared_copy_engine_count(self) -> int: + """ + The shared copy engine count + """ + return self._attributes.shared_copy_engine_count + + @property + def shared_decoder_count(self) -> int: + """ + The shared decoder engine count + """ + return self._attributes.shared_decoder_count + + @property + def shared_encoder_count(self) -> int: + """ + The shared encoder engine count + """ + return self._attributes.shared_encoder_count + + @property + def shared_jpeg_count(self) -> int: + """ + The shared JPEG engine count + """ + return self._attributes.shared_jpeg_count + + @property + def shared_ofa_count(self) -> int: + """ + The shared optical flow accelerator (OFA) engine count + """ + return self._attributes.shared_ofa_count + + @property + def gpu_instance_slice_count(self) -> int: + """ + The GPU instance slice count + """ + return self._attributes.gpu_instance_slice_count + + @property + def compute_instance_slice_count(self) -> int: + """ + The compute instance slice count + """ + return self._attributes.compute_instance_slice_count + + @property + def memory_size_mb(self) -> int: + """ + Device memory size in MiB + """ + return self._attributes.memory_size_mb + + cdef class Device: """ Representation of a device. @@ -179,39 +253,56 @@ cdef class Device: about devices and their topology, as provided by the NVIDIA Management Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`. + Creating a device instance causes NVML to initialize the target GPU. + NVML may initialize additional GPUs if the target GPU is an SLI slave. + Parameters ---------- index: int, optional - Integer representing the CUDA device index to get a handle to. + Integer representing the CUDA device index to get a handle to. Valid + values are between ``0`` and ``cuda.cure.system.get_num_devices() - 1``. + + The order in which devices are enumerated has no guarantees of + consistency between reboots. 
For that reason, it is recommended that + devices are looked up by their PCI ids or UUID. + uuid: bytes or str, optional UUID of a CUDA device to get a handle to. + pci_bus_id: bytes or str, optional + PCI bus ID of a CUDA device to get a handle to. + Raises ------ ValueError - If neither `index` nor `uuid` are specified or if both are specified. + If anything other than a single `index`, `uuid` or `pci_bus_id` are specified. """ cdef intptr_t _handle - def __init__(self, index: int | None = None, uuid: bytes | str | None = None): + def __init__(self, index: int | None = None, uuid: bytes | str | None = None, pci_bus_id: bytes | str | None = None): initialize() - if index is not None and uuid is not None: - raise ValueError("Handle requires only one of either device `index` or `uuid`.") - if index is None and uuid is None: - raise ValueError("Handle requires either a device `index` or `uuid`.") + args = [index, uuid, pci_bus_id] + arg_count = sum(x is not None for x in args) + + if arg_count > 1: + raise ValueError("Handle requires only one of either device `index`, `uuid` or `pci_bus_id`.") + if arg_count == 0: + raise ValueError("Handle requires either a device `index`, `uuid` or `pci_bus_id`.") if index is not None: self._handle = nvml.device_get_handle_by_index_v2(index) - else: + elif uuid is not None: if isinstance(uuid, bytes): uuid = uuid.decode("ascii") self._handle = nvml.device_get_handle_by_uuid(uuid) - - @property - def handle(self) -> int: - return self._handle + elif pci_bus_id is not None: + if isinstance(pci_bus_id, bytes): + pci_bus_id = pci_bus_id.decode("ascii") + self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id) + else: + raise ValueError("Error parsing arguments") @classmethod def get_all_devices(cls) -> Iterable[Device]: @@ -289,6 +380,24 @@ cdef class Device: """ return nvml.device_get_name(self._handle) + @property + def brand(self) -> BrandType: + """ + Brand of the device + """ + return 
BrandType(nvml.device_get_brand(self._handle)) + + @property + def index(self) -> int: + """ + The NVML index of this device. + + The order in which NVML enumerates devices has no guarantees of + consistency between reboots. For that reason it is recommended that + devices be looked up by their PCI ids or GPU UUID. + """ + return nvml.device_get_index(self._handle) + @property def pci_info(self) -> PciInfo: """ @@ -313,11 +422,45 @@ cdef class Device: """ return nvml.device_get_uuid(self._handle) + @property + def attributes(self) -> DeviceAttributes: + """ + Get various device attributes. + + Only available on Linux systems. + """ + return DeviceAttributes(nvml.device_get_attributes_v2(self._handle)) + + @property + def is_c2c_mode_enabled(self) -> bool: + """ + Whether the C2C (Chip-to-Chip) mode is enabled for this device. + """ + return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled) + + @property + def persistence_mode_enabled(self) -> bool: + """ + Whether persistence mode is enabled for this device. + + For Linux only. 
+ """ + return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED + + @persistence_mode_enabled.setter + def persistence_mode_enabled(self, enabled: bool) -> None: + nvml.device_set_persistence_mode( + self._handle, + enabled and nvml.EnableState.FEATURE_ENABLED or nvml.EnableState.FEATURE_DISABLED + ) + __all__ = [ "BAR1MemoryInfo", + "BrandType", "Device", "DeviceArchitecture", + "DeviceAttributes", "MemoryInfo", "PciInfo", ] diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 13e1f43a2f..001f81f0da 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -72,6 +72,7 @@ CUDA system information and NVIDIA Management Library (NVML) system.get_driver_version system.get_driver_version_full + system.get_driver_branch system.get_num_devices system.get_nvml_version system.get_process_name @@ -79,12 +80,13 @@ CUDA system information and NVIDIA Management Library (NVML) :template: autosummary/cyclass.rst system.Device + system.BAR1MemoryInfo + system.BrandType system.DeviceArchitecture + system.DeviceAttributes system.MemoryInfo - system.BAR1MemoryInfo system.PciInfo - .. 
module:: cuda.core.utils Utility functions diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 134ea7cbbe..e6d39fc309 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -13,6 +13,7 @@ import re import sys +import helpers import pytest from cuda.core import system from cuda.core.system import _device as system_device @@ -27,11 +28,6 @@ def check_gpu_available(): pytest.skip("No GPUs available to run device tests", allow_module_level=True) -def test_device_index_handle(): - for device in system.Device.get_all_devices(): - assert isinstance(device.handle, int) - - def test_device_architecture(): for device in system.Device.get_all_devices(): device_arch = device.architecture @@ -189,3 +185,60 @@ def test_unpack_bitmask(params): def test_unpack_bitmask_single_value(): with pytest.raises(TypeError): system_device._unpack_bitmask(1) + + +def test_device_brand(): + for device in system.Device.get_all_devices(): + brand = device.brand + assert isinstance(brand, system_device.BrandType) + assert isinstance(brand.name, str) + assert isinstance(brand.value, int) + + +def test_device_pci_bus_id(): + for device in system.Device.get_all_devices(): + pci_bus_id = device.pci_info.bus_id + assert isinstance(pci_bus_id, str) + + new_device = system.Device(pci_bus_id=device.pci_info.bus_id) + assert new_device.index == device.index + + +@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows") +def test_device_attributes(): + for device in system.Device.get_all_devices(): + attributes = device.attributes + assert isinstance(attributes, system_device.DeviceAttributes) + + assert isinstance(attributes.multiprocessor_count, int) + assert attributes.multiprocessor_count > 0 + + assert isinstance(attributes.shared_copy_engine_count, int) + assert isinstance(attributes.shared_decoder_count, int) + assert 
isinstance(attributes.shared_encoder_count, int) + assert isinstance(attributes.shared_jpeg_count, int) + assert isinstance(attributes.shared_ofa_count, int) + assert isinstance(attributes.gpu_instance_slice_count, int) + assert isinstance(attributes.compute_instance_slice_count, int) + assert isinstance(attributes.memory_size_mb, int) + assert attributes.memory_size_mb > 0 + + +def test_c2c_mode_enabled(): + skip_reasons = set() + for device in system.Device.get_all_devices(): + try: + is_enabled = device.is_c2c_mode_enabled + except nvml.NotSupportedError: + skip_reasons.add(f"C2C mode info not supported on {device}") + else: + assert isinstance(is_enabled, bool) + if skip_reasons: + pytest.skip(" ; ".join(skip_reasons)) + + +@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Persistence mode not supported on WSL or Windows") +def test_persistence_mode_enabled(): + for device in system.Device.get_all_devices(): + is_enabled = device.persistence_mode_enabled + assert isinstance(is_enabled, bool) From 826551f9f1dcd4290e3ab0848a1037eba5fee9f5 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Jan 2026 09:58:27 -0500 Subject: [PATCH 2/5] Update cuda_core/cuda/core/system/_device.pyx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cuda_core/cuda/core/system/_device.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 7da611af45..7c5f2cb031 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -260,7 +260,7 @@ cdef class Device: ---------- index: int, optional Integer representing the CUDA device index to get a handle to. Valid - values are between ``0`` and ``cuda.cure.system.get_num_devices() - 1``. + values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``. The order in which devices are enumerated has no guarantees of consistency between reboots. 
For that reason, it is recommended that From ab57bc5c6cf5bdf1618ec573392e4b7565821f7f Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Jan 2026 09:58:38 -0500 Subject: [PATCH 3/5] Update cuda_core/cuda/core/system/_device.pyx Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cuda_core/cuda/core/system/_device.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 7c5f2cb031..4b70a4fc1a 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -451,7 +451,7 @@ cdef class Device: def persistence_mode_enabled(self, enabled: bool) -> None: nvml.device_set_persistence_mode( self._handle, - enabled and nvml.EnableState.FEATURE_ENABLED or nvml.EnableState.FEATURE_DISABLED + nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED ) From 68557150d29544fca2be0e62f73078acc1f7a7d6 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Jan 2026 09:59:34 -0500 Subject: [PATCH 4/5] Address copilot's comments in PR --- cuda_core/cuda/core/system/_device.pyx | 2 +- cuda_core/tests/system/test_system_device.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 4b70a4fc1a..a870d18741 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -184,7 +184,7 @@ cdef class DeviceAttributes: @property def multiprocessor_count(self) -> int: """ - The streaming multiprocessor count. 
+ The streaming multiprocessor count """ return self._attributes.multiprocessor_count diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index e6d39fc309..034e380f51 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -242,3 +242,8 @@ def test_persistence_mode_enabled(): for device in system.Device.get_all_devices(): is_enabled = device.persistence_mode_enabled assert isinstance(is_enabled, bool) + try: + device.persistence_mode_enabled = False + assert device.persistence_mode_enabled is False + finally: + device.persistence_mode_enabled = is_enabled From 512dbb1187e533e711f294e9c5ab629b73d1569a Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 9 Jan 2026 10:39:06 -0500 Subject: [PATCH 5/5] Skip for devices that don't support attributes --- cuda_core/cuda/core/system/_device.pyx | 3 ++- cuda_core/tests/system/test_system_device.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index a870d18741..6a90e33bf6 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -427,7 +427,8 @@ cdef class Device: """ Get various device attributes. - Only available on Linux systems. + For Ampere™ or newer fully supported devices. Only available on Linux + systems. 
""" return DeviceAttributes(nvml.device_get_attributes_v2(self._handle)) diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 034e380f51..611337960a 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -206,8 +206,14 @@ def test_device_pci_bus_id(): @pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows") def test_device_attributes(): + skip_reasons = [] + for device in system.Device.get_all_devices(): - attributes = device.attributes + try: + attributes = device.attributes + except system.NotSupportedError: + skip_reasons.append(f"Device attributes not supported on '{device.name}'") + continue assert isinstance(attributes, system_device.DeviceAttributes) assert isinstance(attributes.multiprocessor_count, int) @@ -223,6 +229,9 @@ def test_device_attributes(): assert isinstance(attributes.memory_size_mb, int) assert attributes.memory_size_mb > 0 + if skip_reasons: + pytest.skip(" ; ".join(skip_reasons)) + def test_c2c_mode_enabled(): skip_reasons = set()