diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 31c72cdd2b..2371c09c30 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -15,6 +15,7 @@ from ._nvml_context cimport initialize include "_device_utils.pxi" +BrandType = nvml.BrandType FieldId = nvml.FieldId @@ -174,6 +175,80 @@ cdef class PciInfo: return self._pci_info.pci_device_id >> 16 +cdef class DeviceAttributes: + """ + Various device attributes. + """ + # Declared explicitly: a cdef class has no instance __dict__, so __init__ cannot assign an undeclared attribute. + cdef object _attributes + + def __init__(self, attributes: nvml.DeviceAttributes): + self._attributes = attributes + + @property + def multiprocessor_count(self) -> int: + """ + The streaming multiprocessor count + """ + return self._attributes.multiprocessor_count + + @property + def shared_copy_engine_count(self) -> int: + """ + The shared copy engine count + """ + return self._attributes.shared_copy_engine_count + + @property + def shared_decoder_count(self) -> int: + """ + The shared decoder engine count + """ + return self._attributes.shared_decoder_count + + @property + def shared_encoder_count(self) -> int: + """ + The shared encoder engine count + """ + return self._attributes.shared_encoder_count + + @property + def shared_jpeg_count(self) -> int: + """ + The shared JPEG engine count + """ + return self._attributes.shared_jpeg_count + + @property + def shared_ofa_count(self) -> int: + """ + The shared optical flow accelerator (OFA) engine count + """ + return self._attributes.shared_ofa_count + + @property + def gpu_instance_slice_count(self) -> int: + """ + The GPU instance slice count + """ + return self._attributes.gpu_instance_slice_count + + @property + def compute_instance_slice_count(self) -> int: + """ + The compute instance slice count + """ + return self._attributes.compute_instance_slice_count + + @property + def memory_size_mb(self) -> int: + """ + Device memory size in MiB + """ + return self._attributes.memory_size_mb + + cdef class FieldValue: """ Represents the data 
from a single field value. @@ -317,39 +389,56 @@ cdef class Device: about devices and their topology, as provided by the NVIDIA Management Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`. + Creating a device instance causes NVML to initialize the target GPU. + NVML may initialize additional GPUs if the target GPU is an SLI slave. + Parameters ---------- index: int, optional - Integer representing the CUDA device index to get a handle to. + Integer representing the CUDA device index to get a handle to. Valid + values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``. + + The order in which devices are enumerated has no guarantees of + consistency between reboots. For that reason, it is recommended that + devices are looked up by their PCI ids or UUID. + uuid: bytes or str, optional UUID of a CUDA device to get a handle to. + pci_bus_id: bytes or str, optional + PCI bus ID of a CUDA device to get a handle to. + Raises ------ ValueError - If neither `index` nor `uuid` are specified or if both are specified. + If anything other than exactly one of `index`, `uuid` or `pci_bus_id` is specified. 
""" cdef intptr_t _handle - def __init__(self, index: int | None = None, uuid: bytes | str | None = None): + def __init__(self, index: int | None = None, uuid: bytes | str | None = None, pci_bus_id: bytes | str | None = None): initialize() - if index is not None and uuid is not None: - raise ValueError("Handle requires only one of either device `index` or `uuid`.") - if index is None and uuid is None: - raise ValueError("Handle requires either a device `index` or `uuid`.") + args = [index, uuid, pci_bus_id] + arg_count = sum(x is not None for x in args) + + if arg_count > 1: + raise ValueError("Handle requires only one of either device `index`, `uuid` or `pci_bus_id`.") + if arg_count == 0: + raise ValueError("Handle requires either a device `index`, `uuid` or `pci_bus_id`.") if index is not None: self._handle = nvml.device_get_handle_by_index_v2(index) - else: + elif uuid is not None: if isinstance(uuid, bytes): uuid = uuid.decode("ascii") self._handle = nvml.device_get_handle_by_uuid(uuid) - - @property - def handle(self) -> int: - return self._handle + elif pci_bus_id is not None: + if isinstance(pci_bus_id, bytes): + pci_bus_id = pci_bus_id.decode("ascii") + self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id) + else: + raise ValueError("Error parsing arguments") @classmethod def get_all_devices(cls) -> Iterable[Device]: @@ -427,6 +516,24 @@ cdef class Device: """ return nvml.device_get_name(self._handle) + @property + def brand(self) -> BrandType: + """ + Brand of the device + """ + return BrandType(nvml.device_get_brand(self._handle)) + + @property + def index(self) -> int: + """ + The NVML index of this device. + + The order in which NVML enumerates devices has no guarantees of + consistency between reboots. For that reason it is recommended that + devices be looked up by their PCI ids or GPU UUID. 
+ """ + return nvml.device_get_index(self._handle) + @property def pci_info(self) -> PciInfo: """ @@ -451,6 +558,39 @@ cdef class Device: """ return nvml.device_get_uuid(self._handle) + @property + def attributes(self) -> DeviceAttributes: + """ + Get various device attributes. + + For Ampere™ or newer fully supported devices. Only available on Linux + systems. + """ + return DeviceAttributes(nvml.device_get_attributes_v2(self._handle)) + + @property + def is_c2c_mode_enabled(self) -> bool: + """ + Whether the C2C (Chip-to-Chip) mode is enabled for this device. + """ + return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled) + + @property + def persistence_mode_enabled(self) -> bool: + """ + Whether persistence mode is enabled for this device. + + For Linux only. + """ + return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED + + @persistence_mode_enabled.setter + def persistence_mode_enabled(self, enabled: bool) -> None: + nvml.device_set_persistence_mode( + self._handle, + nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED + ) + def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues: """ Get multiple field values from the device. 
@@ -494,8 +634,10 @@ cdef class Device: __all__ = [ "BAR1MemoryInfo", + "BrandType", "Device", "DeviceArchitecture", + "DeviceAttributes", "FieldId", "FieldValue", "FieldValues", diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index e26add99bc..15338383f6 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -75,6 +75,7 @@ CUDA system information and NVIDIA Management Library (NVML) system.get_driver_version system.get_driver_version_full + system.get_driver_branch system.get_num_devices system.get_nvml_version system.get_process_name @@ -82,15 +83,16 @@ CUDA system information and NVIDIA Management Library (NVML) :template: autosummary/cyclass.rst system.Device + system.BAR1MemoryInfo + system.BrandType system.DeviceArchitecture + system.DeviceAttributes system.FieldId system.FieldValue system.FieldValues system.MemoryInfo - system.BAR1MemoryInfo system.PciInfo - .. module:: cuda.core.utils Utility functions diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 8df7077c5d..52c08533ff 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -13,6 +13,7 @@ import re import sys +import helpers import pytest from cuda.core import system @@ -27,11 +28,6 @@ def check_gpu_available(): pytest.skip("No GPUs available to run device tests", allow_module_level=True) -def test_device_index_handle(): - for device in system.Device.get_all_devices(): - assert isinstance(device.handle, int) - - def test_device_architecture(): for device in system.Device.get_all_devices(): device_arch = device.architecture @@ -191,6 +187,78 @@ def test_unpack_bitmask_single_value(): _device._unpack_bitmask(1) +def test_device_brand(): + for device in system.Device.get_all_devices(): + brand = device.brand + assert isinstance(brand, system.BrandType) + assert isinstance(brand.name, str) + assert isinstance(brand.value, int) + + +def 
test_device_pci_bus_id(): + for device in system.Device.get_all_devices(): + pci_bus_id = device.pci_info.bus_id + assert isinstance(pci_bus_id, str) + + new_device = system.Device(pci_bus_id=pci_bus_id) + assert new_device.index == device.index + + +@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows") +def test_device_attributes(): + skip_reasons = [] + + for device in system.Device.get_all_devices(): + try: + attributes = device.attributes + except system.NotSupportedError: + skip_reasons.append(f"Device attributes not supported on '{device.name}'") + continue + assert isinstance(attributes, system.DeviceAttributes) + + assert isinstance(attributes.multiprocessor_count, int) + assert attributes.multiprocessor_count > 0 + + assert isinstance(attributes.shared_copy_engine_count, int) + assert isinstance(attributes.shared_decoder_count, int) + assert isinstance(attributes.shared_encoder_count, int) + assert isinstance(attributes.shared_jpeg_count, int) + assert isinstance(attributes.shared_ofa_count, int) + assert isinstance(attributes.gpu_instance_slice_count, int) + assert isinstance(attributes.compute_instance_slice_count, int) + assert isinstance(attributes.memory_size_mb, int) + assert attributes.memory_size_mb > 0 + + if skip_reasons: + pytest.skip(" ; ".join(skip_reasons)) + + +def test_c2c_mode_enabled(): + skip_reasons = set() + for device in system.Device.get_all_devices(): + # Catch the public system-level exception and report the device by name, as test_device_attributes does. + try: + is_enabled = device.is_c2c_mode_enabled + except system.NotSupportedError: + skip_reasons.add(f"C2C mode info not supported on '{device.name}'") + else: + assert isinstance(is_enabled, bool) + if skip_reasons: + pytest.skip(" ; ".join(skip_reasons)) + + +@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Persistence mode not supported on WSL or Windows") +def test_persistence_mode_enabled(): + for device in system.Device.get_all_devices(): + is_enabled = device.persistence_mode_enabled + assert 
isinstance(is_enabled, bool) + try: + device.persistence_mode_enabled = False + assert device.persistence_mode_enabled is False + finally: + device.persistence_mode_enabled = is_enabled + + def test_field_values(): for device in system.Device.get_all_devices(): # TODO: Are there any fields that return double's? It would be good to