166 changes: 154 additions & 12 deletions cuda_core/cuda/core/system/_device.pyx
@@ -15,6 +15,7 @@ from ._nvml_context cimport initialize
include "_device_utils.pxi"


BrandType = nvml.BrandType
FieldId = nvml.FieldId


@@ -174,6 +175,77 @@ cdef class PciInfo:
return self._pci_info.pci_device_id >> 16


cdef class DeviceAttributes:
"""
Various device attributes.
"""
def __init__(self, attributes: nvml.DeviceAttributes):
self._attributes = attributes

@property
def multiprocessor_count(self) -> int:
"""
The streaming multiprocessor count
"""
return self._attributes.multiprocessor_count

@property
def shared_copy_engine_count(self) -> int:
"""
The shared copy engine count
"""
return self._attributes.shared_copy_engine_count

@property
def shared_decoder_count(self) -> int:
"""
The shared decoder engine count
"""
return self._attributes.shared_decoder_count

@property
def shared_encoder_count(self) -> int:
"""
The shared encoder engine count
"""
return self._attributes.shared_encoder_count

@property
def shared_jpeg_count(self) -> int:
"""
The shared JPEG engine count
"""
return self._attributes.shared_jpeg_count

@property
def shared_ofa_count(self) -> int:
"""
The shared optical flow accelerator (OFA) engine count
"""
return self._attributes.shared_ofa_count

@property
def gpu_instance_slice_count(self) -> int:
"""
The GPU instance slice count
"""
return self._attributes.gpu_instance_slice_count

@property
def compute_instance_slice_count(self) -> int:
"""
The compute instance slice count
"""
return self._attributes.compute_instance_slice_count

@property
def memory_size_mb(self) -> int:
"""
Device memory size in MiB
"""
return self._attributes.memory_size_mb

Review thread on "Device memory size in MiB":

Copilot AI Jan 9, 2026
The property name is memory_size_mb, which suggests megabytes (MB), but the docstring says "MiB", which is mebibytes. These are different units: 1 MB = 1,000,000 bytes, while 1 MiB = 1,048,576 bytes. The naming should be consistent with the actual unit returned by the underlying NVML API to avoid confusion.
Suggested change: "Device memory size in MiB" -> "Device memory size in MB"

Contributor Author
This would be a deviation from the naming in NVML itself. What do you think, @leofang?

Collaborator
Is it difficult to push a change to NVML? To correct the function so we can be consistent.

Contributor Author
It would be a breaking API change to NVML. We could ask them to add a second API and deprecate the old one, I suppose. But I suspect this is a pretty common inconsistency across CUDA APIs (MB vs. MiB) and unlikely to be worth the effort. (But I'm assuming lots there.)


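For reference, a minimal usage sketch of the new attributes API (not part of this diff; assumes a Linux system and a device supported by the attributes property added below):

from cuda.core import system

device = system.Device(index=0)
attrs = device.attributes  # DeviceAttributes; may raise NotSupportedError on older GPUs
print(attrs.multiprocessor_count)  # streaming multiprocessor count
print(attrs.shared_decoder_count, attrs.shared_encoder_count)
print(attrs.memory_size_mb)  # memory size as reported by NVML (see the MB/MiB discussion above)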
cdef class FieldValue:
"""
Represents the data from a single field value.
@@ -317,39 +389,56 @@ cdef class Device:
about devices and their topology, as provided by the NVIDIA Management
Library (NVML). To use CUDA with a device, use :class:`cuda.core.Device`.

Creating a device instance causes NVML to initialize the target GPU.
NVML may initialize additional GPUs if the target GPU is an SLI slave.

Parameters
----------
index: int, optional
Integer representing the CUDA device index to get a handle to.
Integer representing the CUDA device index to get a handle to. Valid
values are between ``0`` and ``cuda.core.system.get_num_devices() - 1``.

The order in which devices are enumerated has no guarantees of
consistency between reboots. For that reason, it is recommended that
devices be looked up by their PCI IDs or UUIDs.

uuid: bytes or str, optional
UUID of a CUDA device to get a handle to.

pci_bus_id: bytes or str, optional
PCI bus ID of a CUDA device to get a handle to.

Raises
------
ValueError
If neither `index` nor `uuid` are specified or if both are specified.
If anything other than exactly one of `index`, `uuid`, or `pci_bus_id` is specified.
"""

cdef intptr_t _handle

def __init__(self, index: int | None = None, uuid: bytes | str | None = None):
def __init__(self, index: int | None = None, uuid: bytes | str | None = None, pci_bus_id: bytes | str | None = None):
initialize()

if index is not None and uuid is not None:
raise ValueError("Handle requires only one of either device `index` or `uuid`.")
if index is None and uuid is None:
raise ValueError("Handle requires either a device `index` or `uuid`.")
args = [index, uuid, pci_bus_id]
arg_count = sum(x is not None for x in args)

if arg_count > 1:
raise ValueError("Handle requires only one of either device `index`, `uuid` or `pci_bus_id`.")
if arg_count == 0:
raise ValueError("Handle requires either a device `index`, `uuid` or `pci_bus_id`.")

if index is not None:
self._handle = nvml.device_get_handle_by_index_v2(index)
else:
elif uuid is not None:
if isinstance(uuid, bytes):
uuid = uuid.decode("ascii")
self._handle = nvml.device_get_handle_by_uuid(uuid)

@property
def handle(self) -> int:
return self._handle
elif pci_bus_id is not None:
if isinstance(pci_bus_id, bytes):
pci_bus_id = pci_bus_id.decode("ascii")
self._handle = nvml.device_get_handle_by_pci_bus_id_v2(pci_bus_id)
else:
raise ValueError("Error parsing arguments")

@classmethod
def get_all_devices(cls) -> Iterable[Device]:
Expand Down Expand Up @@ -427,6 +516,24 @@ cdef class Device:
"""
return nvml.device_get_name(self._handle)

@property
def brand(self) -> BrandType:
"""
Brand of the device
"""
return BrandType(nvml.device_get_brand(self._handle))

@property
def index(self) -> int:
"""
The NVML index of this device.

The order in which NVML enumerates devices has no guarantees of
consistency between reboots. For that reason, it is recommended that
devices be looked up by their PCI IDs or GPU UUID.
"""
return nvml.device_get_index(self._handle)
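A short sketch (not part of this diff) showing the new brand and index properties during enumeration:

from cuda.core import system

for device in system.Device.get_all_devices():
    # brand is a system.BrandType enum member; index is the NVML enumeration index
    print(device.index, device.name, device.brand.name)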

@property
def pci_info(self) -> PciInfo:
"""
@@ -451,6 +558,39 @@ cdef class Device:
"""
return nvml.device_get_uuid(self._handle)

@property
def attributes(self) -> DeviceAttributes:
"""
Get various device attributes.

For Ampere™ or newer fully supported devices. Only available on Linux
systems.
"""
return DeviceAttributes(nvml.device_get_attributes_v2(self._handle))

@property
def is_c2c_mode_enabled(self) -> bool:
"""
Whether the C2C (Chip-to-Chip) mode is enabled for this device.
"""
return bool(nvml.device_get_c2c_mode_info_v(self._handle).is_c2c_enabled)
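Both attributes and is_c2c_mode_enabled can raise NotSupportedError on older GPUs or unsupported platforms; a defensive sketch (not part of this diff, mirroring the tests added below):

from cuda.core import system

device = system.Device(index=0)
try:
    print("C2C enabled:", device.is_c2c_mode_enabled)
except system.NotSupportedError:
    # system.NotSupportedError follows the attributes test below; the C2C test
    # catches the same error via the nvml module.
    print("C2C mode info not supported on this device")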

@property
def persistence_mode_enabled(self) -> bool:
"""
Whether persistence mode is enabled for this device.

For Linux only.
"""
return nvml.device_get_persistence_mode(self._handle) == nvml.EnableState.FEATURE_ENABLED

@persistence_mode_enabled.setter
def persistence_mode_enabled(self, enabled: bool) -> None:
nvml.device_set_persistence_mode(
self._handle,
nvml.EnableState.FEATURE_ENABLED if enabled else nvml.EnableState.FEATURE_DISABLED
)
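A sketch (not part of this diff) of toggling persistence mode and restoring the original setting; changing the mode typically requires elevated privileges on Linux (an assumption, not stated in this diff):

from cuda.core import system

device = system.Device(index=0)
original = device.persistence_mode_enabled
try:
    device.persistence_mode_enabled = True   # may require root privileges
finally:
    device.persistence_mode_enabled = original  # restore the previous setting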

def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues:
"""
Get multiple field values from the device.
@@ -494,8 +634,10 @@ cdef class Device:

__all__ = [
"BAR1MemoryInfo",
"BrandType",
"Device",
"DeviceArchitecture",
"DeviceAttributes",
"FieldId",
"FieldValue",
"FieldValues",
6 changes: 4 additions & 2 deletions cuda_core/docs/source/api.rst
@@ -75,22 +75,24 @@ CUDA system information and NVIDIA Management Library (NVML)

system.get_driver_version
system.get_driver_version_full
system.get_driver_branch
system.get_num_devices
system.get_nvml_version
system.get_process_name

:template: autosummary/cyclass.rst

system.Device
system.BAR1MemoryInfo
system.BrandType
system.DeviceArchitecture
system.DeviceAttributes
system.FieldId
system.FieldValue
system.FieldValues
system.MemoryInfo
system.BAR1MemoryInfo
system.PciInfo


.. module:: cuda.core.utils

Utility functions
77 changes: 72 additions & 5 deletions cuda_core/tests/system/test_system_device.py
@@ -13,6 +13,7 @@
import re
import sys

import helpers
import pytest
from cuda.core import system

@@ -27,11 +28,6 @@ def check_gpu_available():
pytest.skip("No GPUs available to run device tests", allow_module_level=True)


def test_device_index_handle():
for device in system.Device.get_all_devices():
assert isinstance(device.handle, int)


def test_device_architecture():
for device in system.Device.get_all_devices():
device_arch = device.architecture
@@ -191,6 +187,77 @@ def test_unpack_bitmask_single_value():
_device._unpack_bitmask(1)


def test_device_brand():
for device in system.Device.get_all_devices():
brand = device.brand
assert isinstance(brand, system.BrandType)
assert isinstance(brand.name, str)
assert isinstance(brand.value, int)


def test_device_pci_bus_id():
for device in system.Device.get_all_devices():
pci_bus_id = device.pci_info.bus_id
assert isinstance(pci_bus_id, str)

new_device = system.Device(pci_bus_id=device.pci_info.bus_id)
assert new_device.index == device.index


@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Device attributes not supported on WSL or Windows")
def test_device_attributes():
skip_reasons = []

for device in system.Device.get_all_devices():
try:
attributes = device.attributes
except system.NotSupportedError:
skip_reasons.append(f"Device attributes not supported on '{device.name}'")
continue
assert isinstance(attributes, system.DeviceAttributes)

assert isinstance(attributes.multiprocessor_count, int)
assert attributes.multiprocessor_count > 0

assert isinstance(attributes.shared_copy_engine_count, int)
assert isinstance(attributes.shared_decoder_count, int)
assert isinstance(attributes.shared_encoder_count, int)
assert isinstance(attributes.shared_jpeg_count, int)
assert isinstance(attributes.shared_ofa_count, int)
assert isinstance(attributes.gpu_instance_slice_count, int)
assert isinstance(attributes.compute_instance_slice_count, int)
assert isinstance(attributes.memory_size_mb, int)
assert attributes.memory_size_mb > 0

if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))


def test_c2c_mode_enabled():
skip_reasons = set()
for device in system.Device.get_all_devices():
try:
is_enabled = device.is_c2c_mode_enabled
except nvml.NotSupportedError:
skip_reasons.add(f"C2C mode info not supported on {device}")
else:
assert isinstance(is_enabled, bool)
if skip_reasons:
pytest.skip(" ; ".join(skip_reasons))


@pytest.mark.skipif(helpers.IS_WSL or helpers.IS_WINDOWS, reason="Persistence mode not supported on WSL or Windows")
def test_persistence_mode_enabled():
for device in system.Device.get_all_devices():
is_enabled = device.persistence_mode_enabled
assert isinstance(is_enabled, bool)
try:
device.persistence_mode_enabled = False
assert device.persistence_mode_enabled is False
finally:
device.persistence_mode_enabled = is_enabled


def test_field_values():
for device in system.Device.get_all_devices():
# TODO: Are there any fields that return double's? It would be good to