diff --git a/cuda_bindings/cuda/bindings/_nvml.pyx b/cuda_bindings/cuda/bindings/_nvml.pyx index d9bddcc4bc..3a3f01ea7a 100644 --- a/cuda_bindings/cuda/bindings/_nvml.pyx +++ b/cuda_bindings/cuda/bindings/_nvml.pyx @@ -787,7 +787,7 @@ class AffinityScope(_IntEnum): SOCKET = 1 # Scope of processor socket for affinity queries -class FI(_IntEnum): +class FieldId(_IntEnum): DEV_ECC_CURRENT = 1 # Current ECC mode. 1=Active. 0=Inactive DEV_ECC_PENDING = 2 # Pending ECC mode. 1=Active. 0=Inactive # ECC Count Totals @@ -1778,7 +1778,7 @@ cdef _get_pci_info_ext_v1_dtype_offsets(): cdef nvmlPciInfoExt_v1_t pod = nvmlPciInfoExt_v1_t() return _numpy.dtype({ 'names': ['version', 'domain', 'bus', 'device_', 'pci_device_id', 'pci_sub_system_id', 'base_class', 'sub_class', 'bus_id'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.int8, 32)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.domain)) - (&pod), @@ -1998,7 +1998,7 @@ cdef _get_pci_info_dtype_offsets(): cdef nvmlPciInfo_t pod = nvmlPciInfo_t() return _numpy.dtype({ 'names': ['bus_id_legacy', 'domain', 'bus', 'device_', 'pci_device_id', 'pci_sub_system_id', 'bus_id'], - 'formats': [_numpy.int8, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.int8], + 'formats': [(_numpy.int8, 16), _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.int8, 32)], 'offsets': [ (&(pod.busIdLegacy)) - (&pod), (&(pod.domain)) - (&pod), @@ -4901,7 +4901,7 @@ cdef _get_device_perf_modes_v1_dtype_offsets(): cdef nvmlDevicePerfModes_v1_t pod = nvmlDevicePerfModes_v1_t() return _numpy.dtype({ 'names': ['version', 'str'], - 'formats': [_numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, (_numpy.int8, 2048)], 'offsets': [ (&(pod.version)) - 
(&pod), (&(pod.str)) - (&pod), @@ -5037,7 +5037,7 @@ cdef _get_device_current_clock_freqs_v1_dtype_offsets(): cdef nvmlDeviceCurrentClockFreqs_v1_t pod = nvmlDeviceCurrentClockFreqs_v1_t() return _numpy.dtype({ 'names': ['version', 'str'], - 'formats': [_numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, (_numpy.int8, 2048)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.str)) - (&pod), @@ -5849,7 +5849,7 @@ cdef _get_platform_info_v1_dtype_offsets(): cdef nvmlPlatformInfo_v1_t pod = nvmlPlatformInfo_v1_t() return _numpy.dtype({ 'names': ['version', 'ib_guid', 'rack_guid', 'chassis_physical_slot_number', 'compute_slot_ind_ex', 'node_ind_ex', 'peer_type', 'module_id'], - 'formats': [_numpy.uint32, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8], + 'formats': [_numpy.uint32, (_numpy.uint8, 16), (_numpy.uint8, 16), _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.ibGuid)) - (&pod), @@ -6061,7 +6061,7 @@ cdef _get_platform_info_v2_dtype_offsets(): cdef nvmlPlatformInfo_v2_t pod = nvmlPlatformInfo_v2_t() return _numpy.dtype({ 'names': ['version', 'ib_guid', 'chassis_serial_number', 'slot_number', 'tray_ind_ex', 'host_id', 'peer_type', 'module_id'], - 'formats': [_numpy.uint32, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8], + 'formats': [_numpy.uint32, (_numpy.uint8, 16), (_numpy.uint8, 16), _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8, _numpy.uint8], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.ibGuid)) - (&pod), @@ -6993,7 +6993,7 @@ cdef _get_vgpu_process_utilization_info_v1_dtype_offsets(): cdef nvmlVgpuProcessUtilizationInfo_v1_t pod = nvmlVgpuProcessUtilizationInfo_v1_t() return _numpy.dtype({ 'names': ['process_name', 'time_stamp', 'vgpu_instance', 'pid', 'sm_util', 'mem_util', 'enc_util', 'dec_util', 'jpg_util', 'ofa_util'], - 'formats': [_numpy.int8, _numpy.uint64, 
_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32], + 'formats': [(_numpy.int8, 64), _numpy.uint64, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32], 'offsets': [ (&(pod.processName)) - (&pod), (&(pod.timeStamp)) - (&pod), @@ -8063,7 +8063,7 @@ cdef _get_vgpu_scheduler_capabilities_dtype_offsets(): cdef nvmlVgpuSchedulerCapabilities_t pod = nvmlVgpuSchedulerCapabilities_t() return _numpy.dtype({ 'names': ['supported_schedulers', 'max_timeslice', 'min_timeslice', 'is_arr_mode_supported', 'max_frequency_for_arr', 'min_frequency_for_arr', 'max_avg_factor_for_arr', 'min_avg_factor_for_arr'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32], + 'formats': [(_numpy.uint32, 3), _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32], 'offsets': [ (&(pod.supportedSchedulers)) - (&pod), (&(pod.maxTimeslice)) - (&pod), @@ -9233,7 +9233,7 @@ cdef _get_hwbc_entry_dtype_offsets(): cdef nvmlHwbcEntry_t pod = nvmlHwbcEntry_t() return _numpy.dtype({ 'names': ['hwbc_id', 'firmware_version'], - 'formats': [_numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, (_numpy.int8, 32)], 'offsets': [ (&(pod.hwbcId)) - (&pod), (&(pod.firmwareVersion)) - (&pod), @@ -9377,7 +9377,7 @@ cdef _get_led_state_dtype_offsets(): cdef nvmlLedState_t pod = nvmlLedState_t() return _numpy.dtype({ 'names': ['cause', 'color'], - 'formats': [_numpy.int8, _numpy.int32], + 'formats': [(_numpy.int8, 256), _numpy.int32], 'offsets': [ (&(pod.cause)) - (&pod), (&(pod.color)) - (&pod), @@ -9513,7 +9513,7 @@ cdef _get_unit_info_dtype_offsets(): cdef nvmlUnitInfo_t pod = nvmlUnitInfo_t() return _numpy.dtype({ 'names': ['name', 'id', 'serial', 'firmware_version'], - 'formats': [_numpy.int8, _numpy.int8, _numpy.int8, _numpy.int8], + 'formats': 
[(_numpy.int8, 96), (_numpy.int8, 96), (_numpy.int8, 96), (_numpy.int8, 96)], 'offsets': [ (&(pod.name)) - (&pod), (&(pod.id)) - (&pod), @@ -9685,7 +9685,7 @@ cdef _get_psu_info_dtype_offsets(): cdef nvmlPSUInfo_t pod = nvmlPSUInfo_t() return _numpy.dtype({ 'names': ['state', 'current', 'voltage', 'power'], - 'formats': [_numpy.int8, _numpy.uint32, _numpy.uint32, _numpy.uint32], + 'formats': [(_numpy.int8, 256), _numpy.uint32, _numpy.uint32, _numpy.uint32], 'offsets': [ (&(pod.state)) - (&pod), (&(pod.current)) - (&pod), @@ -11544,7 +11544,7 @@ cdef _get_conf_compute_gpu_certificate_dtype_offsets(): cdef nvmlConfComputeGpuCertificate_t pod = nvmlConfComputeGpuCertificate_t() return _numpy.dtype({ 'names': ['cert_chain_size', 'attestation_cert_chain_size', 'cert_chain', 'attestation_cert_chain'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint8, _numpy.uint8], + 'formats': [_numpy.uint32, _numpy.uint32, (_numpy.uint8, 4096), (_numpy.uint8, 5120)], 'offsets': [ (&(pod.certChainSize)) - (&pod), (&(pod.attestationCertChainSize)) - (&pod), @@ -11708,7 +11708,7 @@ cdef _get_conf_compute_gpu_attestation_report_dtype_offsets(): cdef nvmlConfComputeGpuAttestationReport_t pod = nvmlConfComputeGpuAttestationReport_t() return _numpy.dtype({ 'names': ['is_cec_attestation_report_present', 'attestation_report_size', 'cec_attestation_report_size', 'nonce', 'attestation_report', 'cec_attestation_report'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint8, _numpy.uint8, _numpy.uint8], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.uint8, 32), (_numpy.uint8, 8192), (_numpy.uint8, 4096)], 'offsets': [ (&(pod.isCecAttestationReportPresent)) - (&pod), (&(pod.attestationReportSize)) - (&pod), @@ -12032,7 +12032,7 @@ cdef _get_gpu_fabric_info_v2_dtype_offsets(): cdef nvmlGpuFabricInfo_v2_t pod = nvmlGpuFabricInfo_v2_t() return _numpy.dtype({ 'names': ['version', 'cluster_uuid', 'status', 'clique_id', 'state', 'health_mask'], - 'formats': 
[_numpy.uint32, _numpy.uint8, _numpy.int32, _numpy.uint32, _numpy.uint8, _numpy.uint32], + 'formats': [_numpy.uint32, (_numpy.uint8, 16), _numpy.int32, _numpy.uint32, _numpy.uint8, _numpy.uint32], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.clusterUuid)) - (&pod), @@ -12216,7 +12216,7 @@ cdef _get_nvlink_supported_bw_modes_v1_dtype_offsets(): cdef nvmlNvlinkSupportedBwModes_v1_t pod = nvmlNvlinkSupportedBwModes_v1_t() return _numpy.dtype({ 'names': ['version', 'bw_modes', 'total_bw_modes'], - 'formats': [_numpy.uint32, _numpy.uint8, _numpy.uint8], + 'formats': [_numpy.uint32, (_numpy.uint8, 23), _numpy.uint8], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.bwModes)) - (&pod), @@ -12784,7 +12784,7 @@ cdef _get_vgpu_metadata_dtype_offsets(): cdef nvmlVgpuMetadata_t pod = nvmlVgpuMetadata_t() return _numpy.dtype({ 'names': ['version', 'revision', 'guest_info_state', 'guest_driver_version', 'host_driver_version', 'reserved', 'vgpu_virtualization_caps', 'guest_vgpu_version', 'opaque_data_size', 'opaque_data'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.int32, _numpy.int8, _numpy.int8, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.int32, (_numpy.int8, 80), (_numpy.int8, 80), (_numpy.uint32, 6), _numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.int8, 4)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.revision)) - (&pod), @@ -13291,7 +13291,7 @@ cdef _get_gpu_instance_profile_info_v2_dtype_offsets(): cdef nvmlGpuInstanceProfileInfo_v2_t pod = nvmlGpuInstanceProfileInfo_v2_t() return _numpy.dtype({ 'names': ['version', 'id', 'is_p2p_supported', 'slice_count', 'instance_count', 'multiprocessor_count', 'copy_engine_count', 'decoder_count', 'encoder_count', 'jpeg_count', 'ofa_count', 'memory_size_mb', 'name'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, 
_numpy.uint32, _numpy.uint64, _numpy.int8], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint64, (_numpy.int8, 96)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.id)) - (&pod), @@ -13559,7 +13559,7 @@ cdef _get_gpu_instance_profile_info_v3_dtype_offsets(): cdef nvmlGpuInstanceProfileInfo_v3_t pod = nvmlGpuInstanceProfileInfo_v3_t() return _numpy.dtype({ 'names': ['version', 'id', 'slice_count', 'instance_count', 'multiprocessor_count', 'copy_engine_count', 'decoder_count', 'encoder_count', 'jpeg_count', 'ofa_count', 'memory_size_mb', 'name', 'capabilities'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint64, _numpy.int8, _numpy.uint32], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint64, (_numpy.int8, 96), _numpy.uint32], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.id)) - (&pod), @@ -13973,7 +13973,7 @@ cdef _get_compute_instance_profile_info_v2_dtype_offsets(): cdef nvmlComputeInstanceProfileInfo_v2_t pod = nvmlComputeInstanceProfileInfo_v2_t() return _numpy.dtype({ 'names': ['version', 'id', 'slice_count', 'instance_count', 'multiprocessor_count', 'shared_copy_engine_count', 'shared_decoder_count', 'shared_encoder_count', 'shared_jpeg_count', 'shared_ofa_count', 'name'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.int8, 96)], 'offsets': [ (&(pod.version)) - (&pod), 
(&(pod.id)) - (&pod), @@ -14217,7 +14217,7 @@ cdef _get_compute_instance_profile_info_v3_dtype_offsets(): cdef nvmlComputeInstanceProfileInfo_v3_t pod = nvmlComputeInstanceProfileInfo_v3_t() return _numpy.dtype({ 'names': ['version', 'id', 'slice_count', 'instance_count', 'multiprocessor_count', 'shared_copy_engine_count', 'shared_decoder_count', 'shared_encoder_count', 'shared_jpeg_count', 'shared_ofa_count', 'name', 'capabilities'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.int8, _numpy.uint32], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, (_numpy.int8, 96), _numpy.uint32], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.id)) - (&pod), @@ -15495,7 +15495,7 @@ cdef _get_gpu_fabric_info_v3_dtype_offsets(): cdef nvmlGpuFabricInfo_v3_t pod = nvmlGpuFabricInfo_v3_t() return _numpy.dtype({ 'names': ['version', 'cluster_uuid', 'status', 'clique_id', 'state', 'health_mask', 'health_summary'], - 'formats': [_numpy.uint32, _numpy.uint8, _numpy.int32, _numpy.uint32, _numpy.uint8, _numpy.uint32, _numpy.uint8], + 'formats': [_numpy.uint32, (_numpy.uint8, 16), _numpy.int32, _numpy.uint32, _numpy.uint8, _numpy.uint32, _numpy.uint8], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.clusterUuid)) - (&pod), @@ -16351,7 +16351,7 @@ cdef _get_excluded_device_info_dtype_offsets(): cdef nvmlExcludedDeviceInfo_t pod = nvmlExcludedDeviceInfo_t() return _numpy.dtype({ 'names': ['pci_info', 'uuid'], - 'formats': [pci_info_dtype, _numpy.int8], + 'formats': [pci_info_dtype, (_numpy.int8, 80)], 'offsets': [ (&(pod.pciInfo)) - (&pod), (&(pod.uuid)) - (&pod), @@ -16641,7 +16641,7 @@ cdef _get_bridge_chip_hierarchy_dtype_offsets(): cdef nvmlBridgeChipHierarchy_t pod = nvmlBridgeChipHierarchy_t() return _numpy.dtype({ 'names': ['bridge_count', 
'bridge_chip_info'], - 'formats': [_numpy.uint8, bridge_chip_info_dtype], + 'formats': [_numpy.uint8, (bridge_chip_info_dtype, 128)], 'offsets': [ (&(pod.bridgeCount)) - (&pod), (&(pod.bridgeChipInfo)) - (&pod), @@ -17475,7 +17475,7 @@ cdef _get_gpu_thermal_settings_dtype_offsets(): cdef nvmlGpuThermalSettings_t pod = nvmlGpuThermalSettings_t() return _numpy.dtype({ 'names': ['count', 'sensor'], - 'formats': [_numpy.uint32, _py_anon_pod0_dtype], + 'formats': [_numpy.uint32, (_py_anon_pod0_dtype, 3)], 'offsets': [ (&(pod.count)) - (&pod), (&(pod.sensor)) - (&pod), @@ -17610,7 +17610,7 @@ cdef _get_clk_mon_status_dtype_offsets(): cdef nvmlClkMonStatus_t pod = nvmlClkMonStatus_t() return _numpy.dtype({ 'names': ['b_global_status', 'clk_mon_list_size', 'clk_mon_list'], - 'formats': [_numpy.uint32, _numpy.uint32, clk_mon_fault_info_dtype], + 'formats': [_numpy.uint32, _numpy.uint32, (clk_mon_fault_info_dtype, 32)], 'offsets': [ (&(pod.bGlobalStatus)) - (&pod), (&(pod.clkMonListSize)) - (&pod), @@ -17910,7 +17910,7 @@ cdef _get_gpu_dynamic_pstates_info_dtype_offsets(): cdef nvmlGpuDynamicPstatesInfo_t pod = nvmlGpuDynamicPstatesInfo_t() return _numpy.dtype({ 'names': ['flags_', 'utilization'], - 'formats': [_numpy.uint32, _py_anon_pod1_dtype], + 'formats': [_numpy.uint32, (_py_anon_pod1_dtype, 8)], 'offsets': [ (&(pod.flags)) - (&pod), (&(pod.utilization)) - (&pod), @@ -18601,7 +18601,7 @@ cdef _get_grid_licensable_feature_dtype_offsets(): cdef nvmlGridLicensableFeature_t pod = nvmlGridLicensableFeature_t() return _numpy.dtype({ 'names': ['feature_code', 'feature_state', 'license_info', 'product_name', 'feature_enabled', 'license_expiry'], - 'formats': [_numpy.int32, _numpy.uint32, _numpy.int8, _numpy.int8, _numpy.uint32, grid_license_expiry_dtype], + 'formats': [_numpy.int32, _numpy.uint32, (_numpy.int8, 128), (_numpy.int8, 128), _numpy.uint32, grid_license_expiry_dtype], 'offsets': [ (&(pod.featureCode)) - (&pod), (&(pod.featureState)) - (&pod), @@ -18789,7 +18789,7 @@ 
cdef _get_unit_fan_speeds_dtype_offsets(): cdef nvmlUnitFanSpeeds_t pod = nvmlUnitFanSpeeds_t() return _numpy.dtype({ 'names': ['fans', 'count'], - 'formats': [unit_fan_info_dtype, _numpy.uint32], + 'formats': [(unit_fan_info_dtype, 24), _numpy.uint32], 'offsets': [ (&(pod.fans)) - (&pod), (&(pod.count)) - (&pod), @@ -18924,7 +18924,7 @@ cdef _get_vgpu_pgpu_metadata_dtype_offsets(): cdef nvmlVgpuPgpuMetadata_t pod = nvmlVgpuPgpuMetadata_t() return _numpy.dtype({ 'names': ['version', 'revision', 'host_driver_version', 'pgpu_virtualization_caps', 'reserved', 'host_supported_vgpu_range', 'opaque_data_size', 'opaque_data'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.int8, _numpy.uint32, _numpy.uint32, vgpu_version_dtype, _numpy.uint32, _numpy.int8], + 'formats': [_numpy.uint32, _numpy.uint32, (_numpy.int8, 80), _numpy.uint32, (_numpy.uint32, 5), vgpu_version_dtype, _numpy.uint32, (_numpy.int8, 4)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.revision)) - (&pod), @@ -19593,7 +19593,7 @@ cdef _get_nvlink_firmware_info_dtype_offsets(): cdef nvmlNvlinkFirmwareInfo_t pod = nvmlNvlinkFirmwareInfo_t() return _numpy.dtype({ 'names': ['firmware_version', 'num_valid_entries'], - 'formats': [nvlink_firmware_version_dtype, _numpy.uint32], + 'formats': [(nvlink_firmware_version_dtype, 100), _numpy.uint32], 'offsets': [ (&(pod.firmwareVersion)) - (&pod), (&(pod.numValidEntries)) - (&pod), @@ -20039,7 +20039,7 @@ cdef _get_vgpu_scheduler_log_dtype_offsets(): cdef nvmlVgpuSchedulerLog_t pod = nvmlVgpuSchedulerLog_t() return _numpy.dtype({ 'names': ['engine_id', 'scheduler_policy', 'arr_mode', 'scheduler_params', 'entries_count', 'log_entries'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, vgpu_scheduler_params_dtype, _numpy.uint32, vgpu_scheduler_log_entry_dtype], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, vgpu_scheduler_params_dtype, _numpy.uint32, (vgpu_scheduler_log_entry_dtype, 200)], 'offsets': [ (&(pod.engineId)) - (&pod), 
(&(pod.schedulerPolicy)) - (&pod), @@ -20537,7 +20537,7 @@ cdef _get_vgpu_scheduler_log_info_v1_dtype_offsets(): cdef nvmlVgpuSchedulerLogInfo_v1_t pod = nvmlVgpuSchedulerLogInfo_v1_t() return _numpy.dtype({ 'names': ['version', 'engine_id', 'scheduler_policy', 'arr_mode', 'scheduler_params', 'entries_count', 'log_entries'], - 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, vgpu_scheduler_params_dtype, _numpy.uint32, vgpu_scheduler_log_entry_dtype], + 'formats': [_numpy.uint32, _numpy.uint32, _numpy.uint32, _numpy.uint32, vgpu_scheduler_params_dtype, _numpy.uint32, (vgpu_scheduler_log_entry_dtype, 200)], 'offsets': [ (&(pod.version)) - (&pod), (&(pod.engineId)) - (&pod), @@ -20902,7 +20902,7 @@ cdef _get_grid_licensable_features_dtype_offsets(): cdef nvmlGridLicensableFeatures_t pod = nvmlGridLicensableFeatures_t() return _numpy.dtype({ 'names': ['is_grid_license_supported', 'licensable_features_count', 'grid_licensable_features'], - 'formats': [_numpy.int32, _numpy.uint32, grid_licensable_feature_dtype], + 'formats': [_numpy.int32, _numpy.uint32, (grid_licensable_feature_dtype, 3)], 'offsets': [ (&(pod.isGridLicenseSupported)) - (&pod), (&(pod.licensableFeaturesCount)) - (&pod), @@ -27299,10 +27299,16 @@ cdef FieldValue _cast_field_values(values): values_ = FieldValue(valuesCount) for i, v in enumerate(values): if isinstance(v, tuple): + if len(v) != 2: + raise ValueError("FieldValue tuple must be of length 2") + if not isinstance(v[0], int) or not isinstance(v[1], int): + raise ValueError("FieldValue tuple elements must be integers") values_[i].field_id = v[0] values_[i].scope_id = v[1] - else: + elif isinstance(v, int): values_[i].field_id = v + else: + raise ValueError("Each entry must be an integer field ID, or a tuple of (field ID, scope ID)") return values_ @@ -27322,7 +27328,8 @@ cpdef object device_get_field_values(intptr_t device, values): __status__ = nvmlDeviceGetFieldValues(device, valuesCount, ptr) check_status(__status__) - 
return FieldValue.from_ptr(ptr, valuesCount) + values_._data.resize((valuesCount,)) + return values_ cpdef object device_clear_field_values(intptr_t device, values): diff --git a/cuda_bindings/tests/nvml/test_nvlink.py b/cuda_bindings/tests/nvml/test_nvlink.py index 14799898be..99407abc19 100644 --- a/cuda_bindings/tests/nvml/test_nvlink.py +++ b/cuda_bindings/tests/nvml/test_nvlink.py @@ -11,14 +11,14 @@ def test_nvlink_get_link_count(all_devices): """ for device in all_devices: fields = nvml.FieldValue(1) - fields[0].field_id = nvml.FI.DEV_NVLINK_LINK_COUNT + fields[0].field_id = nvml.FieldId.DEV_NVLINK_LINK_COUNT value = nvml.device_get_field_values(device, fields)[0] assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, ( f"Unexpected return {value.nvml_return} for link count field query" ) # Use the alternative argument to device_get_field_values - value = nvml.device_get_field_values(device, [nvml.FI.DEV_NVLINK_LINK_COUNT])[0] + value = nvml.device_get_field_values(device, [nvml.FieldId.DEV_NVLINK_LINK_COUNT])[0] assert value.nvml_return == nvml.Return.SUCCESS or value.nvml_return == nvml.Return.ERROR_NOT_SUPPORTED, ( f"Unexpected return {value.nvml_return} for link count field query" ) diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index 2b5ec242e8..31c72cdd2b 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -15,6 +15,9 @@ from ._nvml_context cimport initialize include "_device_utils.pxi" +FieldId = nvml.FieldId + + class DeviceArchitecture: """ Device architecture enumeration. @@ -171,6 +174,141 @@ cdef class PciInfo: return self._pci_info.pci_device_id >> 16 +cdef class FieldValue: + """ + Represents the data from a single field value. + + Use :meth:`Device.get_field_values` to get multiple field values at once. 
+ """ + cdef object _field_value + + def __init__(self, field_value: nvml.FieldValue): + assert len(field_value) == 1 + self._field_value = field_value + + @property + def field_id(self) -> FieldId: + """ + The field ID. + """ + return FieldId(self._field_value.field_id) + + @property + def scope_id(self) -> int: + """ + The scope ID. + """ + # Explicit int() cast required because this is a Numpy type + return int(self._field_value.scope_id) + + @property + def timestamp(self) -> int: + """ + The CPU timestamp (in microseconds since 1970) at which the value was + sampled. + """ + # Explicit int() cast required because this is a Numpy type + return int(self._field_value.timestamp) + + @property + def latency_usec(self) -> int: + """ + How long this field value took to update (in usec) within NVML. This may + be averaged across several fields that are serviced by the same driver + call. + """ + # Explicit int() cast required because this is a Numpy type + return int(self._field_value.latency_usec) + + @property + def value(self) -> int | float: + """ + The field value. + + Raises + ------ + :class:`cuda.core.system.NvmlError` + If there was an error retrieving the field value. 
+ """ + nvml.check_status(self._field_value.nvml_return) + + cdef int value_type = self._field_value.value_type + value = self._field_value.value + + ValueType = nvml.ValueType + + if value_type == ValueType.DOUBLE: + return float(value.d_val[0]) + elif value_type == ValueType.UNSIGNED_INT: + return int(value.ui_val[0]) + elif value_type == ValueType.UNSIGNED_LONG: + return int(value.ul_val[0]) + elif value_type == ValueType.UNSIGNED_LONG_LONG: + return int(value.ull_val[0]) + elif value_type == ValueType.SIGNED_LONG_LONG: + return int(value.ll_val[0]) + elif value_type == ValueType.SIGNED_INT: + return int(value.si_val[0]) + elif value_type == ValueType.UNSIGNED_SHORT: + return int(value.us_val[0]) + else: + raise AssertionError("Unexpected value type") + + +cdef class FieldValues: + """ + Container of multiple field values. + """ + cdef object _field_values + + def __init__(self, field_values: nvml.FieldValue): + self._field_values = field_values + + def __getitem__(self, idx: int) -> FieldValue: + return FieldValue(self._field_values[idx]) + + def __len__(self) -> int: + return len(self._field_values) + + def validate(self) -> None: + """ + Validate that there are no issues in any of the contained field values. + + Raises an exception for the first issue found, if any. + + Raises + ------ + :class:`cuda.core.system.NvmlError` + If any of the contained field values has an associated exception. + """ + # TODO: This is a classic use case for an `ExceptionGroup`, but those + # are only available in Python 3.11+. + return_values = self._field_values.nvml_return + if len(self._field_values) == 1: + return_values = [return_values] + for return_value in return_values: + nvml.check_status(return_value) + + def get_all_values(self) -> list[int | float]: + """ + Get all field values as a list. + + This will validate each of the values and include just the core value in + the list. + + Returns + ------- + list[int | float] + List of all field values. 
+ + Raises + ------ + :class:`cuda.core.system.NvmlError` + If any of the contained field values has an associated exception. + """ + return [x.value for x in self] + + cdef class Device: """ Representation of a device. @@ -313,11 +451,54 @@ cdef class Device: """ return nvml.device_get_uuid(self._handle) + def get_field_values(self, field_ids: list[int | tuple[int, int]]) -> FieldValues: + """ + Get multiple field values from the device. + + Each value specified can raise its own exception. That exception will + be raised when attempting to access the corresponding ``value`` from the + returned :class:`FieldValues` container. + + To confirm that there are no exceptions in the entire container, call + :meth:`FieldValues.validate`. + + Parameters + ---------- + field_ids: list of int or tuple of (int, int) + List of field IDs to query. + + Each item may be either a single value from the :class:`FieldId` + enum, or a pair of (:class:`FieldId`, scope ID). + + Returns + ------- + :class:`FieldValues` + Container of field values corresponding to the requested field IDs. + """ + return FieldValues(nvml.device_get_field_values(self._handle, field_ids)) + + def clear_field_values(self, field_ids: list[int | tuple[int, int]]) -> None: + """ + Clear multiple field values from the device. + + Parameters + ---------- + field_ids: list of int or tuple of (int, int) + List of field IDs to clear. + + Each item may be either a single value from the :class:`FieldId` + enum, or a pair of (:class:`FieldId`, scope ID). 
+ """ + nvml.device_clear_field_values(self._handle, field_ids) + __all__ = [ "BAR1MemoryInfo", "Device", "DeviceArchitecture", + "FieldId", + "FieldValue", + "FieldValues", "MemoryInfo", "PciInfo", ] diff --git a/cuda_core/cuda/core/system/_system.pyx b/cuda_core/cuda/core/system/_system.pyx index adef2d8afc..c29d20dd20 100644 --- a/cuda_core/cuda/core/system/_system.pyx +++ b/cuda_core/cuda/core/system/_system.pyx @@ -20,6 +20,14 @@ else: if CUDA_BINDINGS_NVML_IS_COMPATIBLE: from cuda.bindings import _nvml as nvml + # TODO: We need to be even more specific than version numbers for development. + # This can be removed once we have a release including everything we need. + for member in ["FieldId"]: + if not hasattr(nvml, member): + CUDA_BINDINGS_NVML_IS_COMPATIBLE = False + break + +if CUDA_BINDINGS_NVML_IS_COMPATIBLE: from ._nvml_context import initialize else: from cuda.core._utils.cuda_utils import driver, handle_return, runtime diff --git a/cuda_core/docs/source/api.rst b/cuda_core/docs/source/api.rst index 13e1f43a2f..c6877e35ec 100644 --- a/cuda_core/docs/source/api.rst +++ b/cuda_core/docs/source/api.rst @@ -80,6 +80,9 @@ CUDA system information and NVIDIA Management Library (NVML) system.Device system.DeviceArchitecture + system.FieldId + system.FieldValue + system.FieldValues system.MemoryInfo system.BAR1MemoryInfo system.PciInfo diff --git a/cuda_core/tests/system/test_system_device.py b/cuda_core/tests/system/test_system_device.py index 134ea7cbbe..8df7077c5d 100644 --- a/cuda_core/tests/system/test_system_device.py +++ b/cuda_core/tests/system/test_system_device.py @@ -15,10 +15,10 @@ import pytest from cuda.core import system -from cuda.core.system import _device as system_device if system.CUDA_BINDINGS_NVML_IS_COMPATIBLE: from cuda.bindings import _nvml as nvml + from cuda.core.system import _device @pytest.fixture(autouse=True, scope="module") @@ -36,7 +36,7 @@ def test_device_architecture(): for device in system.Device.get_all_devices(): 
device_arch = device.architecture - assert isinstance(device_arch, system_device.DeviceArchitecture) + assert isinstance(device_arch, system.DeviceArchitecture) if sys.version_info < (3, 12): assert device_arch.id in nvml.DeviceArch.__members__.values() else: @@ -52,7 +52,7 @@ def test_device_bar1_memory(): bar1_memory_info.used, ) - assert isinstance(bar1_memory_info, system_device.BAR1MemoryInfo) + assert isinstance(bar1_memory_info, system.BAR1MemoryInfo) assert isinstance(free, int) assert isinstance(total, int) assert isinstance(used, int) @@ -93,7 +93,7 @@ def test_device_memory(): memory_info = device.memory_info free, total, used, reserved = memory_info.free, memory_info.total, memory_info.used, memory_info.reserved - assert isinstance(memory_info, system_device.MemoryInfo) + assert isinstance(memory_info, system.MemoryInfo) assert isinstance(free, int) assert isinstance(total, int) assert isinstance(used, int) @@ -116,7 +116,7 @@ def test_device_name(): def test_device_pci_info(): for device in system.Device.get_all_devices(): pci_info = device.pci_info - assert isinstance(pci_info, system_device.PciInfo) + assert isinstance(pci_info, system.PciInfo) assert isinstance(pci_info.bus_id, str) assert re.match("[a-f0-9]{8}:[a-f0-9]{2}:[a-f0-9]{2}.[a-f0-9]", pci_info.bus_id.lower()) @@ -183,9 +183,57 @@ def test_device_uuid(): ], ) def test_unpack_bitmask(params): - assert system_device._unpack_bitmask(array.array("Q", params["input"])) == params["output"] + assert _device._unpack_bitmask(array.array("Q", params["input"])) == params["output"] def test_unpack_bitmask_single_value(): with pytest.raises(TypeError): - system_device._unpack_bitmask(1) + _device._unpack_bitmask(1) + + +def test_field_values(): + for device in system.Device.get_all_devices(): + # TODO: Are there any fields that return doubles? It would be good to 
+ + field_ids = [ + system.FieldId.DEV_TOTAL_ENERGY_CONSUMPTION, + system.FieldId.DEV_PCIE_COUNT_TX_BYTES, + ] + field_values = device.get_field_values(field_ids) + field_values.validate() + + with pytest.raises(TypeError): + field_values["invalid_index"] + + assert isinstance(field_values, system.FieldValues) + assert len(field_values) == len(field_ids) + + raw_values = field_values.get_all_values() + assert all(x == y.value for x, y in zip(raw_values, field_values)) + + for field_id, field_value in zip(field_ids, field_values): + assert field_value.field_id == field_id + assert type(field_value.value) is int + assert field_value.latency_usec >= 0 + assert field_value.timestamp >= 0 + + orig_timestamp = field_values[0].timestamp + field_values = device.get_field_values(field_ids) + assert field_values[0].timestamp >= orig_timestamp + + # Test only one element, because that's weirdly a special case + field_ids = [ + system.FieldId.DEV_PCIE_REPLAY_COUNTER, + ] + field_values = device.get_field_values(field_ids) + assert len(field_values) == 1 + field_values.validate() + old_value = field_values[0].value + + # Test clear_field_values + device.clear_field_values(field_ids) + field_values = device.get_field_values(field_ids) + field_values.validate() + assert len(field_values) == 1 + assert field_values[0].value <= old_value