diff --git a/src/driver/amdxdna/Makefile b/src/driver/amdxdna/Makefile index 4e3b3b631..9bec999d7 100644 --- a/src/driver/amdxdna/Makefile +++ b/src/driver/amdxdna/Makefile @@ -52,18 +52,15 @@ DEFINES += -DMODULE_VER_STR='\"$(MODULE_VER_STR)\"' ifeq ($(XDNA_BUS_TYPE), of) DEFINES += -DAMDXDNA_OF -# Generate config_kernel.h when building directly (Yocto/DKMS flow) +endif + +# Generate config_kernel.h if not already present (CMake or direct build) modules: $(SRC_DIR)/config_kernel.h $(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) CFLAGS_MODULE="$(DEFINES)" OFT_CONFIG_AMDXDNA_PCI=$(PCI) OFT_CONFIG_AMDXDNA_OF=$(OF) $(USE_LLVM) modules $(SRC_DIR)/config_kernel.h: - @echo "[INFO] Generating config_kernel.h for OF build..." + @echo "[INFO] Generating config_kernel.h..." @KERNEL_SRC=$(KERNEL_SRC) OUT="$(SRC_DIR)/config_kernel.h" bash $(SRC_DIR)/../../../../tools/configure_kernel.sh -else -# PCI builds: CMake generates config_kernel.h -modules: - $(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) CFLAGS_MODULE="$(DEFINES)" OFT_CONFIG_AMDXDNA_PCI=$(PCI) OFT_CONFIG_AMDXDNA_OF=$(OF) $(USE_LLVM) modules -endif modules_install: $(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) modules_install diff --git a/src/driver/amdxdna/aie2_pci.c b/src/driver/amdxdna/aie2_pci.c index 2416e683d..a57c27dac 100644 --- a/src/driver/amdxdna/aie2_pci.c +++ b/src/driver/amdxdna/aie2_pci.c @@ -843,6 +843,7 @@ static int aie2_query_clock_metadata(struct amdxdna_client *client, ndev = xdna->dev_handle; + aie2_update_counters(ndev); snprintf(clock.mp_npu_clock.name, sizeof(clock.mp_npu_clock.name), "MP-NPU Clock"); clock.mp_npu_clock.freq_mhz = ndev->npuclk_freq; @@ -859,32 +860,62 @@ static int aie2_query_clock_metadata(struct amdxdna_client *client, static int aie2_query_sensors(struct amdxdna_client *client, struct amdxdna_drm_get_info *args) { - struct amdxdna_drm_query_sensor *sensor; - struct amdxdna_dev *xdna = client->xdna; - int ret = 0; - int min; +#ifdef HAVE_7_0_amd_pmf_get_npu_data + struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle; + struct amdxdna_drm_query_sensor sensor = {}; + struct amd_pmf_npu_metrics npu_metrics; + u32 sensors_count = 0, i; + int ret; - if (!access_ok(u64_to_user_ptr(args->buffer), args->buffer_size)) { - XDNA_ERR(xdna, "Failed to access buffer size %d", args->buffer_size); - return -EFAULT; + ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics); + if (ret) { + XDNA_ERR(client->xdna, "PMF get npu data failed, ret %d", ret); + return ret; } - min = min(args->buffer_size, sizeof(*sensor)); - sensor = kzalloc(sizeof(*sensor), GFP_KERNEL); - if (!sensor) - return -ENOMEM; + sensor.type = AMDXDNA_SENSOR_TYPE_POWER; + sensor.input = npu_metrics.npu_power; + sensor.unitm = -3; + snprintf(sensor.label, sizeof(sensor.label), "Total Power"); + snprintf(sensor.units, sizeof(sensor.units), "mW"); - sensor->type = AMDXDNA_SENSOR_TYPE_POWER; - sensor->input = __UINT32_MAX__; /* TODO: query the device and get the power data */ - sensor->unitm = -3; /* in milliwatts */ - snprintf(sensor->label, sizeof(sensor->label), "Total Power"); - snprintf(sensor->units, sizeof(sensor->units), "mW"); + if (args->buffer_size < sizeof(sensor)) + goto out; - if (copy_to_user(u64_to_user_ptr(args->buffer), sensor, min)) - ret = -EFAULT; + if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor))) + return -EFAULT; - kfree(sensor); - return ret; + args->buffer_size -= sizeof(sensor); + sensors_count++; + + for (i = 0; i < min_t(u32, ndev->total_col, 8); i++) { + memset(&sensor, 0, sizeof(sensor)); + sensor.input = npu_metrics.npu_busy[i]; + sensor.type = AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION; + sensor.unitm = 0; + snprintf(sensor.label, sizeof(sensor.label), + "Column %d Utilization", i); + snprintf(sensor.units, sizeof(sensor.units), "%%"); + + if (args->buffer_size < sizeof(sensor)) + goto out; + + if (copy_to_user(u64_to_user_ptr(args->buffer) + + sensors_count * sizeof(sensor), + &sensor, sizeof(sensor))) + return -EFAULT; + + args->buffer_size -= sizeof(sensor); + sensors_count++; + } + +out: + args->buffer_size = sensors_count * sizeof(sensor); + + return 0; +#else + return -EOPNOTSUPP; +#endif } static int aie2_query_ctx_status(struct amdxdna_client *client, @@ -1111,6 +1142,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client, return -EFAULT; } + aie2_update_counters(ndev); res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk; res_info.npu_tops_max = ndev->max_tops; res_info.npu_task_max = priv->hwctx_limit; diff --git a/src/driver/amdxdna/aie2_pci.h b/src/driver/amdxdna/aie2_pci.h index 1117036f9..f77645448 100644 --- a/src/driver/amdxdna/aie2_pci.h +++ b/src/driver/amdxdna/aie2_pci.h @@ -62,6 +62,41 @@ pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \ }) +#ifdef HAVE_7_0_amd_pmf_get_npu_data +#include +#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics) +#define AIE2_GET_PMF_NPU_DATA(field, val) \ +({ \ + struct amd_pmf_npu_metrics _npu_metrics; \ + int _ret; \ + \ + _ret = amd_pmf_get_npu_data(&_npu_metrics); \ + val = _ret ? U32_MAX : _npu_metrics.field; \ + (_ret); \ +}) +#else +#define AIE2_GET_PMF_NPU_METRICS(metrics) \ +({ \ + typeof(metrics) _m = metrics; \ + memset(_m, 0xff, sizeof(*_m)); \ + (-EOPNOTSUPP); \ +}) + +#define SENSOR_DEFAULT_npu_power U32_MAX +#define AIE2_GET_PMF_NPU_DATA(field, val) \ +({ \ + val = SENSOR_DEFAULT_##field; \ + (-EOPNOTSUPP); \ +}) +#endif + +#define aie2_update_counters(ndev) \ +({ \ + typeof(ndev) _ndev = ndev; \ + if (_ndev->priv->hw_ops.update_counters) \ + _ndev->priv->hw_ops.update_counters(_ndev); \ +}) + #define SMU_DPM_INVALID 0xffffffff #define SMU_DPM_MAX(ndev) \ ((ndev)->smu.num_dpm_levels - 1) @@ -359,6 +394,11 @@ int aie2_smu_start(struct amdxdna_dev_hdl *ndev); void aie2_smu_stop(struct amdxdna_dev_hdl *ndev); int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level); +#ifdef HAVE_7_0_amd_pmf_get_npu_data +int npu4_update_counters(struct amdxdna_dev_hdl *ndev); +#else +#define npu4_update_counters NULL +#endif int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev); int aie2_smu_get_hclock_freq(struct amdxdna_dev_hdl *ndev); int aie2_smu_set_power_on(struct amdxdna_dev_hdl *ndev); diff --git a/src/driver/amdxdna/aie2_smu.c b/src/driver/amdxdna/aie2_smu.c index 13fb82a08..cd45e9e40 100644 --- a/src/driver/amdxdna/aie2_smu.c +++ b/src/driver/amdxdna/aie2_smu.c @@ -72,6 +72,26 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level) return 0; } +#ifdef HAVE_7_0_amd_pmf_get_npu_data +int npu4_update_counters(struct amdxdna_dev_hdl *ndev) +{ + struct amd_pmf_npu_metrics npu_metrics; + int ret; + + ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics); + if (ret) { + XDNA_ERR(ndev->xdna, "PMF get npu data failed, ret %d", ret); + return ret; + } + + ndev->npuclk_freq = npu_metrics.mpnpuclk_freq; + ndev->hclk_freq = npu_metrics.npuclk_freq; + ndev->curr_tops = 4096 * ndev->total_col * ndev->hclk_freq / 1000000; + + return 0; +} +#endif + int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev) { return ndev->npuclk_freq; diff --git a/src/driver/amdxdna/amdxdna_aie.h b/src/driver/amdxdna/amdxdna_aie.h index 44d543b90..3ce7730b2 100644 --- a/src/driver/amdxdna/amdxdna_aie.h +++ b/src/driver/amdxdna/amdxdna_aie.h @@ -65,6 +65,7 @@ enum aie_power_state { struct amdxdna_dev_hdl; struct aie_hw_ops { int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level); + int (*update_counters)(struct amdxdna_dev_hdl *ndev); int (*get_tops)(struct amdxdna_dev_hdl *ndev, u64 *max, u64 *curr); }; diff --git a/src/driver/amdxdna/amdxdna_pci_drv.c b/src/driver/amdxdna/amdxdna_pci_drv.c index b620ac62f..3bcf22947 100644 --- a/src/driver/amdxdna/amdxdna_pci_drv.c +++ b/src/driver/amdxdna/amdxdna_pci_drv.c @@ -332,3 +332,11 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("XRT Team "); MODULE_VERSION("0.1"); MODULE_DESCRIPTION("amdxdna driver"); + +#ifdef HAVE_7_0_amd_pmf_get_npu_data +#ifdef HAVE_6_13_MODULE_IMPORT_NS +MODULE_IMPORT_NS("AMD_PMF"); +#else +MODULE_IMPORT_NS(AMD_PMF); +#endif +#endif diff --git a/src/driver/amdxdna/npu4_family.h b/src/driver/amdxdna/npu4_family.h index 9b4c8f55f..afd91b76f 100644 --- a/src/driver/amdxdna/npu4_family.h +++ b/src/driver/amdxdna/npu4_family.h @@ -105,6 +105,7 @@ extern const struct amdxdna_dev_priv npu4_dev_priv; }, \ .hw_ops = { \ .set_dpm = npu4_set_dpm, \ + .update_counters = npu4_update_counters, \ } #define NPU4_COMMON_DEV_INFO \ diff --git a/src/driver/tools/configure_kernel.sh b/src/driver/tools/configure_kernel.sh index c29fe0408..fa5907223 100755 --- a/src/driver/tools/configure_kernel.sh +++ b/src/driver/tools/configure_kernel.sh @@ -294,6 +294,20 @@ int main(void) } EOF +# Test amd_pmf_get_npu_data exists +try_compile HAVE_7_0_amd_pmf_get_npu_data << 'EOF' +#include +#include +int main(void) +{ + MODULE_IMPORT_NS("AMD_PMF"); + + struct amd_pmf_npu_metrics info; + int ret = amd_pmf_get_npu_data(&info); + return 0; +} +EOF + # ---- Header trailer ---------------------------------------------------- cat >> "$OUT" < m_handle; void - hw_ctx_init(device* dev, const char *tag, const flow_type* flow) + hw_ctx_init(device* dev, const char *tag, const flow_type* flow, + const xrt::hw_context::qos_type& qos) { xrt::xclbin xclbin; xrt::elf elf; @@ -49,7 +56,6 @@ class hw_ctx { "specify xclbin path or run \"build.sh -xclbin_only\" to download them"); } - xrt::hw_context::qos_type qos{ {"gops", 100}, {"priority", 0x180} }; xrt::hw_context::access_mode mode = xrt::hw_context::access_mode::shared; if (is_full_elf) { m_handle = dev->create_hw_context(elf.get_partition_size(), qos, mode); diff --git a/test/shim_test/io.cpp b/test/shim_test/io.cpp index 2f03f93c6..3fad8ce1f 100644 --- a/test/shim_test/io.cpp +++ b/test/shim_test/io.cpp @@ -1181,6 +1181,25 @@ run_no_check_result() run(wfences, sfences, true); } +void +dpm_test_bo_set:: +run_with_ctx(hw_ctx& hwctx) +{ + auto hwq = hwctx.get()->get_hw_queue(); + + init_cmd(hwctx, false); + sync_before_run(); + + auto cbo = m_bo_array[IO_TEST_BO_CMD].tbo.get(); + reset_cmd_header(); + + hwq->submit_command(cbo->get()); + hwq->wait_command(cbo->get(), 0); + + sync_after_run(); + verify_result(); +} + std::array& io_test_bo_set_base:: get_bos() diff --git a/test/shim_test/io.h b/test/shim_test/io.h index c1b5804e4..5ca22a8b7 100644 --- a/test/shim_test/io.h +++ b/test/shim_test/io.h @@ -252,6 +252,14 @@ class elf_io_aie_debug_test_bo_set : public io_test_bo_set_base bool m_is_full_elf = false; }; +class dpm_test_bo_set : public elf_io_test_bo_set +{ +public: + using elf_io_test_bo_set::elf_io_test_bo_set; + + void run_with_ctx(hw_ctx& hwctx); +}; + /** Create a BO set appropriate for the device and tag. * LEGACY -> io_test_bo_set; PARTIAL_ELF -> elf_io_test_bo_set; FULL_ELF -> elf_full_io_test_bo_set. */ diff --git a/test/shim_test/io_test.cpp b/test/shim_test/io_test.cpp index f9b148e66..5a900ef28 100644 --- a/test/shim_test/io_test.cpp +++ b/test/shim_test/io_test.cpp @@ -12,11 +12,20 @@ #include #include #include +#include +#include #include +#include + +// FIXME +#include "../../src/include/uapi/drm_local/amdxdna_accel.h" +// end of FIXME using namespace xrt_core; using arg_type = const std::vector; +extern int open_accel_fd(device* dev); + namespace { io_test_parameter io_test_parameters; @@ -366,6 +375,102 @@ io_test(device::id_type id, device* dev, int total_hwq_submit, int num_cmdlist, } } +struct dpm_clk_entry { + uint32_t npuclk; + uint32_t hclk; +}; + +const dpm_clk_entry npu4_dpm_table[] = { + {396, 792}, + {600, 1056}, + {792, 1152}, + {975, 1267}, + {975, 1267}, + {1056, 1408}, + {1152, 1584}, + {1267, 1800}, +}; + +constexpr int DPM_NUM_LEVELS = 8; +constexpr uint32_t HCLK_MARGIN_PCT = 2; +constexpr uint32_t DPM_COL_OPC = 4096; +constexpr uint32_t DPM_NOP_NUM_COL = 4; +constexpr uint32_t DPM_MAX_OPC = DPM_COL_OPC * DPM_NOP_NUM_COL; + +constexpr uint32_t SYS_EFF_FACTOR = 2; + +uint32_t +query_hclk(device* dev) +{ + int fd = open_accel_fd(dev); + amdxdna_drm_query_clock_metadata clock = {}; + amdxdna_drm_get_info arg = { + .param = DRM_AMDXDNA_QUERY_CLOCK_METADATA, + .buffer_size = sizeof(clock), + .buffer = reinterpret_cast(&clock), + }; + + int ret = ::ioctl(fd, DRM_IOCTL_AMDXDNA_GET_INFO, &arg); + close(fd); + if (ret == -1) + throw std::runtime_error("ioctl(QUERY_CLOCK_METADATA) failed"); + + return clock.h_clock.freq_mhz; +} + +void +set_power_mode(device* dev, int mode) +{ + int fd = open_accel_fd(dev); + amdxdna_drm_set_power_mode pm = {}; + pm.power_mode = static_cast(mode); + + amdxdna_drm_set_state arg = { + .param = DRM_AMDXDNA_SET_POWER_MODE, + .buffer_size = sizeof(pm), + .buffer = reinterpret_cast(&pm), + }; + + int ret = ::ioctl(fd, DRM_IOCTL_AMDXDNA_SET_STATE, &arg); + close(fd); + if (ret == -1) + throw std::runtime_error("ioctl(SET_POWER_MODE) failed for mode " + std::to_string(mode)); +} + +bool +hclk_within_margin(uint32_t actual, uint32_t expected) +{ + uint32_t margin = expected * HCLK_MARGIN_PCT / 100; + if (margin == 0) + margin = 1; + return actual >= expected - margin && actual <= expected + margin; +} + +void +verify_hclk(device* dev, uint32_t expected, const std::string& ctx) +{ + constexpr int timeout_ms = 20000; + constexpr int poll_interval_ms = 10; + uint32_t actual = 0; + + auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(timeout_ms); + do { + actual = query_hclk(dev); + if (hclk_within_margin(actual, expected)) + break; + std::this_thread::sleep_for(std::chrono::milliseconds(poll_interval_ms)); + } while (std::chrono::steady_clock::now() < deadline); + + if (!hclk_within_margin(actual, expected)) { + throw std::runtime_error(ctx + ": expected H-clock ~" + std::to_string(expected) + + " MHz (±" + std::to_string(HCLK_MARGIN_PCT) + "%), got " + + std::to_string(actual) + " MHz (after " + + std::to_string(timeout_ms) + "ms polling)"); + } + std::cout << " " << ctx << ": H-clock " << actual << " MHz (expected ~" + << expected << ") [OK]" << std::endl; +} + } void @@ -657,7 +762,7 @@ TEST_io_runlist_bad_cmd(device::id_type id, std::shared_ptr& sdev, arg_t elf_io_negative_test_bo_set timeout_bo_set{dev, "bad_timeout"}; std::unique_ptr error_bo_set; // An error one - if (!is_timeout) + if (!is_timeout) error_bo_set = std::make_unique(dev, "bad_op"); // Creating HW context for cmd submission. We use the good xclbin here to @@ -980,3 +1085,144 @@ TEST_io_aie_reg(device::id_type id, std::shared_ptr& sdev, arg_type& arg } } } + +void +TEST_dpm_noop_no_qos(device::id_type id, std::shared_ptr& sdev, arg_type& arg) +{ + auto dev = sdev.get(); + uint32_t max_hclk = npu4_dpm_table[DPM_NUM_LEVELS - 1].hclk; + + set_power_mode(dev, POWER_MODE_DEFAULT); + + { + hw_ctx hwctx{dev, "nop"}; + dpm_test_bo_set nop{dev, "nop"}; + nop.run_with_ctx(hwctx); + verify_hclk(dev, max_hclk, "noop context (no fps/latency QoS)"); + } +} + +void +TEST_dpm_power_modes(device::id_type id, std::shared_ptr& sdev, arg_type& arg) +{ + auto dev = sdev.get(); + uint32_t max_hclk = npu4_dpm_table[DPM_NUM_LEVELS - 1].hclk; + uint32_t low_hclk = npu4_dpm_table[0].hclk; + uint32_t med_hclk = npu4_dpm_table[DPM_NUM_LEVELS / 2].hclk; + + set_power_mode(dev, POWER_MODE_TURBO); + verify_hclk(dev, max_hclk, "POWER_MODE_TURBO"); + + set_power_mode(dev, POWER_MODE_LOW); + verify_hclk(dev, low_hclk, "POWER_MODE_LOW"); + + set_power_mode(dev, POWER_MODE_MEDIUM); + verify_hclk(dev, med_hclk, "POWER_MODE_MEDIUM"); + + set_power_mode(dev, POWER_MODE_HIGH); + verify_hclk(dev, max_hclk, "POWER_MODE_HIGH"); + + set_power_mode(dev, POWER_MODE_LOW); + verify_hclk(dev, low_hclk, "POWER_MODE_LOW"); + + set_power_mode(dev, POWER_MODE_DEFAULT); +} + +void +TEST_dpm_refcount_scaling(device::id_type id, std::shared_ptr& sdev, arg_type& arg) +{ + auto dev = sdev.get(); + const auto* tbl = npu4_dpm_table; + uint32_t factor = SYS_EFF_FACTOR; + + std::cout << " Platform info: col_opc=" << DPM_COL_OPC + << " num_col=" << DPM_NOP_NUM_COL + << " max_opc=" << DPM_MAX_OPC + << " sys_eff_factor=" << factor << std::endl; + + /* + * Compute per-level GOPs thresholds to target each DPM level. + * The driver computes: req_gops = gops * fps * sys_eff_factor + * and picks the lowest level where req_gops <= max_opc * hclk / 1000. + * We divide capacities by the factor so the gops param we pass + * results in the correct req_gops after the driver's multiplication. + */ + struct level_qos { + uint32_t gops; + uint32_t fps; + uint32_t expected_hclk; + }; + + std::vector levels; + for (int i = 0; i < DPM_NUM_LEVELS; i++) { + uint32_t raw_capacity = DPM_MAX_OPC * tbl[i].hclk / 1000; + uint32_t capacity = raw_capacity / factor; + uint32_t prev_raw = (i > 0) ? DPM_MAX_OPC * tbl[i - 1].hclk / 1000 : 0; + uint32_t prev_capacity = prev_raw / factor; + + uint32_t target = (prev_capacity > 0) ? prev_capacity + 1 : 1; + if (target > capacity) + target = capacity; + + levels.push_back({target, 1, tbl[i].hclk}); + + std::cout << " Level " << i + << ": hclk=" << tbl[i].hclk + << " raw_cap=" << raw_capacity + << " eff_cap=" << capacity + << " target_gops=" << target << std::endl; + } + + set_power_mode(dev, POWER_MODE_DEFAULT); + + std::vector> ctxs; + + std::cout << " Phase 1: Creating " << DPM_NUM_LEVELS << " contexts (DPM scaling up)" << std::endl; + for (int i = 0; i < DPM_NUM_LEVELS; i++) { + xrt::hw_context::qos_type qos{ + {"gops", levels[i].gops}, + {"fps", levels[i].fps}, + {"priority", 0x180}, + }; + + uint32_t drv_req_gops = levels[i].fps * levels[i].gops * factor; + std::cout << " Creating context " << i + << ": qos{gops=" << levels[i].gops + << ", fps=" << levels[i].fps + << ", priority=0x180}" + << " drv_req_gops=" << drv_req_gops << std::endl; + + ctxs.push_back(std::make_unique(dev, qos, "nop")); + dpm_test_bo_set nop{dev, "nop"}; + nop.run_with_ctx(*ctxs.back()); + std::cout << " Context " << i << " created successfully" << std::endl; + + uint32_t expected = levels[i].expected_hclk; + for (int j = 0; j < i; j++) { + if (levels[j].expected_hclk > expected) + expected = levels[j].expected_hclk; + } + + verify_hclk(dev, expected, "after context " + std::to_string(i) + + " (target level " + std::to_string(i) + ")"); + } + + std::cout << " Phase 2: Destroying " << DPM_NUM_LEVELS << " contexts (DPM scaling down)" << std::endl; + for (int i = DPM_NUM_LEVELS - 1; i >= 0; i--) { + ctxs.pop_back(); + std::cout << " Destroyed context " << i << std::endl; + + if (i == 0) { + verify_hclk(dev, tbl[0].hclk, "after destroying last context (all refs gone)"); + break; + } + + uint32_t expected = 0; + for (int j = 0; j < i; j++) { + if (levels[j].expected_hclk > expected) + expected = levels[j].expected_hclk; + } + + verify_hclk(dev, expected, "after destroying context " + std::to_string(i)); + } +} diff --git a/test/shim_test/shim_test.cpp b/test/shim_test/shim_test.cpp index 8abf9ca06..33aca86dd 100644 --- a/test/shim_test/shim_test.cpp +++ b/test/shim_test/shim_test.cpp @@ -81,6 +81,9 @@ void TEST_preempt_full_elf_io(device::id_type, std::shared_ptr&, arg_typ void TEST_io_coredump(device::id_type, std::shared_ptr&, arg_type&); void TEST_io_aie_mem(device::id_type, std::shared_ptr&, arg_type&); void TEST_io_aie_reg(device::id_type, std::shared_ptr&, arg_type&); +void TEST_dpm_noop_no_qos(device::id_type, std::shared_ptr&, arg_type&); +void TEST_dpm_power_modes(device::id_type, std::shared_ptr&, arg_type&); +void TEST_dpm_refcount_scaling(device::id_type, std::shared_ptr&, arg_type&); inline void set_xrt_path() @@ -1071,6 +1074,15 @@ std::vector test_list { test_case{ "create and free user ptr BO with mmapped ptr", {}, TEST_POSITIVE, dev_filter_xdna, TEST_create_free_mmaped_uptr_bo, {} }, + test_case{ "DPM noop (no QoS)", {}, + TEST_POSITIVE, dev_filter_is_npu4, TEST_dpm_noop_no_qos, {} + }, + test_case{ "DPM refcount scaling", {}, + TEST_POSITIVE, dev_filter_is_npu4, TEST_dpm_refcount_scaling, {} + }, + test_case{ "DPM power modes", {}, + TEST_POSITIVE, dev_filter_is_npu4, TEST_dpm_power_modes, {} + }, }; void