Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions src/driver/amdxdna/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,15 @@ DEFINES += -DMODULE_VER_STR='\"$(MODULE_VER_STR)\"'

ifeq ($(XDNA_BUS_TYPE), of)
DEFINES += -DAMDXDNA_OF
# Generate config_kernel.h when building directly (Yocto/DKMS flow)
endif

# Generate config_kernel.h if not already present (CMake or direct build)
modules: $(SRC_DIR)/config_kernel.h
$(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) CFLAGS_MODULE="$(DEFINES)" OFT_CONFIG_AMDXDNA_PCI=$(PCI) OFT_CONFIG_AMDXDNA_OF=$(OF) $(USE_LLVM) modules

$(SRC_DIR)/config_kernel.h:
@echo "[INFO] Generating config_kernel.h for OF build..."
@echo "[INFO] Generating config_kernel.h..."
@KERNEL_SRC=$(KERNEL_SRC) OUT="$(SRC_DIR)/config_kernel.h" bash $(SRC_DIR)/../../../../tools/configure_kernel.sh
else
# PCI builds: CMake generates config_kernel.h
modules:
$(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) CFLAGS_MODULE="$(DEFINES)" OFT_CONFIG_AMDXDNA_PCI=$(PCI) OFT_CONFIG_AMDXDNA_OF=$(OF) $(USE_LLVM) modules
endif

modules_install:
$(MAKE) -C $(KERNEL_SRC) M=$(SRC_DIR) modules_install
Expand Down
72 changes: 52 additions & 20 deletions src/driver/amdxdna/aie2_pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,7 @@ static int aie2_query_clock_metadata(struct amdxdna_client *client,

ndev = xdna->dev_handle;

aie2_update_counters(ndev);
snprintf(clock.mp_npu_clock.name, sizeof(clock.mp_npu_clock.name),
"MP-NPU Clock");
clock.mp_npu_clock.freq_mhz = ndev->npuclk_freq;
Expand All @@ -859,32 +860,62 @@ static int aie2_query_clock_metadata(struct amdxdna_client *client,
static int aie2_query_sensors(struct amdxdna_client *client,
struct amdxdna_drm_get_info *args)
{
struct amdxdna_drm_query_sensor *sensor;
struct amdxdna_dev *xdna = client->xdna;
int ret = 0;
int min;
#ifdef HAVE_7_0_amd_pmf_get_npu_data
struct amdxdna_dev_hdl *ndev = client->xdna->dev_handle;
struct amdxdna_drm_query_sensor sensor = {};
struct amd_pmf_npu_metrics npu_metrics;
u32 sensors_count = 0, i;
int ret;

if (!access_ok(u64_to_user_ptr(args->buffer), args->buffer_size)) {
XDNA_ERR(xdna, "Failed to access buffer size %d", args->buffer_size);
return -EFAULT;
ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
if (ret) {
XDNA_ERR(client->xdna, "PMF get npu data failed, ret %d", ret);
return ret;
}

min = min(args->buffer_size, sizeof(*sensor));
sensor = kzalloc(sizeof(*sensor), GFP_KERNEL);
if (!sensor)
return -ENOMEM;
sensor.type = AMDXDNA_SENSOR_TYPE_POWER;
sensor.input = npu_metrics.npu_power;
sensor.unitm = -3;
snprintf(sensor.label, sizeof(sensor.label), "Total Power");
snprintf(sensor.units, sizeof(sensor.units), "mW");

sensor->type = AMDXDNA_SENSOR_TYPE_POWER;
sensor->input = __UINT32_MAX__; /* TODO: query the device and get the power data */
sensor->unitm = -3; /* in milliwatts */
snprintf(sensor->label, sizeof(sensor->label), "Total Power");
snprintf(sensor->units, sizeof(sensor->units), "mW");
if (args->buffer_size < sizeof(sensor))
goto out;

if (copy_to_user(u64_to_user_ptr(args->buffer), sensor, min))
ret = -EFAULT;
if (copy_to_user(u64_to_user_ptr(args->buffer), &sensor, sizeof(sensor)))
return -EFAULT;

kfree(sensor);
return ret;
args->buffer_size -= sizeof(sensor);
sensors_count++;

for (i = 0; i < min_t(u32, ndev->total_col, 8); i++) {
memset(&sensor, 0, sizeof(sensor));
sensor.input = npu_metrics.npu_busy[i];
sensor.type = AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION;
sensor.unitm = 0;
snprintf(sensor.label, sizeof(sensor.label),
"Column %d Utilization", i);
snprintf(sensor.units, sizeof(sensor.units), "%%");

if (args->buffer_size < sizeof(sensor))
goto out;

if (copy_to_user(u64_to_user_ptr(args->buffer) +
sensors_count * sizeof(sensor),
&sensor, sizeof(sensor)))
return -EFAULT;

args->buffer_size -= sizeof(sensor);
sensors_count++;
}

out:
args->buffer_size = sensors_count * sizeof(sensor);

return 0;
#else
return -EOPNOTSUPP;
#endif
}

static int aie2_query_ctx_status(struct amdxdna_client *client,
Expand Down Expand Up @@ -1111,6 +1142,7 @@ static int aie2_query_resource_info(struct amdxdna_client *client,
return -EFAULT;
}

aie2_update_counters(ndev);
res_info.npu_clk_max = priv->dpm_clk_tbl[ndev->max_dpm_level].hclk;
res_info.npu_tops_max = ndev->max_tops;
res_info.npu_task_max = priv->hwctx_limit;
Expand Down
40 changes: 40 additions & 0 deletions src/driver/amdxdna/aie2_pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,41 @@
pci_resource_len(NDEV2PDEV(_ndev), (_ndev)->xdna->dev_info->mbox_bar); \
})

/*
 * AMD PMF (Platform Management Framework) NPU metrics access.
 *
 * When configure_kernel.sh detected amd_pmf_get_npu_data() in the target
 * kernel (HAVE_7_0_amd_pmf_get_npu_data), the helpers below forward to it.
 * Otherwise they fill the output with an all-ones sentinel and evaluate to
 * -EOPNOTSUPP so callers can fail gracefully.
 */
#ifdef HAVE_7_0_amd_pmf_get_npu_data
#include <linux/amd-pmf-io.h>
/* Fetch the full metrics struct; evaluates to 0 on success, negative errno on failure. */
#define AIE2_GET_PMF_NPU_METRICS(metrics) amd_pmf_get_npu_data(metrics)
/* Read a single metrics field into 'val'; on failure 'val' is set to U32_MAX. */
#define AIE2_GET_PMF_NPU_DATA(field, val) \
({ \
	struct amd_pmf_npu_metrics _npu_metrics; \
	int _ret; \
	\
	_ret = amd_pmf_get_npu_data(&_npu_metrics); \
	val = _ret ? U32_MAX : _npu_metrics.field; \
	(_ret); \
})
#else
/* PMF API unavailable: poison the whole struct with 0xff bytes (== U32_MAX fields). */
#define AIE2_GET_PMF_NPU_METRICS(metrics) \
({ \
	typeof(metrics) _m = metrics; \
	memset(_m, 0xff, sizeof(*_m)); \
	(-EOPNOTSUPP); \
})

#define SENSOR_DEFAULT_npu_power U32_MAX
#define AIE2_GET_PMF_NPU_DATA(field, val) \
({ \
	val = SENSOR_DEFAULT_##field; \
	(-EOPNOTSUPP); \
})
#endif

/*
 * Refresh the device's cached counters via the per-generation hook, if the
 * hardware provides one (update_counters may be NULL, e.g. when PMF support
 * is absent).
 */
#define aie2_update_counters(ndev) \
({ \
	typeof(ndev) _ndev = ndev; \
	if (_ndev->priv->hw_ops.update_counters) \
		_ndev->priv->hw_ops.update_counters(_ndev); \
})

#define SMU_DPM_INVALID 0xffffffff
#define SMU_DPM_MAX(ndev) \
((ndev)->smu.num_dpm_levels - 1)
Expand Down Expand Up @@ -359,6 +394,11 @@ int aie2_smu_start(struct amdxdna_dev_hdl *ndev);
void aie2_smu_stop(struct amdxdna_dev_hdl *ndev);
int npu1_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
#ifdef HAVE_7_0_amd_pmf_get_npu_data
int npu4_update_counters(struct amdxdna_dev_hdl *ndev);
#else
#define npu4_update_counters NULL
#endif
int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev);
int aie2_smu_get_hclock_freq(struct amdxdna_dev_hdl *ndev);
int aie2_smu_set_power_on(struct amdxdna_dev_hdl *ndev);
Expand Down
20 changes: 20 additions & 0 deletions src/driver/amdxdna/aie2_smu.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,26 @@ int npu4_set_dpm(struct amdxdna_dev_hdl *ndev, u32 dpm_level)
return 0;
}

#ifdef HAVE_7_0_amd_pmf_get_npu_data
/*
 * Refresh the cached clock frequencies and current-TOPS estimate for an
 * NPU4-class device from the AMD PMF driver's NPU metrics.
 *
 * Returns 0 on success, or the negative errno from the PMF query.
 */
int npu4_update_counters(struct amdxdna_dev_hdl *ndev)
{
	struct amd_pmf_npu_metrics npu_metrics;
	int ret;

	ret = AIE2_GET_PMF_NPU_METRICS(&npu_metrics);
	if (ret) {
		XDNA_ERR(ndev->xdna, "PMF get npu data failed, ret %d", ret);
		return ret;
	}

	/*
	 * NOTE(review): npuclk_freq <- mpnpuclk_freq while hclk_freq <-
	 * npuclk_freq looks crosswise — confirm this mapping against the
	 * amd_pmf_npu_metrics field definitions in linux/amd-pmf-io.h.
	 */
	ndev->npuclk_freq = npu_metrics.mpnpuclk_freq;
	ndev->hclk_freq = npu_metrics.npuclk_freq;
	/* 4096 (ops/cycle/column, presumably) * columns * hclk, scaled by 1e6 — TODO confirm units. */
	ndev->curr_tops = 4096 * ndev->total_col * ndev->hclk_freq / 1000000;

	return 0;
}
#endif

int aie2_smu_get_mpnpu_clock_freq(struct amdxdna_dev_hdl *ndev)
{
return ndev->npuclk_freq;
Expand Down
1 change: 1 addition & 0 deletions src/driver/amdxdna/amdxdna_aie.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ enum aie_power_state {
/* Forward declaration: the per-device handle is defined elsewhere. */
struct amdxdna_dev_hdl;
/* Per-NPU-generation hardware operation table. */
struct aie_hw_ops {
	/* Apply the requested dynamic power management level. */
	int (*set_dpm)(struct amdxdna_dev_hdl *ndev, u32 dpm_level);
	/* Optional: refresh cached clock/TOPS counters; may be NULL when unsupported. */
	int (*update_counters)(struct amdxdna_dev_hdl *ndev);
	/* Report maximum and current compute throughput (TOPS). */
	int (*get_tops)(struct amdxdna_dev_hdl *ndev, u64 *max, u64 *curr);
};

Expand Down
8 changes: 8 additions & 0 deletions src/driver/amdxdna/amdxdna_pci_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -332,3 +332,11 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("amdxdna driver");

/*
 * The amd_pmf_get_npu_data() symbol is exported in the AMD_PMF namespace,
 * so import it whenever the API is available.  Per the HAVE_6_13 probe,
 * newer kernels take a string literal for MODULE_IMPORT_NS while older
 * ones take a bare token — hence the two spellings.
 */
#ifdef HAVE_7_0_amd_pmf_get_npu_data
#ifdef HAVE_6_13_MODULE_IMPORT_NS
MODULE_IMPORT_NS("AMD_PMF");
#else
MODULE_IMPORT_NS(AMD_PMF);
#endif
#endif
1 change: 1 addition & 0 deletions src/driver/amdxdna/npu4_family.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ extern const struct amdxdna_dev_priv npu4_dev_priv;
}, \
.hw_ops = { \
.set_dpm = npu4_set_dpm, \
.update_counters = npu4_update_counters, \
}

#define NPU4_COMMON_DEV_INFO \
Expand Down
14 changes: 14 additions & 0 deletions src/driver/tools/configure_kernel.sh
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,20 @@ int main(void)
}
EOF

# Test amd_pmf_get_npu_data exists
try_compile HAVE_7_0_amd_pmf_get_npu_data << 'EOF'
#include <linux/module.h>
#include <linux/amd-pmf-io.h>
int main(void)
{
MODULE_IMPORT_NS("AMD_PMF");

struct amd_pmf_npu_metrics info;
int ret = amd_pmf_get_npu_data(&info);
return 0;
}
EOF

# ---- Header trailer ----------------------------------------------------

cat >> "$OUT" <<EOF
Expand Down
3 changes: 2 additions & 1 deletion src/include/uapi/drm_local/amdxdna_accel.h
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,8 @@ struct amdxdna_drm_query_sensor {
__u8 status[64];
__u8 units[16];
__s8 unitm;
#define AMDXDNA_SENSOR_TYPE_POWER 0
#define AMDXDNA_SENSOR_TYPE_POWER 0
#define AMDXDNA_SENSOR_TYPE_COLUMN_UTILIZATION 1
__u8 type;
__u8 pad[6];
};
Expand Down
12 changes: 9 additions & 3 deletions test/shim_test/hwctx.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@ class hw_ctx {
public:
hw_ctx(device* dev, const char *tag = nullptr, const flow_type* flow = nullptr)
{
hw_ctx_init(dev, tag, flow);
hw_ctx_init(dev, tag, flow, { {"gops", 100}, {"priority", 0x180} });
}

hw_ctx(device* dev, const xrt::hw_context::qos_type& qos,
const char *tag = nullptr, const flow_type* flow = nullptr)
{
hw_ctx_init(dev, tag, flow, qos);
}

hwctx_handle *
Expand All @@ -30,7 +36,8 @@ class hw_ctx {
std::unique_ptr<hwctx_handle> m_handle;

void
hw_ctx_init(device* dev, const char *tag, const flow_type* flow)
hw_ctx_init(device* dev, const char *tag, const flow_type* flow,
const xrt::hw_context::qos_type& qos)
{
xrt::xclbin xclbin;
xrt::elf elf;
Expand All @@ -49,7 +56,6 @@ class hw_ctx {
"specify xclbin path or run \"build.sh -xclbin_only\" to download them");
}

xrt::hw_context::qos_type qos{ {"gops", 100}, {"priority", 0x180} };
xrt::hw_context::access_mode mode = xrt::hw_context::access_mode::shared;
if (is_full_elf) {
m_handle = dev->create_hw_context(elf.get_partition_size(), qos, mode);
Expand Down
19 changes: 19 additions & 0 deletions test/shim_test/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1181,6 +1181,25 @@ run_no_check_result()
run(wfences, sfences, true);
}

// Run this BO set's command once on a caller-supplied hardware context and
// verify the output, leaving the context alive for reuse by the caller.
void
dpm_test_bo_set::
run_with_ctx(hw_ctx& hwctx)
{
  init_cmd(hwctx, false);
  sync_before_run();

  auto* cmd_bo = m_bo_array[IO_TEST_BO_CMD].tbo.get();
  reset_cmd_header();

  // Submit the command and block until it retires.
  auto* queue = hwctx.get()->get_hw_queue();
  queue->submit_command(cmd_bo->get());
  queue->wait_command(cmd_bo->get(), 0);

  sync_after_run();
  verify_result();
}

std::array<io_test_bo, IO_TEST_BO_MAX_TYPES>&
io_test_bo_set_base::
get_bos()
Expand Down
8 changes: 8 additions & 0 deletions test/shim_test/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,14 @@ class elf_io_aie_debug_test_bo_set : public io_test_bo_set_base
bool m_is_full_elf = false;
};

/*
 * BO set for the DPM test: identical to elf_io_test_bo_set except that it
 * can run its command on an existing, caller-owned hw context — presumably
 * so the same context (and its QoS/DPM settings) is reused across runs;
 * confirm against the test driver.
 */
class dpm_test_bo_set : public elf_io_test_bo_set
{
public:
  using elf_io_test_bo_set::elf_io_test_bo_set;

  /* Submit one command on the given context, wait, and verify the result. */
  void run_with_ctx(hw_ctx& hwctx);
};

/** Create a BO set appropriate for the device and tag.
* LEGACY -> io_test_bo_set; PARTIAL_ELF -> elf_io_test_bo_set; FULL_ELF -> elf_full_io_test_bo_set.
*/
Expand Down
Loading