diff --git a/include/onnxruntime/core/session/onnxruntime_ep_c_api.h b/include/onnxruntime/core/session/onnxruntime_ep_c_api.h index 34a57b11e1748..d487437202cab 100644 --- a/include/onnxruntime/core/session/onnxruntime_ep_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_ep_c_api.h @@ -2356,6 +2356,10 @@ struct OrtEp { * * The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it. * + * A single string is stored per EP (not per device). If the string may later be validated against multiple devices + * (e.g., multi-adapter or multi-GPU), serialize enough information here to evaluate compatibility against each such + * device individually. See OrtEpFactory::ValidateCompiledModelCompatibilityInfo for how the per-device verdicts are combined. + * * \param[in] this_ptr The OrtEp instance. * \param[in] graph The OrtGraph instance for which to generate compatibility information. * @@ -2795,16 +2799,49 @@ struct OrtEpFactory { /** \brief Validate the compatibility of a compiled model with the execution provider factory for one or more devices. * - * Given a compatibility info string produced during model compilation, the EP factory should determine whether the - * compiled model is compatible with the EP factory when targeting the provided hardware devices. All devices provided - * must belong to the same execution provider instance that this factory creates. - * - * The EP factory implementation should consider the set of devices (e.g., multi-adapter or multi-GPU scenarios) when - * evaluating compatibility and set `model_compatibility` accordingly. + * Given a compatibility info string produced during model compilation (see OrtEp::GetCompiledModelCompatibilityInfo), + * the EP factory determines whether the compiled model is compatible with the EP factory when targeting the provided + * hardware devices, and reports a single OrtCompiledModelCompatibility verdict for the whole set.
+ * + * All devices provided belong to the same execution provider instance that this factory creates. The set represents + * the devices the EP would run the model on *together* (e.g., multi-adapter or multi-GPU scenarios), NOT a menu of + * candidate placements to choose the best one from. Because the function returns a single verdict for the entire set, + * that verdict must describe running on the set as a whole (a conjunction): if the model cannot run on one of the + * devices the EP would use, the model cannot run on the set. + * + * A single-device EP (the common case) may ignore `devices`/`num_devices` and validate `compatibility_info` against + * its own configuration. The per-device algorithm below is required only when the EP may run a model across more + * than one device at once. + * + * Required implementation when num_devices > 1 (a "best of any device" result is NOT permitted -- a single verdict + * cannot convey which device it applies to, so ORT would otherwise be told a model is runnable on a set that + * contains a device it cannot run on): + * 1. Compute a per-device verdict (e.g., by validating `compatibility_info` against each device individually). + * 2. Combine the per-device verdicts into one. Treat EP_NOT_APPLICABLE as a neutral value (skip it) and take the + * worst of the remaining verdicts: + * - if any device is EP_UNSUPPORTED -> EP_UNSUPPORTED + * - else if any device is EP_SUPPORTED_PREFER_RECOMPILATION -> EP_SUPPORTED_PREFER_RECOMPILATION + * - else if at least one device is EP_SUPPORTED_OPTIMAL -> EP_SUPPORTED_OPTIMAL + * - else (every device was EP_NOT_APPLICABLE) -> EP_NOT_APPLICABLE + * Equivalently: report EP_SUPPORTED_OPTIMAL only if at least one device has an opinion and every such device is optimal.
+ * + * Choosing the verdict for the "no opinion" and "bad artifact" cases (the compatibility string is opaque to ORT and + * is interpreted only by the EP that produced it): + * - EP_NOT_APPLICABLE: the EP has no opinion -- `compatibility_info` is empty or was clearly produced by a + * different EP. ORT treats this as "no compiled artifact for this EP": session creation proceeds, and in model + * package variant selection the variant remains eligible but at the lowest priority. + * - EP_UNSUPPORTED: the string appears to be this EP's but is corrupt/stale, or the compiled model cannot run on + * the target hardware/configuration. ORT rejects it (fails session creation / excludes the variant). Do not return + * EP_NOT_APPLICABLE for a corrupt or stale artifact that is actually this EP's: that would let it pass silently. + * + * Note that a single string is stored per EP (not per device), so OrtEp::GetCompiledModelCompatibilityInfo should serialize + * enough information to evaluate the string against every device the EP may later be asked to validate against. * * \param[in] this_ptr The OrtEpFactory instance. * \param[in] devices Array of OrtHardwareDevice pointers that the EP would run on. All must map to this EP. - * \param[in] num_devices Number of entries in `devices`. + * \param[in] num_devices Number of entries in `devices`. May be 0 when no device-specific context is available; + * in that case evaluate `compatibility_info` against the EP's own configuration and do NOT + * dereference `devices`. * \param[in] compatibility_info The compatibility information string produced when the model was compiled. * \param[out] model_compatibility OrtCompiledModelCompatibility value describing the compatibility of the model with the EP.
* diff --git a/onnxruntime/test/autoep/library/example_plugin_ep/compatibility_combine.h b/onnxruntime/test/autoep/library/example_plugin_ep/compatibility_combine.h new file mode 100644 index 0000000000000..573cf37750d65 --- /dev/null +++ b/onnxruntime/test/autoep/library/example_plugin_ep/compatibility_combine.h @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/session/onnxruntime_c_api.h" + +namespace example_ep { + +// Maps an OrtCompiledModelCompatibility value to an ordinal where a lower rank is a "worse" verdict +// (EP_UNSUPPORTED < EP_SUPPORTED_PREFER_RECOMPILATION < EP_SUPPORTED_OPTIMAL). EP_NOT_APPLICABLE is the identity +// element of the fold and is handled by CombineCompatibility before this is called, so it falls into the +// conservative default below. +inline int RankCompatibility(OrtCompiledModelCompatibility c) { + switch (c) { + case OrtCompiledModelCompatibility_EP_UNSUPPORTED: + return 0; + case OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION: + return 1; + case OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL: + return 2; + default: + // Conservative: treat any unknown/unhandled value as the worst rank so an unexpected value can never be + // reported as compatible. Update this switch if new OrtCompiledModelCompatibility values are added. + return 0; + } +} + +// Combines two per-device verdicts following the rule documented for +// OrtEpFactory::ValidateCompiledModelCompatibilityInfo in onnxruntime_ep_c_api.h: EP_NOT_APPLICABLE is a neutral +// identity (skipped), and otherwise the worst verdict wins. This is the reduction an EP folds over its per-device +// verdicts to produce the single verdict the API must return. 
+inline OrtCompiledModelCompatibility CombineCompatibility(OrtCompiledModelCompatibility acc, + OrtCompiledModelCompatibility next) { + if (next == OrtCompiledModelCompatibility_EP_NOT_APPLICABLE) { + return acc; + } + if (acc == OrtCompiledModelCompatibility_EP_NOT_APPLICABLE) { + return next; + } + + // Take the verdict with the lower rank, i.e. the worse of the two. + return RankCompatibility(next) < RankCompatibility(acc) ? next : acc; +} + +} // namespace example_ep diff --git a/onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc b/onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc index 875f70bd29f3c..8ad2e3331ba2f 100644 --- a/onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc +++ b/onnxruntime/test/autoep/library/example_plugin_ep/ep_factory.cc @@ -4,7 +4,11 @@ #include "ep_factory.h" #include +#include +#include +#include +#include "compatibility_combine.h" #include "ep.h" #include "ep_allocator.h" #include "ep_arena.h" @@ -435,10 +439,97 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::CreateExternalResourceImporterForDevic return nullptr; } +namespace { + +// Field keys for the example EP's compatibility string format +// ";version=X;ort_api_version=Y;hardware_architecture=Z". Using named constants (rather than literal +// offsets) keeps the parser robust if a key is renamed. +constexpr std::string_view kVersionKey = "version="; +constexpr std::string_view kOrtApiVersionKey = "ort_api_version="; +constexpr std::string_view kHardwareArchKey = "hardware_architecture="; + +// Extracts the value of `key` from a ';'-delimited "k=v;k=v;..." string, or std::nullopt if the key is absent. +// The key must appear at the start of a field (string start or right after a ';'), so "version=" does NOT match +// the "version=" embedded in "ort_api_version=", and the value ends at the next ';' if present (otherwise the rest of the string). 
+std::optional GetField(const std::string& info, std::string_view key) { + for (size_t search = 0;;) { + size_t pos = info.find(key.data(), search, key.size()); + if (pos == std::string::npos) { + return std::nullopt; + } + const bool at_field_start = (pos == 0) || (info[pos - 1] == ';'); + if (at_field_start) { + size_t value_start = pos + key.size(); + size_t value_end = info.find(';', value_start); + return value_end != std::string::npos ? info.substr(value_start, value_end - value_start) + : info.substr(value_start); + } + search = pos + 1; + } +} + +// Computes the compatibility verdict of a single compatibility string against a single hardware device. +// The compatibility string is opaque to ORT; only the EP that produced it knows how to interpret it. +// +// The architecture a compiled artifact targets is device-specific, so a real multi-device EP derives the expected +// value from `device` (e.g., via HardwareDevice_Type/VendorId/Metadata) and the verdict can differ per device. +// This example demonstrates that by mapping the hardware device type to an arch label. `device` may be nullptr +// (when num_devices == 0), in which case the EP's default configuration is used. +OrtCompiledModelCompatibility ComputeCompatibilityForDevice(const OrtApi& ort_api, + const std::string& ep_version, + const std::string& info, + const OrtHardwareDevice* device) { + std::optional compiled_ep_version = GetField(info, kVersionKey); + if (!compiled_ep_version.has_value()) { + // Our prefix but an unparseable string -> the artifact is ours but unusable. + return OrtCompiledModelCompatibility_EP_UNSUPPORTED; + } + + // Different EP version - might work but prefer recompilation. + if (*compiled_ep_version != ep_version) { + return OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; + } + + // Check ORT API version if present. Different ORT version - might still work but prefer recompilation. 
+ if (std::optional ort_version = GetField(info, kOrtApiVersionKey); ort_version.has_value()) { + if (*ort_version != std::to_string(ORT_API_VERSION)) { + return OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; + } + } + + // Check hardware architecture compatibility if present in the string. The expected arch is derived from the + // target device (CPU -> "arch1"); a mismatch means the artifact was built for a different device. + if (std::optional hardware_arch = GetField(info, kHardwareArchKey); hardware_arch.has_value()) { + std::string expected_arch = "arch1"; // default / num_devices == 0 + if (device != nullptr) { + switch (ort_api.HardwareDevice_Type(device)) { + case OrtHardwareDeviceType_GPU: + expected_arch = "arch2"; + break; + case OrtHardwareDeviceType_NPU: + expected_arch = "arch3"; + break; + default: // CPU and any other type + expected_arch = "arch1"; + break; + } + } + // Different hardware architecture - might still work but prefer recompilation. + if (*hardware_arch != expected_arch) { + return OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; + } + } + + // Everything matches - the compiled model is fully compatible with this device. + return OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL; +} + +} // namespace + OrtStatus* ORT_API_CALL ExampleEpFactory::ValidateCompiledModelCompatibilityInfoImpl( OrtEpFactory* this_ptr, - const OrtHardwareDevice* const* /*devices*/, - size_t /*num_devices*/, + const OrtHardwareDevice* const* devices, + size_t num_devices, const char* compatibility_info, OrtCompiledModelCompatibility* model_compatibility) noexcept { auto& factory = *static_cast(this_ptr); @@ -447,79 +538,41 @@ OrtStatus* ORT_API_CALL ExampleEpFactory::ValidateCompiledModelCompatibilityInfo return factory.ort_api.CreateStatus(ORT_INVALID_ARGUMENT, "model_compatibility cannot be nullptr"); } - // Parse the compatibility info to check if it matches our current configuration. 
- // The expected format is "ExampleEP;version=0.1.0;ort_api_version=24". - // For this example implementation, we simply check if the string starts with our EP name. - + // The compatibility string is opaque to ORT and is interpreted only by the EP that produced it. An empty string, + // or one that was not produced by this EP, means we have no opinion: report EP_NOT_APPLICABLE. if (compatibility_info == nullptr || compatibility_info[0] == '\0') { - *model_compatibility = OrtCompiledModelCompatibility_EP_UNSUPPORTED; + *model_compatibility = OrtCompiledModelCompatibility_EP_NOT_APPLICABLE; return nullptr; } + // The expected format is ";version=;ort_api_version=;hardware_architecture=". std::string info(compatibility_info); - std::string expected_prefix = factory.ep_name_ + ";"; - - if (info.find(expected_prefix) != 0) { - // The compatibility info doesn't match our EP - *model_compatibility = OrtCompiledModelCompatibility_EP_UNSUPPORTED; + if (info.find(factory.ep_name_ + ";") != 0) { + *model_compatibility = OrtCompiledModelCompatibility_EP_NOT_APPLICABLE; return nullptr; } - // Parse version parts: "ExampleEP;version=X;ort_api_version=Y" - // Look for "version=" and extract the value - size_t version_pos = info.find("version="); - size_t ort_version_pos = info.find("ort_api_version="); - - if (version_pos == std::string::npos) { - // Invalid format - *model_compatibility = OrtCompiledModelCompatibility_EP_UNSUPPORTED; - return nullptr; - } - - // Extract EP version (between "version=" and the next ";") - size_t version_start = version_pos + 8; // length of "version=" - size_t version_end = info.find(';', version_start); - std::string ep_version = (version_end != std::string::npos) - ? 
info.substr(version_start, version_end - version_start) - : info.substr(version_start); - - // Check if the EP version matches our version - if (ep_version != factory.ep_version_) { - // Different EP version - might work but prefer recompilation - *model_compatibility = OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; + // `devices` are the hardware devices this EP would run the model on *together* (e.g., multi-adapter or multi-GPU). + // Because we must return a single verdict for the whole set, evaluate the string against each device and combine + // the per-device verdicts: EP_NOT_APPLICABLE is neutral and otherwise the worst verdict wins, so the result is + // EP_SUPPORTED_OPTIMAL only if every device the EP has an opinion on is optimal. See the documentation for + // ValidateCompiledModelCompatibilityInfo in onnxruntime_ep_c_api.h. + if (num_devices == 0) { + // No specific devices supplied; evaluate against the EP's own configuration. + *model_compatibility = + ComputeCompatibilityForDevice(factory.ort_api, factory.GetEpVersionString(), info, /*device*/ nullptr); return nullptr; } - // Check ORT API version if present - if (ort_version_pos != std::string::npos) { - size_t ort_version_start = ort_version_pos + 16; // length of "ort_api_version=" - size_t ort_version_end = info.find(';', ort_version_start); - std::string ort_version = (ort_version_end != std::string::npos) - ? info.substr(ort_version_start, ort_version_end - ort_version_start) - : info.substr(ort_version_start); - std::string current_ort_version = std::to_string(ORT_API_VERSION); - if (ort_version != current_ort_version) { - // Different ORT version - might still work but prefer recompilation - *model_compatibility = OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; - return nullptr; - } - } - - // Check hardware architecture compatibility if that information is included in the compatibility_info string. 
- size_t hardware_arch_pos = info.find("hardware_architecture="); - if (hardware_arch_pos != std::string::npos) { - size_t hardware_arch_start = hardware_arch_pos + 22; // length of "hardware_architecture=" - std::string hardware_arch = info.substr(hardware_arch_start); - std::string current_hardware_arch = "arch1"; // "arch1" is for test purpose. - // Replace with actual hardware architecture detection if needed - if (hardware_arch != current_hardware_arch) { - // Different hardware architecture - might still work but prefer recompilation - *model_compatibility = OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; - return nullptr; + OrtCompiledModelCompatibility combined = OrtCompiledModelCompatibility_EP_NOT_APPLICABLE; + for (size_t i = 0; i < num_devices; ++i) { + combined = example_ep::CombineCompatibility( + combined, ComputeCompatibilityForDevice(factory.ort_api, factory.GetEpVersionString(), info, devices[i])); + if (combined == OrtCompiledModelCompatibility_EP_UNSUPPORTED) { + break; // worst possible verdict; no need to evaluate the remaining devices } } - // Everything matches - the compiled model is fully compatible - *model_compatibility = OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL; + *model_compatibility = combined; return nullptr; } diff --git a/onnxruntime/test/autoep/test_model_package.cc b/onnxruntime/test/autoep/test_model_package.cc index 34f73eb69b149..c4b905f2b6be7 100644 --- a/onnxruntime/test/autoep/test_model_package.cc +++ b/onnxruntime/test/autoep/test_model_package.cc @@ -14,6 +14,7 @@ #include "core/session/model_package/model_package_context.h" #include "core/session/onnxruntime_experimental_c_api.h" #include "core/session/abi_devices.h" +#include "test/autoep/library/example_plugin_ep/compatibility_combine.h" #include "test/autoep/test_autoep_utils.h" #include "test/util/include/asserts.h" #include "test/util/include/api_asserts.h" @@ -675,6 +676,88 @@ TEST(ModelPackageTest, CheckCompiledModelCompatibilityInfo) { 
std::filesystem::remove_all(package_root, ec); } +// Exercises OrtEpFactory::ValidateCompiledModelCompatibilityInfo through the public +// GetModelCompatibilityForEpDevices API against the example plugin EP. Covers the single-device verdicts, the +// "no opinion" cases (empty / foreign compatibility string -> EP_NOT_APPLICABLE), and the multi-device path +// (num_devices > 1) where the per-device verdicts are combined into a single result. +// +// Note: the example EP derives its per-device verdict from the device type, but only one hardware device (CPU) +// is available here, so the multi-device cases below exercise the loop and combine path with uniform verdicts. +// Heterogeneous (mixed) per-device verdicts are covered directly by the CombineCompatibility_* test below. +TEST(ModelPackageTest, ValidateCompiledModelCompatibilityInfo_ExampleEp) { + RegisteredEpDeviceUniquePtr example_ep; + ASSERT_NO_FATAL_FAILURE(Utils::RegisterAndGetExampleEp(*ort_env, Utils::example_ep_info, example_ep)); + Ort::ConstEpDevice plugin_ep_device(example_ep.get()); + + // Build compat strings against the current ORT_API_VERSION so the EP's ORT-version check passes and the + // hardware_architecture field is what differentiates: arch1 -> OPTIMAL, arch2 -> PREFER_RECOMPILATION. + const std::string ort_api_version_str = std::to_string(ORT_API_VERSION); + const std::string compat_arch1 = + "example_ep;version=0.1.0;ort_api_version=" + ort_api_version_str + ";hardware_architecture=arch1"; + const std::string compat_arch2 = + "example_ep;version=0.1.0;ort_api_version=" + ort_api_version_str + ";hardware_architecture=arch2"; + // A string produced by a different EP (does not start with this EP's name). + const std::string foreign_info = + "some_other_ep;version=0.1.0;ort_api_version=" + ort_api_version_str + ";hardware_architecture=arch1"; + + const std::vector one_device{plugin_ep_device}; + // Pass the same EP device twice to drive num_devices == 2 through the factory's combine path. 
+ const std::vector two_devices{plugin_ep_device, plugin_ep_device}; + + // Single-device verdicts. + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(one_device, compat_arch1.c_str()), + OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL); + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(one_device, compat_arch2.c_str()), + OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION); + + // "No opinion": an empty string or a string produced by a different EP -> EP_NOT_APPLICABLE. + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(one_device, ""), + OrtCompiledModelCompatibility_EP_NOT_APPLICABLE); + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(one_device, foreign_info.c_str()), + OrtCompiledModelCompatibility_EP_NOT_APPLICABLE); + + // Multi-device: per-device verdicts are combined into a single result (uniform across devices here). + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(two_devices, compat_arch1.c_str()), + OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL); + EXPECT_EQ(Ort::GetModelCompatibilityForEpDevices(two_devices, compat_arch2.c_str()), + OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION); +} + +// Unit test for the per-device verdict fold (example_ep::CombineCompatibility) that +// OrtEpFactory::ValidateCompiledModelCompatibilityInfo uses to reduce multiple per-device verdicts into a single +// verdict. This exercises the worst-of + EP_NOT_APPLICABLE-identity rule with *heterogeneous* inputs, which the +// end-to-end test above cannot (the example EP exposes a single hardware device). 
+TEST(ModelPackageTest, CombineCompatibility_WorstOfWithNotApplicableIdentity) { + using example_ep::CombineCompatibility; + constexpr auto kNA = OrtCompiledModelCompatibility_EP_NOT_APPLICABLE; + constexpr auto kUnsupported = OrtCompiledModelCompatibility_EP_UNSUPPORTED; + constexpr auto kPrefer = OrtCompiledModelCompatibility_EP_SUPPORTED_PREFER_RECOMPILATION; + constexpr auto kOptimal = OrtCompiledModelCompatibility_EP_SUPPORTED_OPTIMAL; + + // EP_NOT_APPLICABLE is the (two-sided) identity element. + EXPECT_EQ(CombineCompatibility(kNA, kNA), kNA); + EXPECT_EQ(CombineCompatibility(kNA, kOptimal), kOptimal); + EXPECT_EQ(CombineCompatibility(kOptimal, kNA), kOptimal); + EXPECT_EQ(CombineCompatibility(kNA, kUnsupported), kUnsupported); + + // Worst-of wins for heterogeneous verdicts, in either argument order. + EXPECT_EQ(CombineCompatibility(kOptimal, kPrefer), kPrefer); + EXPECT_EQ(CombineCompatibility(kPrefer, kOptimal), kPrefer); + EXPECT_EQ(CombineCompatibility(kOptimal, kUnsupported), kUnsupported); + EXPECT_EQ(CombineCompatibility(kUnsupported, kPrefer), kUnsupported); + + // Idempotent for uniform verdicts. + EXPECT_EQ(CombineCompatibility(kOptimal, kOptimal), kOptimal); + EXPECT_EQ(CombineCompatibility(kUnsupported, kUnsupported), kUnsupported); + + // A full left-fold like the EP performs over a device set: optimal, prefer, optimal -> prefer. + OrtCompiledModelCompatibility acc = kNA; + acc = CombineCompatibility(acc, kOptimal); + acc = CombineCompatibility(acc, kPrefer); + acc = CombineCompatibility(acc, kOptimal); + EXPECT_EQ(acc, kPrefer); +} + TEST(ModelPackageTest, LoadModelPackageAndRunInference_DiscoverComponentsFromModelsFolder) { // manifest.json without "components"; discovery should scan models/* with metadata.json. const auto package_root = std::filesystem::temp_directory_path() / "ort_model_package_discover_test";