diff --git a/dotnet/agent-framework-dotnet.slnx b/dotnet/agent-framework-dotnet.slnx
index a4ffe13958..1c47bfe2b5 100644
--- a/dotnet/agent-framework-dotnet.slnx
+++ b/dotnet/agent-framework-dotnet.slnx
@@ -147,6 +147,7 @@
     <File Path="samples/02-agents/FoundryAgents/README.md" />
     <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step01_RedTeaming/FoundryAgents_Evaluations_Step01_RedTeaming.csproj" />
     <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/FoundryAgents_Evaluations_Step02_SelfReflection.csproj" />
+    <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/FoundryAgents_Evaluations_Step03_AllPatterns.csproj" />
     <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Step01.1_Basics/FoundryAgents_Step01.1_Basics.csproj" />
     <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Step01.2_Running/FoundryAgents_Step01.2_Running.csproj" />
     <Project Path="samples/02-agents/FoundryAgents/FoundryAgents_Step02_MultiturnConversation/FoundryAgents_Step02_MultiturnConversation.csproj" />
diff --git a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/FoundryAgents_Evaluations_Step02_SelfReflection.csproj b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/FoundryAgents_Evaluations_Step02_SelfReflection.csproj
index 646cd75532..8b6a7d5001 100644
--- a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/FoundryAgents_Evaluations_Step02_SelfReflection.csproj
+++ b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/FoundryAgents_Evaluations_Step02_SelfReflection.csproj
@@ -9,7 +9,6 @@
   </PropertyGroup>
 
   <ItemGroup>
-    <PackageReference Include="Azure.AI.OpenAI" />
     <PackageReference Include="Azure.AI.Projects" />
     <PackageReference Include="Azure.Identity" />
     <PackageReference Include="Microsoft.Extensions.AI.Evaluation" />
diff --git a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/Program.cs b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/Program.cs
index 8f8c9fa4ee..9f7ad4be3a 100644
--- a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/Program.cs
+++ b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step02_SelfReflection/Program.cs
@@ -12,7 +12,6 @@
 // For more details, see:
 // https://learn.microsoft.com/dotnet/ai/evaluation/libraries
 
-using Azure.AI.OpenAI;
 using Azure.AI.Projects;
 using Azure.Identity;
 using Microsoft.Agents.AI;
@@ -24,26 +23,25 @@
 using ChatMessage = Microsoft.Extensions.AI.ChatMessage;
 using ChatRole = Microsoft.Extensions.AI.ChatRole;
 
-string endpoint = Environment.GetEnvironmentVariable("AZURE_AI_PROJECT_ENDPOINT") ?? throw new InvalidOperationException("AZURE_AI_PROJECT_ENDPOINT is not set.");
-string deploymentName = Environment.GetEnvironmentVariable("AZURE_AI_MODEL_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
-string openAiEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ?? throw new InvalidOperationException("AZURE_OPENAI_ENDPOINT is not set.");
-string evaluatorDeploymentName = Environment.GetEnvironmentVariable("AZURE_OPENAI_DEPLOYMENT_NAME") ?? deploymentName;
+string endpoint = Environment.GetEnvironmentVariable("AZURE_FOUNDRY_PROJECT_ENDPOINT") ?? throw new InvalidOperationException("AZURE_FOUNDRY_PROJECT_ENDPOINT is not set.");
+string deploymentName = Environment.GetEnvironmentVariable("AZURE_FOUNDRY_PROJECT_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
 
 Console.WriteLine("=" + new string('=', 79));
 Console.WriteLine("SELF-REFLECTION EVALUATION SAMPLE");
 Console.WriteLine("=" + new string('=', 79));
 Console.WriteLine();
 
-// Initialize Azure credentials and client
+// Initialize Azure credentials and client — everything derives from the project endpoint
 // WARNING: DefaultAzureCredential is convenient for development but requires careful consideration in production.
 // In production, consider using a specific credential (e.g., ManagedIdentityCredential) to avoid
 // latency issues, unintended credential probing, and potential security risks from fallback mechanisms.
 DefaultAzureCredential credential = new();
 AIProjectClient aiProjectClient = new(new Uri(endpoint), credential);
 
-// Set up the LLM-based chat client for quality evaluators
-IChatClient chatClient = new AzureOpenAIClient(new Uri(openAiEndpoint), credential)
-    .GetChatClient(evaluatorDeploymentName)
+// Get a chat client for LLM-based evaluators from the project client
+IChatClient chatClient = aiProjectClient
+    .GetProjectOpenAIClient()
+    .GetChatClient(deploymentName)
     .AsIChatClient();
 
 // Configure evaluation: quality evaluators use the LLM, safety evaluators use Azure AI Foundry
@@ -55,7 +53,8 @@
     originalChatConfiguration: new ChatConfiguration(chatClient));
 
 // Create a test agent
-AIAgent agent = await aiProjectClient.CreateAIAgentAsync(
+AIAgent? agent = null;
+agent = await aiProjectClient.CreateAIAgentAsync(
     name: "KnowledgeAgent",
     model: deploymentName,
     instructions: "You are a helpful assistant. Answer questions accurately based on the provided context.");
@@ -93,9 +92,12 @@ 7. Enterprise-grade compliance and governance features
 finally
 {
     // Cleanup
-    await aiProjectClient.Agents.DeleteAgentAsync(agent.Name);
-    Console.WriteLine();
-    Console.WriteLine("Cleanup: Agent deleted.");
+    if (agent is not null)
+    {
+        await aiProjectClient.Agents.DeleteAgentAsync(agent.Name);
+        Console.WriteLine();
+        Console.WriteLine("Cleanup: Agent deleted.");
+    }
 }
 
 // ============================================================================
diff --git a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/FoundryAgents_Evaluations_Step03_AllPatterns.csproj b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/FoundryAgents_Evaluations_Step03_AllPatterns.csproj
new file mode 100644
index 0000000000..8b6a7d5001
--- /dev/null
+++ b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/FoundryAgents_Evaluations_Step03_AllPatterns.csproj
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFrameworks>net10.0</TargetFrameworks>
+
+    <Nullable>enable</Nullable>
+    <ImplicitUsings>enable</ImplicitUsings>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Azure.AI.Projects" />
+    <PackageReference Include="Azure.Identity" />
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation" />
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Quality" />
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Safety" />
+    <PackageReference Include="Microsoft.Extensions.AI.OpenAI" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\..\..\..\src\Microsoft.Agents.AI.AzureAI\Microsoft.Agents.AI.AzureAI.csproj" />
+  </ItemGroup>
+
+</Project>
diff --git a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/Program.cs b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/Program.cs
new file mode 100644
index 0000000000..4f5ea0c706
--- /dev/null
+++ b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/Program.cs
@@ -0,0 +1,339 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+// This sample demonstrates all evaluation patterns available in Agent Framework for .NET.
+// It covers:
+//   1. Function evaluators — custom checks using lambdas
+//   2. Built-in checks — keyword validation with EvalChecks.KeywordCheck
+//   3. MEAI evaluators — LLM-based quality scoring (Relevance, Coherence)
+//   4. Foundry evaluators — cloud-based evaluation with Azure AI Foundry
+//   5. Mixed evaluators — combining local checks with cloud evaluation
+//   6. Pre-existing response evaluation — evaluate responses without re-running the agent
+//   7. Conversation split strategies — LastTurn, Full, PerTurn, and call-site override
+//
+// Mirrors the Python sample: evaluate_all_patterns_sample.py
+
+using Azure.AI.Projects;
+using Azure.Identity;
+using Microsoft.Agents.AI;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+using Microsoft.Extensions.AI.Evaluation.Quality;
+using Microsoft.Extensions.AI.Evaluation.Safety;
+
+using ChatMessage = Microsoft.Extensions.AI.ChatMessage;
+using ChatRole = Microsoft.Extensions.AI.ChatRole;
+using FoundryEvals = Microsoft.Agents.AI.AzureAI.FoundryEvals;
+
+string endpoint = Environment.GetEnvironmentVariable("AZURE_FOUNDRY_PROJECT_ENDPOINT")
+    ?? throw new InvalidOperationException("AZURE_FOUNDRY_PROJECT_ENDPOINT is not set.");
+string deploymentName = Environment.GetEnvironmentVariable("AZURE_FOUNDRY_PROJECT_DEPLOYMENT_NAME") ?? "gpt-4o-mini";
+
+Console.WriteLine("=" + new string('=', 79));
+Console.WriteLine("AGENT FRAMEWORK EVALUATION — ALL PATTERNS");
+Console.WriteLine("=" + new string('=', 79));
+Console.WriteLine();
+
+// Initialize Azure credentials and clients from the project endpoint. WARNING: DefaultAzureCredential is convenient for development; in production consider a specific credential (e.g., ManagedIdentityCredential).
+DefaultAzureCredential credential = new();
+AIProjectClient aiProjectClient = new(new Uri(endpoint), credential);
+
+// Get a chat client for LLM-based evaluators from the project client (same deployment as the agent)
+IChatClient chatClient = aiProjectClient
+    .GetProjectOpenAIClient()
+    .GetChatClient(deploymentName)
+    .AsIChatClient();
+
+ContentSafetyServiceConfiguration safetyConfig = new(
+    credential: credential,
+    endpoint: new Uri(endpoint));
+
+ChatConfiguration chatConfiguration = safetyConfig.ToChatConfiguration(
+    originalChatConfiguration: new ChatConfiguration(chatClient));
+
+// Create the test agent; it is deleted again in the finally block at the end of the run
+AIAgent? agent = null;
+agent = await aiProjectClient.CreateAIAgentAsync(
+    name: "WeatherAgent",
+    model: deploymentName,
+    instructions: "You are a helpful weather assistant. Answer questions about weather accurately and concisely.");
+
+Console.WriteLine($"Created agent: {agent.Name}");
+Console.WriteLine();
+
+string[] queries = ["What's the weather in Seattle?", "Is it going to rain in New York today?"];
+
+try
+{
+    // ================================================================
+    // Section 1: Function Evaluators — lambda checks over the response string or the whole EvalItem
+    // ================================================================
+    Console.WriteLine("SECTION 1: Function Evaluators");
+    Console.WriteLine(new string('-', 60));
+
+    var functionEvaluator = new LocalEvaluator(
+        FunctionEvaluator.Create("is_concise",
+            (string response) => response.Split(' ').Length < 500),
+        FunctionEvaluator.Create("has_content",
+            (string response) => response.Length > 10),
+        FunctionEvaluator.Create("mentions_location",
+            (EvalItem item) => item.Response.Contains("Seattle", StringComparison.OrdinalIgnoreCase)
+                || item.Response.Contains("New York", StringComparison.OrdinalIgnoreCase)));
+
+    AgentEvaluationResults functionResults = await agent.EvaluateAsync(
+        queries,
+        functionEvaluator);
+
+    PrintResults("Function Evaluators", functionResults);
+
+    // ================================================================
+    // Section 2: Built-in Checks — EvalChecks.KeywordCheck, with and without the caseSensitive argument
+    // ================================================================
+    Console.WriteLine("SECTION 2: Built-in Checks");
+    Console.WriteLine(new string('-', 60));
+
+    var builtinEvaluator = new LocalEvaluator(
+        EvalChecks.KeywordCheck("weather"),
+        EvalChecks.KeywordCheck(caseSensitive: false, "temperature", "forecast"));
+
+    AgentEvaluationResults builtinResults = await agent.EvaluateAsync(
+        queries,
+        builtinEvaluator);
+
+    PrintResults("Built-in Checks", builtinResults);
+
+    // ================================================================
+    // Section 3: MEAI Quality Evaluators — RelevanceEvaluator and CoherenceEvaluator run with chatConfiguration
+    // ================================================================
+    Console.WriteLine("SECTION 3: MEAI Quality Evaluators");
+    Console.WriteLine(new string('-', 60));
+
+    // Pass MEAI evaluators directly — no adapter needed
+    AgentEvaluationResults meaiResults = await agent.EvaluateAsync(
+        queries,
+        new CompositeEvaluator(
+            new RelevanceEvaluator(),
+            new CoherenceEvaluator()),
+        chatConfiguration);
+
+    PrintResults("MEAI Quality", meaiResults);
+
+    // Print per-metric details for MEAI results (only NumericMetric entries are printed)
+    foreach (EvaluationResult itemResult in meaiResults.Items)
+    {
+        foreach (EvaluationMetric metric in itemResult.Metrics.Values)
+        {
+            if (metric is NumericMetric n)
+            {
+                string rating = n.Interpretation?.Rating.ToString() ?? "N/A";
+                Console.WriteLine($"  {n.Name,-20} Score: {n.Value:F1}/5  Rating: {rating}");
+            }
+        }
+    }
+
+    Console.WriteLine();
+
+    // ================================================================
+    // Section 4: Foundry Evaluators — FoundryEvals selected by name constants (Relevance, Coherence, Groundedness)
+    // ================================================================
+    Console.WriteLine("SECTION 4: Foundry Evaluators");
+    Console.WriteLine(new string('-', 60));
+
+    var foundryEvaluator = new FoundryEvals(
+        chatConfiguration,
+        FoundryEvals.Relevance,
+        FoundryEvals.Coherence,
+        FoundryEvals.Groundedness);
+
+    AgentEvaluationResults foundryResults = await agent.EvaluateAsync(
+        queries,
+        foundryEvaluator);
+
+    PrintResults("Foundry Evaluators", foundryResults);
+
+    // ================================================================
+    // Section 5: Mixed Evaluators (Local + Cloud) — one call returns a list of result sets, printed per provider
+    // ================================================================
+    Console.WriteLine("SECTION 5: Mixed Evaluators");
+    Console.WriteLine(new string('-', 60));
+
+    IReadOnlyList<AgentEvaluationResults> mixedResults = await agent.EvaluateAsync(
+        queries,
+        evaluators: new IAgentEvaluator[]
+        {
+            new LocalEvaluator(
+                EvalChecks.KeywordCheck("weather"),
+                FunctionEvaluator.Create("not_empty", (string r) => r.Length > 0)),
+            new FoundryEvals(chatConfiguration, FoundryEvals.Relevance),
+        });
+
+    foreach (AgentEvaluationResults result in mixedResults)
+    {
+        PrintResults($"Mixed - {result.Provider}", result);
+    }
+
+    // ================================================================
+    // Section 6: Evaluate Pre-existing Responses — responses are collected first, then scored
+    // ================================================================
+    Console.WriteLine("SECTION 6: Evaluate Pre-existing Responses");
+    Console.WriteLine(new string('-', 60));
+
+    // Run the agent once per query and keep each response alongside the query that produced it
+    var savedQueries = new List<string>();
+    var savedResponses = new List<AgentResponse>();
+    foreach (string query in queries)
+    {
+        AgentResponse response = await agent.RunAsync(
+            new List<ChatMessage> { new(ChatRole.User, query) });
+        savedQueries.Add(query);
+        savedResponses.Add(response);
+    }
+
+    // Evaluate the saved responses without re-running the agent
+    AgentEvaluationResults preExistingResults = await agent.EvaluateAsync(
+        savedResponses,
+        savedQueries,
+        new LocalEvaluator(
+            EvalChecks.KeywordCheck("weather"),
+            FunctionEvaluator.Create("response_quality",
+                (EvalItem item) => new EvalCheckResult(
+                    item.Response.Length > 20,
+                    item.Response.Length > 20
+                        ? "Response is detailed enough"
+                        : "Response is too short",
+                    "response_quality"))));
+
+    PrintResults("Pre-existing Responses", preExistingResults);
+
+    // ================================================================
+    // Section 7: Conversation Split Strategies — LastTurn, Full, PerTurn and call-site splitter overrides
+    // ================================================================
+    Console.WriteLine("SECTION 7: Conversation Split Strategies");
+    Console.WriteLine(new string('-', 60));
+
+    // Build a multi-turn conversation manually (three user turns, each followed by an assistant reply)
+    var multiTurnConversation = new List<ChatMessage>
+    {
+        new(ChatRole.User, "What's the weather in Seattle?"),
+        new(ChatRole.Assistant, "Seattle is 62°F, cloudy with a chance of rain."),
+        new(ChatRole.User, "And Paris?"),
+        new(ChatRole.Assistant, "Paris is 68°F, partly sunny."),
+        new(ChatRole.User, "Compare them."),
+        new(ChatRole.Assistant, "Seattle is cooler at 62°F with rain likely, while Paris is warmer at 68°F and sunnier."),
+    };
+
+    // Strategy 1: LAST_TURN (default) — evaluates the final response
+    var lastTurnItem = new EvalItem(
+        "Compare them.",
+        "Seattle is cooler at 62°F with rain likely, while Paris is warmer at 68°F and sunnier.",
+        multiTurnConversation);
+
+    var (lastQuery, lastResponse) = lastTurnItem.Split(ConversationSplitters.LastTurn);
+    Console.WriteLine($"  LastTurn split: {lastQuery.Count} query msgs, {lastResponse.Count} response msgs");
+
+    // Strategy 2: FULL — evaluates the whole conversation trajectory (splitter set on the item itself)
+    var fullItem = new EvalItem(
+        "What's the weather in Seattle?",
+        "Full conversation trajectory",
+        multiTurnConversation)
+    {
+        Splitter = ConversationSplitters.Full,
+    };
+
+    var (fullQuery, fullResponse) = fullItem.Split();
+    Console.WriteLine($"  Full split: {fullQuery.Count} query msgs, {fullResponse.Count} response msgs");
+
+    // Strategy 3: PER_TURN — one eval item per user turn
+    var perTurnItems = EvalItem.PerTurnItems(multiTurnConversation);
+    Console.WriteLine($"  PerTurn split: {perTurnItems.Count} items from {multiTurnConversation.Count} messages");
+
+    foreach (var turnItem in perTurnItems)
+    {
+        Console.WriteLine($"    Turn: \"{turnItem.Query}\" → {turnItem.Response.Length} chars");
+    }
+
+    // Evaluate per-turn items with a local evaluator (called on the evaluator directly, not through the agent)
+    var splitEvaluator = new LocalEvaluator(
+        FunctionEvaluator.Create("has_response", (string r) => r.Length > 5));
+
+    AgentEvaluationResults perTurnResults = await splitEvaluator.EvaluateAsync(
+        perTurnItems.ToList());
+
+    PrintResults("Per-Turn Evaluation", perTurnResults);
+
+    // Strategy 4: Call-site override with built-in splitter
+    AgentEvaluationResults fullSplitResults = await agent.EvaluateAsync(
+        queries,
+        new LocalEvaluator(EvalChecks.KeywordCheck("weather")),
+        splitter: ConversationSplitters.Full);
+
+    PrintResults("Call-site Full Split", fullSplitResults);
+
+    // Strategy 5: Custom splitter as call-site override
+    // Same parameter works for built-in and custom splitters
+    AgentEvaluationResults customSplitResults = await agent.EvaluateAsync(
+        queries,
+        new LocalEvaluator(EvalChecks.KeywordCheck("weather")),
+        splitter: new WeatherToolSplitter());
+
+    PrintResults("Custom Splitter Override", customSplitResults);
+    Console.WriteLine();
+}
+finally
+{
+    // Cleanup: delete the agent created for this sample, whether or not the sections above succeeded
+    if (agent is not null)
+    {
+        await aiProjectClient.Agents.DeleteAgentAsync(agent.Name);
+        Console.WriteLine("Cleanup: Agent deleted.");
+    }
+}
+
+// ============================================================================
+// Helper Functions
+// ============================================================================
+
+// Prints a one-line pass/fail summary for a result set, then one line per sub-result when SubResults is present.
+static void PrintResults(string title, AgentEvaluationResults results)
+{
+    string headline = results.AllPassed ? "✓ ALL PASSED" : "✗ SOME FAILED";
+    Console.WriteLine($"  [{title}] {headline} ({results.Passed}/{results.Total})");
+
+    if (results.SubResults is { } subResults)
+    {
+        foreach (var (subKey, subResult) in subResults)
+        {
+            Console.WriteLine($"    {(subResult.AllPassed ? "✓" : "✗")} {subKey}: {subResult.Passed}/{subResult.Total}");
+        }
+    }
+
+    Console.WriteLine();
+}
+
+// ============================================================================
+// Custom Splitter — demonstrates IConversationSplitter
+// ============================================================================
+
+/// <summary>
+/// Sample <see cref="IConversationSplitter"/> that cuts the conversation at the first assistant message containing
+/// a <see cref="FunctionCallContent"/>: earlier messages become the query, that message and the rest the response.
+/// </summary>
+internal sealed class WeatherToolSplitter : IConversationSplitter
+{
+    public (IReadOnlyList<ChatMessage> QueryMessages, IReadOnlyList<ChatMessage> ResponseMessages) Split(
+        IReadOnlyList<ChatMessage> conversation)
+    {
+        for (int index = 0; index < conversation.Count; index++)
+        {
+            ChatMessage message = conversation[index];
+            bool isToolCall = message.Role == ChatRole.Assistant
+                && message.Contents.OfType<FunctionCallContent>().Any();
+            if (isToolCall)
+            {
+                return (conversation.Take(index).ToList(), conversation.Skip(index).ToList());
+            }
+        }
+
+        // No assistant tool call found: defer to the built-in LastTurn splitter.
+        return ConversationSplitters.LastTurn.Split(conversation);
+    }
+}
diff --git a/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/README.md b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/README.md
new file mode 100644
index 0000000000..28eab9dd36
--- /dev/null
+++ b/dotnet/samples/02-agents/FoundryAgents/FoundryAgents_Evaluations_Step03_AllPatterns/README.md
@@ -0,0 +1,50 @@
+# Evaluation — All Patterns
+
+This sample demonstrates all evaluation patterns available in Agent Framework for .NET:
+
+| Section | Pattern | Description |
+|---------|---------|-------------|
+| 1 | **Function Evaluators** | Custom checks using C# lambdas via `FunctionEvaluator.Create()` |
+| 2 | **Built-in Checks** | Keyword validation with `EvalChecks.KeywordCheck()` (`EvalChecks.ToolCalledCheck()` is shown under Key Types) |
+| 3 | **MEAI Quality Evaluators** | LLM-based scoring with `RelevanceEvaluator`, `CoherenceEvaluator` |
+| 4 | **Foundry Evaluators** | Cloud-based evaluation via `FoundryEvals` |
+| 5 | **Mixed Evaluators** | Combining local checks with cloud evaluation in one call |
+| 6 | **Pre-existing Responses** | Evaluate saved responses without re-running the agent |
+| 7 | **Conversation Split Strategies** | `ConversationSplitters.LastTurn`, `ConversationSplitters.Full`, `EvalItem.PerTurnItems()`, and call-site overrides with a custom `IConversationSplitter` |
+
+## Prerequisites
+
+- Azure AI Foundry project with a deployed model
+- Set environment variables:
+  - `AZURE_FOUNDRY_PROJECT_ENDPOINT` — Your Azure AI Foundry project endpoint
+  - `AZURE_FOUNDRY_PROJECT_DEPLOYMENT_NAME` — Model deployment name (default: `gpt-4o-mini`)
+
+## Key Types
+
+```csharp
+// Custom function evaluators
+var check = FunctionEvaluator.Create("name", (string response) => response.Length > 10);
+
+// Built-in checks
+var keyword = EvalChecks.KeywordCheck("expected", "keywords");
+var toolCheck = EvalChecks.ToolCalledCheck("tool_name");
+
+// Local evaluator runs checks without API calls
+var local = new LocalEvaluator(check, keyword, toolCheck);
+
+// MEAI evaluators work directly — no adapter needed
+var results = await agent.EvaluateAsync(queries, new RelevanceEvaluator(), chatConfig);
+
+// Foundry evaluator uses Azure AI Foundry cloud evaluation
+var foundry = new FoundryEvals(chatConfig, FoundryEvals.Relevance, FoundryEvals.Coherence);
+
+// Evaluate an agent
+AgentEvaluationResults localResults = await agent.EvaluateAsync(queries, local);
+localResults.AssertAllPassed();
+```
+
+## Running
+
+```bash
+dotnet run --project FoundryAgents_Evaluations_Step03_AllPatterns.csproj
+```
diff --git a/dotnet/src/Microsoft.Agents.AI.AzureAI/Evaluation/FoundryEvals.cs b/dotnet/src/Microsoft.Agents.AI.AzureAI/Evaluation/FoundryEvals.cs
new file mode 100644
index 0000000000..a731af1099
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI.AzureAI/Evaluation/FoundryEvals.cs
@@ -0,0 +1,237 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+using Microsoft.Extensions.AI.Evaluation.Quality;
+using Microsoft.Extensions.AI.Evaluation.Safety;
+
+namespace Microsoft.Agents.AI.AzureAI;
+
+/// <summary>
+/// Azure AI Foundry evaluator provider with built-in evaluator name constants.
+/// </summary>
+/// <remarks>
+/// <para>
+/// Combines evaluator constants (e.g., <see cref="Relevance"/>, <see cref="Coherence"/>)
+/// with the <see cref="IAgentEvaluator"/> implementation that maps them to MEAI evaluators.
+/// </para>
+/// <para>
+/// When the Azure.AI.Projects .NET SDK adds native evaluation API support, this class
+/// will be updated to use it for full parity with the Python <c>FoundryEvals</c> class.
+/// </para>
+/// </remarks>
+public sealed class FoundryEvals : IAgentEvaluator
+{
+    private readonly ChatConfiguration _chatConfiguration;
+    private readonly string[] _evaluatorNames;
+    private readonly IConversationSplitter? _splitter;
+
+    // -----------------------------------------------------------------------
+    // Constructors
+    // -----------------------------------------------------------------------
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="FoundryEvals"/> class.
+    /// </summary>
+    /// <param name="chatConfiguration">Chat configuration for the LLM-based evaluators.</param>
+    /// <param name="evaluators">
+    /// Names of evaluators to use (e.g., <see cref="Relevance"/>, <see cref="Coherence"/>).
+    /// When <see langword="null"/> or empty, defaults to relevance and coherence.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="chatConfiguration"/> is <see langword="null"/>.</exception>
+    public FoundryEvals(ChatConfiguration chatConfiguration, params string[] evaluators)
+        : this(chatConfiguration, splitter: null, evaluators)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="FoundryEvals"/> class with a default splitter.
+    /// </summary>
+    /// <param name="chatConfiguration">Chat configuration for the LLM-based evaluators.</param>
+    /// <param name="splitter">
+    /// Default conversation splitter for multi-turn conversations. Overridden by
+    /// <see cref="EvalItem.Splitter"/> when set on individual items.
+    /// Use <see cref="ConversationSplitters.LastTurn"/>, <see cref="ConversationSplitters.Full"/>,
+    /// or a custom <see cref="IConversationSplitter"/> implementation.
+    /// </param>
+    /// <param name="evaluators">
+    /// Names of evaluators to use (e.g., <see cref="Relevance"/>, <see cref="Coherence"/>).
+    /// When <see langword="null"/> or empty, defaults to relevance and coherence.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="chatConfiguration"/> is <see langword="null"/>.</exception>
+    public FoundryEvals(ChatConfiguration chatConfiguration, IConversationSplitter? splitter, params string[] evaluators)
+    {
+        this._chatConfiguration = chatConfiguration ?? throw new ArgumentNullException(nameof(chatConfiguration));
+        this._splitter = splitter;
+        this._evaluatorNames = evaluators is { Length: > 0 }
+            ? evaluators.ToArray() // private copy: later changes to the caller's array must not alter this instance
+            : [Relevance, Coherence];
+    }
+
+    // -----------------------------------------------------------------------
+    // IAgentEvaluator (note: EvaluateAsync does not currently use its evalName argument)
+    // -----------------------------------------------------------------------
+
+    /// <inheritdoc />
+    public string Name => "FoundryEvals";
+
+    /// <inheritdoc />
+    public async Task<AgentEvaluationResults> EvaluateAsync(
+        IReadOnlyList<EvalItem> items,
+        string evalName = "Foundry Eval",
+        CancellationToken cancellationToken = default)
+    {
+        ArgumentNullException.ThrowIfNull(items);
+
+        var composite = new CompositeEvaluator(BuildEvaluators(this._evaluatorNames).ToArray());
+        var results = new List<EvaluationResult>(items.Count);
+
+        foreach (var item in items)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            // Resolve splitter: item-level > evaluator-level > LastTurn default
+            var effectiveSplitter = item.Splitter ?? this._splitter;
+            var (queryMessages, _) = item.Split(effectiveSplitter);
+            var messages = queryMessages.ToList();
+
+            var chatResponse = item.RawResponse
+                ?? new ChatResponse(new ChatMessage(ChatRole.Assistant, item.Response));
+
+            var additionalContext = new List<EvaluationContext>();
+
+            if (item.Context is not null)
+            {
+                additionalContext.Add(new GroundednessEvaluatorContext(item.Context));
+            }
+
+            var result = await composite.EvaluateAsync(
+                messages,
+                chatResponse,
+                this._chatConfiguration,
+                additionalContext: additionalContext.Count > 0 ? additionalContext : null,
+                cancellationToken: cancellationToken).ConfigureAwait(false);
+
+            results.Add(result);
+        }
+
+        return new AgentEvaluationResults(this.Name, results);
+    }
+
+    // -----------------------------------------------------------------------
+    // Evaluator name constants
+    // -----------------------------------------------------------------------
+
+    // Agent behavior (not yet mapped by BuildEvaluators: passing these names throws ArgumentException)
+
+    /// <summary>Evaluates whether the agent correctly resolves user intent.</summary>
+    public const string IntentResolution = "intent_resolution";
+
+    /// <summary>Evaluates whether the agent adheres to its task instructions.</summary>
+    public const string TaskAdherence = "task_adherence";
+
+    /// <summary>Evaluates whether the agent completes the requested task.</summary>
+    public const string TaskCompletion = "task_completion";
+
+    /// <summary>Evaluates the efficiency of the agent's navigation to complete the task.</summary>
+    public const string TaskNavigationEfficiency = "task_navigation_efficiency";
+
+    // Tool usage (not yet mapped by BuildEvaluators: passing these names throws ArgumentException)
+
+    /// <summary>Evaluates the accuracy of tool calls made by the agent.</summary>
+    public const string ToolCallAccuracy = "tool_call_accuracy";
+
+    /// <summary>Evaluates whether the agent selects the correct tools.</summary>
+    public const string ToolSelection = "tool_selection";
+
+    /// <summary>Evaluates the accuracy of inputs provided to tools.</summary>
+    public const string ToolInputAccuracy = "tool_input_accuracy";
+
+    /// <summary>Evaluates how well the agent uses tool outputs.</summary>
+    public const string ToolOutputUtilization = "tool_output_utilization";
+
+    /// <summary>Evaluates whether tool calls succeed.</summary>
+    public const string ToolCallSuccess = "tool_call_success";
+
+    // Quality (ResponseCompleteness and Similarity are not yet mapped by BuildEvaluators)
+
+    /// <summary>Evaluates the coherence of the response.</summary>
+    public const string Coherence = "coherence";
+
+    /// <summary>Evaluates the fluency of the response.</summary>
+    public const string Fluency = "fluency";
+
+    /// <summary>Evaluates the relevance of the response to the query.</summary>
+    public const string Relevance = "relevance";
+
+    /// <summary>Evaluates whether the response is grounded in the provided context.</summary>
+    public const string Groundedness = "groundedness";
+
+    /// <summary>Evaluates the completeness of the response.</summary>
+    public const string ResponseCompleteness = "response_completeness";
+
+    /// <summary>Evaluates the similarity between the response and the expected output.</summary>
+    public const string Similarity = "similarity";
+
+    // Safety (all four names map to one shared ContentHarmEvaluator in BuildEvaluators)
+
+    /// <summary>Evaluates the response for violent content.</summary>
+    public const string Violence = "violence";
+
+    /// <summary>Evaluates the response for sexual content.</summary>
+    public const string Sexual = "sexual";
+
+    /// <summary>Evaluates the response for self-harm content.</summary>
+    public const string SelfHarm = "self_harm";
+
+    /// <summary>Evaluates the response for hate or unfairness.</summary>
+    public const string HateUnfairness = "hate_unfairness";
+
+    // -----------------------------------------------------------------------
+    // Internal helpers
+    // -----------------------------------------------------------------------
+
+    /// <summary>Maps evaluator names to MEAI evaluators; throws <see cref="ArgumentException"/> for unsupported names.</summary>
+    internal static List<IEvaluator> BuildEvaluators(string[] names)
+    {
+        var evaluators = new List<IEvaluator>();
+        bool hasSafetyEvaluator = false;
+
+        // Repeated names are skipped so each evaluator is created at most once.
+        foreach (var name in names.Distinct(StringComparer.Ordinal))
+        {
+            IEvaluator? evaluator = name switch
+            {
+                Relevance => new RelevanceEvaluator(),
+                Coherence => new CoherenceEvaluator(),
+                Groundedness => new GroundednessEvaluator(),
+                Fluency => new FluencyEvaluator(),
+
+                // ContentHarmEvaluator covers all harm categories in one call — deduplicate
+                Violence or
+                Sexual or
+                SelfHarm or
+                HateUnfairness when !hasSafetyEvaluator => new ContentHarmEvaluator(),
+
+                Violence or
+                Sexual or
+                SelfHarm or
+                HateUnfairness => null,
+
+                _ => throw new ArgumentException(
+                    $"Evaluator '{name}' is not supported by the .NET FoundryEvals adapter. " +
+                    $"Supported: {Relevance}, {Coherence}, {Groundedness}, {Fluency}, " +
+                    $"{Violence}, {Sexual}, {SelfHarm}, {HateUnfairness}.",
+                    nameof(names)),
+            };
+
+            if (evaluator is null)
+            {
+                continue;
+            }
+
+            hasSafetyEvaluator |= evaluator is ContentHarmEvaluator;
+            evaluators.Add(evaluator);
+        }
+
+        return evaluators;
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI.AzureAI/Microsoft.Agents.AI.AzureAI.csproj b/dotnet/src/Microsoft.Agents.AI.AzureAI/Microsoft.Agents.AI.AzureAI.csproj
index 0cd8690126..fce34b7201 100644
--- a/dotnet/src/Microsoft.Agents.AI.AzureAI/Microsoft.Agents.AI.AzureAI.csproj
+++ b/dotnet/src/Microsoft.Agents.AI.AzureAI/Microsoft.Agents.AI.AzureAI.csproj
@@ -20,6 +20,20 @@
     <PackageReference Include="OpenAI" />
   </ItemGroup>
 
+  <!-- Evaluation support requires net8.0+ (MEAI.Evaluation does not support legacy TFMs) -->
+  <ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation" />
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Quality" />
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation.Safety" />
+  </ItemGroup>
+  <ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <Compile Remove="Evaluation\**\*.cs" />
+  </ItemGroup>
+
+  <ItemGroup>
+    <InternalsVisibleTo Include="Microsoft.Agents.AI.UnitTests" />
+  </ItemGroup>
+
   <ItemGroup>
     <ProjectReference Include="..\Microsoft.Agents.AI\Microsoft.Agents.AI.csproj" />
   </ItemGroup>
diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Evaluation/WorkflowEvaluationExtensions.cs b/dotnet/src/Microsoft.Agents.AI.Workflows/Evaluation/WorkflowEvaluationExtensions.cs
new file mode 100644
index 0000000000..badf6ff642
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Evaluation/WorkflowEvaluationExtensions.cs
@@ -0,0 +1,135 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI.Workflows;
+
+/// <summary>
+/// Extension methods for evaluating workflow runs.
+/// </summary>
+public static class WorkflowEvaluationExtensions
+{
+    /// <summary>
+    /// Evaluates a completed workflow run.
+    /// </summary>
+    /// <remarks>
+    /// The overall item uses the payload of the first <see cref="ExecutorInvokedEvent"/> as the query and
+    /// the text of the last <see cref="AgentResponseEvent"/> as the response. When the run contains no
+    /// <see cref="AgentResponseEvent"/>, or <paramref name="includeOverall"/> is <see langword="false"/>,
+    /// the returned result has no items of its own. Per-agent results are attached as
+    /// <see cref="AgentEvaluationResults.SubResults"/>, keyed by executor id.
+    /// </remarks>
+    /// <param name="run">The completed workflow run.</param>
+    /// <param name="evaluator">The evaluator to score results.</param>
+    /// <param name="includeOverall">Whether to include an overall evaluation.</param>
+    /// <param name="includePerAgent">Whether to include per-agent breakdowns.</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="splitter">
+    /// Optional conversation splitter to apply to all items.
+    /// Use <see cref="ConversationSplitters.LastTurn"/>, <see cref="ConversationSplitters.Full"/>,
+    /// or a custom <see cref="IConversationSplitter"/> implementation.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>Evaluation results with optional per-agent sub-results.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="run"/> or <paramref name="evaluator"/> is <see langword="null"/>.
+    /// </exception>
+    public static async Task<AgentEvaluationResults> EvaluateAsync(
+        this Run run,
+        IAgentEvaluator evaluator,
+        bool includeOverall = true,
+        bool includePerAgent = true,
+        string evalName = "Workflow Eval",
+        IConversationSplitter? splitter = null,
+        CancellationToken cancellationToken = default)
+    {
+        if (run is null)
+        {
+            throw new ArgumentNullException(nameof(run));
+        }
+
+        if (evaluator is null)
+        {
+            throw new ArgumentNullException(nameof(evaluator));
+        }
+
+        // Snapshot the events once; both the overall and the per-agent pass read from it.
+        var events = run.OutgoingEvents.ToList();
+
+        // Build overall items from final output
+        var overallItems = new List<EvalItem>();
+        if (includeOverall)
+        {
+            var finalResponse = events.OfType<AgentResponseEvent>().LastOrDefault();
+            if (finalResponse is not null)
+            {
+                var firstInvoked = events.OfType<ExecutorInvokedEvent>().FirstOrDefault();
+                var query = firstInvoked?.Data?.ToString() ?? string.Empty;
+                var responseText = finalResponse.Response.Text;
+                var conversation = new List<ChatMessage>
+                {
+                    new(ChatRole.User, query),
+                    new(ChatRole.Assistant, responseText),
+                };
+
+                overallItems.Add(new EvalItem(query, responseText, conversation)
+                {
+                    Splitter = splitter,
+                });
+            }
+        }
+
+        // Evaluate overall; with nothing to score, return an empty result rather than calling the evaluator.
+        var overallResult = overallItems.Count > 0
+            ? await evaluator.EvaluateAsync(overallItems, evalName, cancellationToken).ConfigureAwait(false)
+            : new AgentEvaluationResults(evaluator.Name, Array.Empty<EvaluationResult>());
+
+        if (!includePerAgent)
+        {
+            return overallResult;
+        }
+
+        // Per-agent data is only extracted when a per-agent breakdown was requested.
+        var agentData = ExtractAgentData(events, splitter);
+        if (agentData.Count == 0)
+        {
+            return overallResult;
+        }
+
+        var subResults = new Dictionary<string, AgentEvaluationResults>(agentData.Count);
+        foreach (var kvp in agentData)
+        {
+            subResults[kvp.Key] = await evaluator.EvaluateAsync(
+                kvp.Value,
+                $"{evalName} - {kvp.Key}",
+                cancellationToken).ConfigureAwait(false);
+        }
+
+        overallResult.SubResults = subResults;
+        return overallResult;
+    }
+
+    /// <summary>
+    /// Groups a run's events into evaluation items, one list per executor id.
+    /// </summary>
+    /// <remarks>
+    /// Each <see cref="ExecutorCompletedEvent"/> is paired with the most recent pending
+    /// <see cref="ExecutorInvokedEvent"/> for the same executor id. The invoked payload becomes the
+    /// query and the completed payload becomes the response (both via <c>ToString()</c>, or an empty
+    /// string when the payload is <see langword="null"/>). Completed events with no pending
+    /// invocation are ignored.
+    /// </remarks>
+    /// <param name="events">The workflow events, in the order they were emitted.</param>
+    /// <param name="splitter">Optional conversation splitter stamped on every item.</param>
+    /// <returns>Evaluation items keyed by executor id.</returns>
+    internal static Dictionary<string, List<EvalItem>> ExtractAgentData(
+        List<WorkflowEvent> events,
+        IConversationSplitter? splitter)
+    {
+        var invoked = new Dictionary<string, ExecutorInvokedEvent>();
+        var agentData = new Dictionary<string, List<EvalItem>>();
+
+        foreach (var evt in events)
+        {
+            if (evt is ExecutorInvokedEvent invokedEvent)
+            {
+                // A later invocation of the same executor replaces any still-pending one.
+                invoked[invokedEvent.ExecutorId] = invokedEvent;
+            }
+            else if (evt is ExecutorCompletedEvent completedEvent
+                     && invoked.TryGetValue(completedEvent.ExecutorId, out var matchingInvoked))
+            {
+                var query = matchingInvoked.Data?.ToString() ?? string.Empty;
+                var responseText = completedEvent.Data?.ToString() ?? string.Empty;
+                var conversation = new List<ChatMessage>
+                {
+                    new(ChatRole.User, query),
+                    new(ChatRole.Assistant, responseText),
+                };
+
+                var item = new EvalItem(query, responseText, conversation)
+                {
+                    Splitter = splitter,
+                };
+
+                if (!agentData.TryGetValue(completedEvent.ExecutorId, out var items))
+                {
+                    items = new List<EvalItem>();
+                    agentData[completedEvent.ExecutorId] = items;
+                }
+
+                items.Add(item);
+
+                // Consume the invocation so a second completion cannot reuse it.
+                invoked.Remove(completedEvent.ExecutorId);
+            }
+        }
+
+        return agentData;
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI.Workflows/Microsoft.Agents.AI.Workflows.csproj b/dotnet/src/Microsoft.Agents.AI.Workflows/Microsoft.Agents.AI.Workflows.csproj
index c103ead32d..0e4e20e47b 100644
--- a/dotnet/src/Microsoft.Agents.AI.Workflows/Microsoft.Agents.AI.Workflows.csproj
+++ b/dotnet/src/Microsoft.Agents.AI.Workflows/Microsoft.Agents.AI.Workflows.csproj
@@ -54,4 +54,9 @@
     <PackageReference Include="System.Diagnostics.DiagnosticSource" />
   </ItemGroup>
 
+  <!-- Evaluation support requires net8.0+ (MEAI.Evaluation does not support legacy TFMs) -->
+  <ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <Compile Remove="Evaluation\**\*.cs" />
+  </ItemGroup>
+
 </Project>
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationExtensions.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationExtensions.cs
new file mode 100644
index 0000000000..31904218ad
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationExtensions.cs
@@ -0,0 +1,355 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Extension methods for evaluating agents, responses, and workflow runs.
+/// </summary>
+public static partial class AgentEvaluationExtensions
+{
+    /// <summary>
+    /// Evaluates an agent by running it against test queries and scoring the responses.
+    /// </summary>
+    /// <param name="agent">The agent to evaluate.</param>
+    /// <param name="queries">Test queries to send to the agent.</param>
+    /// <param name="evaluator">The evaluator to score responses.</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="expectedOutput">
+    /// Optional ground-truth expected outputs, one per query. When provided,
+    /// must be the same length as <paramref name="queries"/>. Each value is
+    /// stamped on the corresponding <see cref="EvalItem.ExpectedOutput"/>.
+    /// </param>
+    /// <param name="expectedToolCalls">
+    /// Optional expected tool calls, one list per query. When provided,
+    /// must be the same length as <paramref name="queries"/>. Each list is
+    /// stamped on the corresponding <see cref="EvalItem.ExpectedToolCalls"/>.
+    /// </param>
+    /// <param name="splitter">
+    /// Optional conversation splitter to apply to all items.
+    /// Use <see cref="ConversationSplitters.LastTurn"/>, <see cref="ConversationSplitters.Full"/>,
+    /// or a custom <see cref="IConversationSplitter"/> implementation.
+    /// </param>
+    /// <param name="numRepetitions">
+    /// Number of times to run each query (default 1). When greater than 1, each query is invoked
+    /// independently N times to measure consistency. Results contain all N × queries.Count items.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>Evaluation results.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="agent"/> or <paramref name="evaluator"/> is <see langword="null"/>.
+    /// </exception>
+    /// <exception cref="ArgumentException">
+    /// <paramref name="numRepetitions"/> is less than 1, or an expected-value collection does not
+    /// have the same number of entries as <paramref name="queries"/>.
+    /// </exception>
+    public static async Task<AgentEvaluationResults> EvaluateAsync(
+        this AIAgent agent,
+        IEnumerable<string> queries,
+        IAgentEvaluator evaluator,
+        string evalName = "Agent Framework Eval",
+        IEnumerable<string>? expectedOutput = null,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls = null,
+        IConversationSplitter? splitter = null,
+        int numRepetitions = 1,
+        CancellationToken cancellationToken = default)
+    {
+        // Validate before any agent call is made: discovering a null evaluator only after
+        // every query has been run would throw away all of those agent invocations.
+        if (agent is null)
+        {
+            throw new ArgumentNullException(nameof(agent));
+        }
+
+        if (evaluator is null)
+        {
+            throw new ArgumentNullException(nameof(evaluator));
+        }
+
+        var items = await RunAgentForEvalAsync(agent, queries, expectedOutput, expectedToolCalls, splitter, numRepetitions, cancellationToken).ConfigureAwait(false);
+        return await evaluator.EvaluateAsync(items, evalName, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Runs an agent against test queries and scores the responses with an MEAI evaluator.
+    /// </summary>
+    /// <remarks>
+    /// The MEAI evaluator and its chat configuration are wrapped in a <c>MeaiEvaluatorAdapter</c>,
+    /// and the work is then delegated to another <c>EvaluateAsync</c> overload.
+    /// </remarks>
+    /// <param name="agent">The agent to evaluate.</param>
+    /// <param name="queries">Test queries to send to the agent.</param>
+    /// <param name="evaluator">The MEAI evaluator (e.g., <c>RelevanceEvaluator</c>, <c>CompositeEvaluator</c>).</param>
+    /// <param name="chatConfiguration">Chat configuration for the MEAI evaluator (includes the judge model).</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="expectedOutput">
+    /// Optional ground-truth expected outputs, one per query.
+    /// </param>
+    /// <param name="expectedToolCalls">
+    /// Optional expected tool calls, one list per query.
+    /// </param>
+    /// <param name="splitter">
+    /// Optional conversation splitter to apply to all items.
+    /// Use <see cref="ConversationSplitters.LastTurn"/>, <see cref="ConversationSplitters.Full"/>,
+    /// or a custom <see cref="IConversationSplitter"/> implementation.
+    /// </param>
+    /// <param name="numRepetitions">
+    /// Number of times to run each query (default 1). When greater than 1, each query is invoked
+    /// independently N times to measure consistency.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>Evaluation results.</returns>
+    public static async Task<AgentEvaluationResults> EvaluateAsync(
+        this AIAgent agent,
+        IEnumerable<string> queries,
+        IEvaluator evaluator,
+        ChatConfiguration chatConfiguration,
+        string evalName = "Agent Framework Eval",
+        IEnumerable<string>? expectedOutput = null,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls = null,
+        IConversationSplitter? splitter = null,
+        int numRepetitions = 1,
+        CancellationToken cancellationToken = default)
+    {
+        var adapter = new MeaiEvaluatorAdapter(evaluator, chatConfiguration);
+
+        var results = await agent.EvaluateAsync(
+            queries,
+            adapter,
+            evalName,
+            expectedOutput,
+            expectedToolCalls,
+            splitter,
+            numRepetitions,
+            cancellationToken).ConfigureAwait(false);
+
+        return results;
+    }
+
+    /// <summary>
+    /// Evaluates an agent by running it against test queries with multiple evaluators.
+    /// </summary>
+    /// <remarks>
+    /// The agent is run once per query and repetition; every evaluator then scores the same
+    /// set of items, in the order the evaluators were supplied.
+    /// </remarks>
+    /// <param name="agent">The agent to evaluate.</param>
+    /// <param name="queries">Test queries to send to the agent.</param>
+    /// <param name="evaluators">The evaluators to score responses.</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="expectedOutput">
+    /// Optional ground-truth expected outputs, one per query.
+    /// </param>
+    /// <param name="expectedToolCalls">
+    /// Optional expected tool calls, one list per query.
+    /// </param>
+    /// <param name="splitter">
+    /// Optional conversation splitter to apply to all items.
+    /// Use <see cref="ConversationSplitters.LastTurn"/>, <see cref="ConversationSplitters.Full"/>,
+    /// or a custom <see cref="IConversationSplitter"/> implementation.
+    /// </param>
+    /// <param name="numRepetitions">
+    /// Number of times to run each query (default 1). When greater than 1, each query is invoked
+    /// independently N times to measure consistency.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>One result per evaluator.</returns>
+    /// <exception cref="ArgumentNullException">
+    /// <paramref name="agent"/> or <paramref name="evaluators"/> is <see langword="null"/>.
+    /// </exception>
+    /// <exception cref="ArgumentException"><paramref name="evaluators"/> contains a null entry.</exception>
+    public static async Task<IReadOnlyList<AgentEvaluationResults>> EvaluateAsync(
+        this AIAgent agent,
+        IEnumerable<string> queries,
+        IEnumerable<IAgentEvaluator> evaluators,
+        string evalName = "Agent Framework Eval",
+        IEnumerable<string>? expectedOutput = null,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls = null,
+        IConversationSplitter? splitter = null,
+        int numRepetitions = 1,
+        CancellationToken cancellationToken = default)
+    {
+        if (agent is null)
+        {
+            throw new ArgumentNullException(nameof(agent));
+        }
+
+        if (evaluators is null)
+        {
+            throw new ArgumentNullException(nameof(evaluators));
+        }
+
+        // Snapshot and validate the evaluators before the agent runs, so a bad evaluator
+        // list fails immediately instead of after all agent invocations have been made.
+        var evaluatorList = evaluators.ToList();
+        if (evaluatorList.Any(e => e is null))
+        {
+            throw new ArgumentException("The evaluator collection must not contain null entries.", nameof(evaluators));
+        }
+
+        var items = await RunAgentForEvalAsync(agent, queries, expectedOutput, expectedToolCalls, splitter, numRepetitions, cancellationToken).ConfigureAwait(false);
+
+        var results = new List<AgentEvaluationResults>(evaluatorList.Count);
+        foreach (var evaluator in evaluatorList)
+        {
+            var result = await evaluator.EvaluateAsync(items, evalName, cancellationToken).ConfigureAwait(false);
+            results.Add(result);
+        }
+
+        return results;
+    }
+
+    /// <summary>
+    /// Evaluates pre-existing agent responses without re-running the agent.
+    /// </summary>
+    /// <remarks>
+    /// Each response is paired positionally with its query. The item's conversation is a user
+    /// message containing the query followed by the response's messages. No conversation
+    /// splitter is applied by this overload.
+    /// </remarks>
+    /// <param name="agent">
+    /// The agent that produced the responses. NOTE(review): it is passed through to item
+    /// construction, but the item-building code in this file does not read it, so tool
+    /// definitions are not taken from it here — confirm the intended use.
+    /// </param>
+    /// <param name="responses">Pre-existing agent responses.</param>
+    /// <param name="queries">The queries that produced each response (must match count).</param>
+    /// <param name="evaluator">The evaluator to score responses.</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="expectedOutput">
+    /// Optional ground-truth expected outputs, one per query.
+    /// </param>
+    /// <param name="expectedToolCalls">
+    /// Optional expected tool calls, one list per query.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>Evaluation results.</returns>
+    /// <exception cref="System.ArgumentException">
+    /// The number of responses, expected outputs, or expected tool-call lists differs from the
+    /// number of queries.
+    /// </exception>
+    public static async Task<AgentEvaluationResults> EvaluateAsync(
+        this AIAgent agent,
+        IEnumerable<AgentResponse> responses,
+        IEnumerable<string> queries,
+        IAgentEvaluator evaluator,
+        string evalName = "Agent Framework Eval",
+        IEnumerable<string>? expectedOutput = null,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls = null,
+        CancellationToken cancellationToken = default)
+    {
+        var items = BuildItemsFromResponses(agent, responses, queries, expectedOutput, expectedToolCalls);
+        return await evaluator.EvaluateAsync(items, evalName, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Evaluates pre-existing agent responses using an MEAI evaluator directly.
+    /// </summary>
+    /// <remarks>
+    /// The MEAI evaluator and its chat configuration are wrapped in a <c>MeaiEvaluatorAdapter</c>,
+    /// and the work is then delegated to another <c>EvaluateAsync</c> overload that takes the
+    /// responses and queries.
+    /// </remarks>
+    /// <param name="agent">
+    /// The agent that produced the responses. NOTE(review): it is only forwarded; the
+    /// item-building code in this file does not read it, so tool definitions are not taken
+    /// from it here — confirm the intended use.
+    /// </param>
+    /// <param name="responses">Pre-existing agent responses.</param>
+    /// <param name="queries">The queries that produced each response (must match count).</param>
+    /// <param name="evaluator">The MEAI evaluator.</param>
+    /// <param name="chatConfiguration">Chat configuration for the MEAI evaluator.</param>
+    /// <param name="evalName">Display name for this evaluation run.</param>
+    /// <param name="expectedOutput">
+    /// Optional ground-truth expected outputs, one per query.
+    /// </param>
+    /// <param name="expectedToolCalls">
+    /// Optional expected tool calls, one list per query.
+    /// </param>
+    /// <param name="cancellationToken">Cancellation token.</param>
+    /// <returns>Evaluation results.</returns>
+    public static async Task<AgentEvaluationResults> EvaluateAsync(
+        this AIAgent agent,
+        IEnumerable<AgentResponse> responses,
+        IEnumerable<string> queries,
+        IEvaluator evaluator,
+        ChatConfiguration chatConfiguration,
+        string evalName = "Agent Framework Eval",
+        IEnumerable<string>? expectedOutput = null,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls = null,
+        CancellationToken cancellationToken = default)
+    {
+        // Adapt the MEAI evaluator so the IAgentEvaluator-based overload can consume it.
+        var wrapped = new MeaiEvaluatorAdapter(evaluator, chatConfiguration);
+        return await agent.EvaluateAsync(responses, queries, wrapped, evalName, expectedOutput, expectedToolCalls, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Turns pre-existing responses into evaluation items, pairing each one positionally with
+    /// its query and any expected values.
+    /// </summary>
+    /// <param name="agent">The agent, forwarded to <see cref="BuildEvalItem"/>.</param>
+    /// <param name="responses">The responses to evaluate.</param>
+    /// <param name="queries">The queries that produced the responses; must match in count.</param>
+    /// <param name="expectedOutput">Optional expected outputs, one per query.</param>
+    /// <param name="expectedToolCalls">Optional expected tool calls, one list per query.</param>
+    /// <returns>One item per response, in input order.</returns>
+    /// <exception cref="ArgumentException">A supplied collection does not have one entry per query.</exception>
+    internal static List<EvalItem> BuildItemsFromResponses(
+        AIAgent agent,
+        IEnumerable<AgentResponse> responses,
+        IEnumerable<string> queries,
+        IEnumerable<string>? expectedOutput,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls)
+    {
+        // Materialize every input once so counts and indexing are stable.
+        var answers = responses.ToList();
+        var prompts = queries.ToList();
+        var expectedOutputs = expectedOutput?.ToList();
+        var expectedCalls = expectedToolCalls?.ToList();
+
+        EnsureOnePerQuery(answers.Count, "responses");
+
+        if (expectedOutputs is not null)
+        {
+            EnsureOnePerQuery(expectedOutputs.Count, "expectedOutput values");
+        }
+
+        if (expectedCalls is not null)
+        {
+            EnsureOnePerQuery(expectedCalls.Count, "expectedToolCalls lists");
+        }
+
+        var built = new List<EvalItem>(answers.Count);
+        for (int index = 0; index < answers.Count; index++)
+        {
+            var prompt = prompts[index];
+            var answer = answers[index];
+
+            // The conversation starts with the user query, followed by everything the agent returned.
+            var transcript = new List<ChatMessage> { new(ChatRole.User, prompt) };
+            transcript.AddRange(answer.Messages);
+
+            var evalItem = BuildEvalItem(prompt, answer, transcript, agent);
+
+            if (expectedOutputs is not null)
+            {
+                evalItem.ExpectedOutput = expectedOutputs[index];
+            }
+
+            if (expectedCalls is not null)
+            {
+                evalItem.ExpectedToolCalls = expectedCalls[index].ToList();
+            }
+
+            built.Add(evalItem);
+        }
+
+        return built;
+
+        // Throws when a collection's size differs from the number of queries.
+        void EnsureOnePerQuery(int actualCount, string description)
+        {
+            if (actualCount != prompts.Count)
+            {
+                throw new ArgumentException(
+                    $"Got {prompts.Count} queries but {actualCount} {description}. Counts must match.");
+            }
+        }
+    }
+
+    /// <summary>
+    /// Runs the agent once per query and repetition and collects one evaluation item per run.
+    /// </summary>
+    /// <param name="agent">The agent to run.</param>
+    /// <param name="queries">The queries to send, each as a single user message.</param>
+    /// <param name="expectedOutput">Optional expected outputs, one per query.</param>
+    /// <param name="expectedToolCalls">Optional expected tool calls, one list per query.</param>
+    /// <param name="splitter">Optional conversation splitter stamped on every item.</param>
+    /// <param name="numRepetitions">How many passes to make over the full query list; must be at least 1.</param>
+    /// <param name="cancellationToken">Checked before every agent call and passed to the agent.</param>
+    /// <returns>Items ordered by repetition, then by query.</returns>
+    /// <exception cref="ArgumentException">
+    /// <paramref name="numRepetitions"/> is less than 1, or an expected-value collection does not
+    /// have one entry per query.
+    /// </exception>
+    private static async Task<List<EvalItem>> RunAgentForEvalAsync(
+        AIAgent agent,
+        IEnumerable<string> queries,
+        IEnumerable<string>? expectedOutput,
+        IEnumerable<IEnumerable<ExpectedToolCall>>? expectedToolCalls,
+        IConversationSplitter? splitter,
+        int numRepetitions,
+        CancellationToken cancellationToken)
+    {
+        if (numRepetitions < 1)
+        {
+            throw new ArgumentException($"numRepetitions must be >= 1, got {numRepetitions}.", nameof(numRepetitions));
+        }
+
+        var prompts = queries.ToList();
+        var expectedOutputs = expectedOutput?.ToList();
+        var expectedCalls = expectedToolCalls?.ToList();
+
+        // All validation happens before the first agent call.
+        if (expectedOutputs is not null && expectedOutputs.Count != prompts.Count)
+        {
+            throw new ArgumentException(
+                $"Got {prompts.Count} queries but {expectedOutputs.Count} expectedOutput values. Counts must match.");
+        }
+
+        if (expectedCalls is not null && expectedCalls.Count != prompts.Count)
+        {
+            throw new ArgumentException(
+                $"Got {prompts.Count} queries but {expectedCalls.Count} expectedToolCalls lists. Counts must match.");
+        }
+
+        var collected = new List<EvalItem>();
+
+        for (int repetition = 0; repetition < numRepetitions; repetition++)
+        {
+            for (int index = 0; index < prompts.Count; index++)
+            {
+                cancellationToken.ThrowIfCancellationRequested();
+
+                var prompt = prompts[index];
+
+                // Every run gets a fresh single-message conversation.
+                var outgoing = new List<ChatMessage> { new(ChatRole.User, prompt) };
+                var reply = await agent.RunAsync(outgoing, cancellationToken: cancellationToken).ConfigureAwait(false);
+
+                var evalItem = BuildEvalItem(prompt, reply, outgoing, agent);
+                evalItem.Splitter = splitter;
+
+                if (expectedOutputs is not null)
+                {
+                    evalItem.ExpectedOutput = expectedOutputs[index];
+                }
+
+                if (expectedCalls is not null)
+                {
+                    evalItem.ExpectedToolCalls = expectedCalls[index].ToList();
+                }
+
+                collected.Add(evalItem);
+            }
+        }
+
+        return collected;
+    }
+
+    /// <summary>
+    /// Builds an <see cref="EvalItem"/> from a query, the agent's response, and the messages
+    /// that make up the start of the conversation.
+    /// </summary>
+    /// <param name="query">The user query.</param>
+    /// <param name="response">The agent response; its text becomes the item's response.</param>
+    /// <param name="messages">Messages that open the conversation. The list is copied, not modified.</param>
+    /// <param name="agent">The agent that produced the response. Not read by this method at present.</param>
+    /// <returns>
+    /// An item whose conversation is <paramref name="messages"/> followed by every response message
+    /// not already present. Its raw response wraps the last response message, or a synthesized
+    /// assistant message carrying the response text when the response has no messages.
+    /// </returns>
+    internal static EvalItem BuildEvalItem(
+        string query,
+        AgentResponse response,
+        List<ChatMessage> messages,
+        AIAgent agent)
+    {
+        // Build conversation from existing messages plus any new response messages.
+        // A set tracks what is already present so each membership test is O(1) rather than
+        // a linear scan of the growing list; it uses the same default equality as List.Contains.
+        var conversation = new List<ChatMessage>(messages);
+        var seen = new HashSet<ChatMessage>(conversation);
+        foreach (var msg in response.Messages)
+        {
+            if (seen.Add(msg))
+            {
+                conversation.Add(msg);
+            }
+        }
+
+        return new EvalItem(query, response.Text, conversation)
+        {
+            RawResponse = new ChatResponse(response.Messages.LastOrDefault()
+                ?? new ChatMessage(ChatRole.Assistant, response.Text)),
+        };
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationResults.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationResults.cs
new file mode 100644
index 0000000000..c46bc8046b
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/AgentEvaluationResults.cs
@@ -0,0 +1,127 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Aggregate evaluation results across multiple items.
+/// </summary>
+public sealed class AgentEvaluationResults
+{
+    /// <summary>
+    /// Minimum value a numeric metric must reach to pass when the metric carries no
+    /// interpretation of its own.
+    /// </summary>
+    private const double UninterpretedNumericPassThreshold = 3.0;
+
+    private readonly List<EvaluationResult> _items;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="AgentEvaluationResults"/> class.
+    /// </summary>
+    /// <param name="provider">Name of the evaluation provider.</param>
+    /// <param name="items">Per-item MEAI evaluation results. The sequence is copied.</param>
+    /// <param name="inputItems">The original eval items that were evaluated, for auditing.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="items"/> is <see langword="null"/>.</exception>
+    public AgentEvaluationResults(string provider, IEnumerable<EvaluationResult> items, IReadOnlyList<EvalItem>? inputItems = null)
+    {
+        if (items is null)
+        {
+            throw new ArgumentNullException(nameof(items));
+        }
+
+        this.Provider = provider;
+        this._items = new List<EvaluationResult>(items);
+        this.InputItems = inputItems;
+    }
+
+    /// <summary>Gets the evaluation provider name.</summary>
+    public string Provider { get; }
+
+    /// <summary>Gets or sets the portal URL for viewing results (Foundry only).</summary>
+    public Uri? ReportUrl { get; set; }
+
+    /// <summary>Gets the per-item MEAI evaluation results.</summary>
+    public IReadOnlyList<EvaluationResult> Items => this._items;
+
+    /// <summary>
+    /// Gets the original eval items that produced these results, for auditing.
+    /// Each entry corresponds positionally to <see cref="Items"/> — <c>InputItems[i]</c>
+    /// is the query/response that produced <c>Items[i]</c>.
+    /// </summary>
+    public IReadOnlyList<EvalItem>? InputItems { get; }
+
+    /// <summary>Gets or sets per-agent results for workflow evaluations.</summary>
+    public IReadOnlyDictionary<string, AgentEvaluationResults>? SubResults { get; set; }
+
+    /// <summary>Gets the number of items that passed.</summary>
+    public int Passed => this._items.Count(ItemPassed);
+
+    /// <summary>Gets the number of items that failed.</summary>
+    public int Failed => this._items.Count(i => !ItemPassed(i));
+
+    /// <summary>Gets the total number of items evaluated.</summary>
+    public int Total => this._items.Count;
+
+    /// <summary>Gets whether all items passed.</summary>
+    /// <remarks>
+    /// Without <see cref="SubResults"/>, at least one item must exist and none may have failed.
+    /// With <see cref="SubResults"/>, every sub-result must report <see cref="AllPassed"/> and
+    /// this instance's own items, if there are any, must not contain a failure.
+    /// </remarks>
+    public bool AllPassed
+    {
+        get
+        {
+            if (this.SubResults is not null)
+            {
+                return this.SubResults.Values.All(s => s.AllPassed)
+                    && (this.Total == 0 || this.Failed == 0);
+            }
+
+            return this.Total > 0 && this.Failed == 0;
+        }
+    }
+
+    /// <summary>
+    /// Asserts that all items passed. Throws <see cref="InvalidOperationException"/> on failure.
+    /// </summary>
+    /// <param name="message">Optional custom failure message.</param>
+    /// <exception cref="InvalidOperationException">Thrown when <see cref="AllPassed"/> is <see langword="false"/>.</exception>
+    public void AssertAllPassed(string? message = null)
+    {
+        if (this.AllPassed)
+        {
+            return;
+        }
+
+        var detail = message ?? $"{this.Provider}: {this.Passed} passed, {this.Failed} failed out of {this.Total}.";
+        if (this.ReportUrl is not null)
+        {
+            detail += $" See {this.ReportUrl} for details.";
+        }
+
+        if (this.SubResults is not null)
+        {
+            var failedAgents = this.SubResults
+                .Where(kvp => !kvp.Value.AllPassed)
+                .Select(kvp => kvp.Key)
+                .ToList();
+
+            // The failure may come solely from this instance's own items; only name
+            // agents when at least one of them actually failed.
+            if (failedAgents.Count > 0)
+            {
+                detail += $" Failed agents: {string.Join(", ", failedAgents)}.";
+            }
+        }
+
+        throw new InvalidOperationException(detail);
+    }
+
+    /// <summary>
+    /// Decides whether a single evaluation result counts as passed.
+    /// </summary>
+    /// <param name="result">The result whose metrics are inspected.</param>
+    /// <returns>
+    /// <see langword="true"/> when the result has at least one metric and no metric fails;
+    /// otherwise <see langword="false"/>.
+    /// </returns>
+    private static bool ItemPassed(EvaluationResult result)
+    {
+        foreach (var metric in result.Metrics.Values)
+        {
+            // An interpretation supplied with the metric is authoritative: it already accounts
+            // for the metric's own scale and direction (for example severity scores where lower
+            // is better), so the generic fallbacks below must not override it.
+            if (metric.Interpretation is { } interpretation)
+            {
+                if (interpretation.Failed)
+                {
+                    return false;
+                }
+
+                continue;
+            }
+
+            // Fallbacks for metrics that arrive without an interpretation.
+            if (metric is NumericMetric numeric && numeric.Value.HasValue)
+            {
+                if (numeric.Value.Value < UninterpretedNumericPassThreshold)
+                {
+                    return false;
+                }
+            }
+            else if (metric is BooleanMetric boolean && boolean.Value.HasValue)
+            {
+                if (!boolean.Value.Value)
+                {
+                    return false;
+                }
+            }
+        }
+
+        // A result without any metrics proves nothing and is not counted as passed.
+        return result.Metrics.Count > 0;
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/CheckResult.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/CheckResult.cs
new file mode 100644
index 0000000000..46f47bb3c9
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/CheckResult.cs
@@ -0,0 +1,11 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Result of a single check on a single evaluation item.
+/// </summary>
+/// <remarks>
+/// This is the value returned by an <see cref="EvalCheck"/> delegate.
+/// </remarks>
+/// <param name="Passed">Whether the check passed.</param>
+/// <param name="Reason">Human-readable explanation of the outcome.</param>
+/// <param name="CheckName">Name of the check that produced this result.</param>
+public sealed record EvalCheckResult(bool Passed, string Reason, string CheckName);
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalCheck.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalCheck.cs
new file mode 100644
index 0000000000..eae0750418
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalCheck.cs
@@ -0,0 +1,10 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Delegate for a synchronous evaluation check on a single item.
+/// </summary>
+/// <remarks>
+/// <see cref="EvalChecks"/> provides factory methods that create common checks.
+/// </remarks>
+/// <param name="item">The evaluation item to inspect.</param>
+/// <returns>The check result: pass/fail status, a reason, and the name of the check.</returns>
+public delegate EvalCheckResult EvalCheck(EvalItem item);
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalChecks.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalChecks.cs
new file mode 100644
index 0000000000..5dfa2da612
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalChecks.cs
@@ -0,0 +1,86 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Factory methods that produce ready-made <see cref="EvalCheck"/> delegates for common
+/// evaluation patterns.
+/// </summary>
+public static class EvalChecks
+{
+    /// <summary>
+    /// Creates a case-insensitive check that passes only when the response contains every keyword.
+    /// </summary>
+    /// <param name="keywords">Keywords that must appear in the response.</param>
+    /// <returns>An <see cref="EvalCheck"/> delegate.</returns>
+    public static EvalCheck KeywordCheck(params string[] keywords) =>
+        KeywordCheck(caseSensitive: false, keywords);
+
+    /// <summary>
+    /// Creates a check that passes only when the response contains every keyword.
+    /// </summary>
+    /// <param name="caseSensitive">
+    /// <see langword="true"/> for an ordinal comparison; <see langword="false"/> for an ordinal
+    /// comparison that ignores case.
+    /// </param>
+    /// <param name="keywords">Keywords that must appear in the response.</param>
+    /// <returns>An <see cref="EvalCheck"/> delegate whose result is named <c>keyword_check</c>.</returns>
+    public static EvalCheck KeywordCheck(bool caseSensitive, params string[] keywords)
+    {
+        // The comparison mode depends only on the factory argument, so pick it once.
+        StringComparison mode = caseSensitive
+            ? StringComparison.Ordinal
+            : StringComparison.OrdinalIgnoreCase;
+
+        return item =>
+        {
+            var absent = keywords
+                .Where(keyword => item.Response.IndexOf(keyword, mode) < 0)
+                .ToList();
+
+            if (absent.Count == 0)
+            {
+                return new EvalCheckResult(true, $"All keywords found: {string.Join(", ", keywords)}", "keyword_check");
+            }
+
+            return new EvalCheckResult(false, $"Missing keywords: {string.Join(", ", absent)}", "keyword_check");
+        };
+    }
+
+    /// <summary>
+    /// Creates a check that passes only when every named tool was called somewhere in the conversation.
+    /// </summary>
+    /// <remarks>
+    /// Tool calls are discovered as <see cref="FunctionCallContent"/> entries in the conversation's
+    /// messages; tool names are matched ignoring case.
+    /// </remarks>
+    /// <param name="toolNames">Tool names that must appear in the conversation.</param>
+    /// <returns>An <see cref="EvalCheck"/> delegate whose result is named <c>tool_called_check</c>.</returns>
+    public static EvalCheck ToolCalledCheck(params string[] toolNames)
+    {
+        return item =>
+        {
+            // Collect the name of every function call found in any message.
+            var invoked = new HashSet<string>(
+                item.Conversation
+                    .SelectMany(message => message.Contents)
+                    .OfType<FunctionCallContent>()
+                    .Select(call => call.Name),
+                StringComparer.OrdinalIgnoreCase);
+
+            var notCalled = toolNames
+                .Where(tool => !invoked.Contains(tool))
+                .ToList();
+
+            if (notCalled.Count == 0)
+            {
+                return new EvalCheckResult(true, $"All tools called: {string.Join(", ", toolNames)}", "tool_called_check");
+            }
+
+            return new EvalCheckResult(false, $"Missing tool calls: {string.Join(", ", notCalled)}", "tool_called_check");
+        };
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalItem.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalItem.cs
new file mode 100644
index 0000000000..93e860ae65
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/EvalItem.cs
@@ -0,0 +1,140 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// A single evaluation sample, independent of any particular evaluation provider.
+/// </summary>
+public sealed class EvalItem
+{
+    /// <summary>
+    /// Creates an <see cref="EvalItem"/> from its three required values.
+    /// </summary>
+    /// <param name="query">The user query.</param>
+    /// <param name="response">The agent response text.</param>
+    /// <param name="conversation">The full conversation as a list of <see cref="ChatMessage"/>.</param>
+    public EvalItem(string query, string response, IReadOnlyList<ChatMessage> conversation)
+    {
+        // The three required values are fixed here; the remaining
+        // properties are optional and can be assigned afterwards.
+        (this.Query, this.Response, this.Conversation) = (query, response, conversation);
+    }
+
+    /// <summary>Gets the query issued by the user.</summary>
+    public string Query { get; }
+
+    /// <summary>Gets the text of the agent's response.</summary>
+    public string Response { get; }
+
+    /// <summary>Gets the complete conversation history.</summary>
+    public IReadOnlyList<ChatMessage> Conversation { get; }
+
+    /// <summary>Gets or sets the tools that were available to the agent.</summary>
+    public IReadOnlyList<AITool>? Tools { get; set; }
+
+    /// <summary>Gets or sets the grounding context used during evaluation.</summary>
+    public string? Context { get; set; }
+
+    /// <summary>Gets or sets the expected output, for comparison against ground truth.</summary>
+    public string? ExpectedOutput { get; set; }
+
+    /// <summary>
+    /// Gets or sets the tool calls the agent is expected to make (tool-correctness evaluation).
+    /// </summary>
+    /// <remarks>
+    /// Each entry describes one expected tool call. Matching semantics
+    /// (ordering, extras, argument checking) are left to the evaluator.
+    /// See <see cref="ExpectedToolCall"/>.
+    /// </remarks>
+    public IReadOnlyList<ExpectedToolCall>? ExpectedToolCalls { get; set; }
+
+    /// <summary>Gets or sets the raw chat response, for use by MEAI evaluators.</summary>
+    public ChatResponse? RawResponse { get; set; }
+
+    /// <summary>
+    /// Gets or sets the default conversation splitter for this item.
+    /// </summary>
+    /// <remarks>
+    /// When set by orchestration functions (e.g. <c>EvaluateAsync(splitter: ...)</c>),
+    /// <see cref="Split(IConversationSplitter?)"/> falls back to this value.
+    /// Priority: explicit <c>Split(splitter)</c> argument &gt;
+    /// <see cref="Splitter"/> &gt; <see cref="ConversationSplitters.LastTurn"/>.
+    /// </remarks>
+    public IConversationSplitter? Splitter { get; set; }
+
+    /// <summary>
+    /// Divides <see cref="Conversation"/> into query messages and response messages.
+    /// </summary>
+    /// <param name="splitter">
+    /// The splitter to apply. When <c>null</c>, <see cref="Splitter"/> is used
+    /// if it is set; otherwise <see cref="ConversationSplitters.LastTurn"/>.
+    /// </param>
+    /// <returns>A tuple of (query messages, response messages).</returns>
+    public (IReadOnlyList<ChatMessage> QueryMessages, IReadOnlyList<ChatMessage> ResponseMessages) Split(
+        IConversationSplitter? splitter = null)
+    {
+        // Precedence: explicit argument, then the item-level default, then LastTurn.
+        return (splitter ?? this.Splitter ?? ConversationSplitters.LastTurn).Split(this.Conversation);
+    }
+
+    /// <summary>
+    /// Breaks a multi-turn conversation into one <see cref="EvalItem"/> per user turn.
+    /// </summary>
+    /// <remarks>
+    /// Every user message opens a turn that runs until the next user message (or the end).
+    /// Items are cumulative: each item's conversation holds all messages up to the end of its turn,
+    /// its query is the turn's user message text, and its response joins the turn's assistant texts.
+    /// </remarks>
+    /// <param name="conversation">The full conversation to split.</param>
+    /// <param name="tools">Optional tools available to the agent; copied onto every item.</param>
+    /// <param name="context">Optional grounding context; copied onto every item.</param>
+    /// <returns>A list of eval items, one per user turn.</returns>
+    public static IReadOnlyList<EvalItem> PerTurnItems(
+        IReadOnlyList<ChatMessage> conversation,
+        IReadOnlyList<AITool>? tools = null,
+        string? context = null)
+    {
+        // Positions of every user message; each one opens a turn.
+        var turnStarts = Enumerable.Range(0, conversation.Count)
+            .Where(pos => conversation[pos].Role == ChatRole.User)
+            .ToList();
+        var perTurn = new List<EvalItem>();
+
+        for (int turn = 0; turn < turnStarts.Count; turn++)
+        {
+            int start = turnStarts[turn];
+            // A turn ends where the next user message begins, or at the end of the conversation.
+            int end = conversation.Count;
+            if (turn + 1 < turnStarts.Count)
+            {
+                end = turnStarts[turn + 1];
+            }
+
+            // Text of the non-empty assistant messages between the user message and the turn end.
+            var assistantTexts = new List<string>();
+            for (int pos = start + 1; pos < end; pos++)
+            {
+                var message = conversation[pos];
+                if (message.Role == ChatRole.Assistant && !string.IsNullOrEmpty(message.Text))
+                {
+                    assistantTexts.Add(message.Text);
+                }
+            }
+
+            perTurn.Add(new EvalItem(
+                conversation[start].Text ?? string.Empty,
+                string.Join(" ", assistantTexts),
+                conversation.Take(end).ToList())
+            {
+                Tools = tools,
+                Context = context,
+            });
+        }
+
+        return perTurn;
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/ExpectedToolCall.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/ExpectedToolCall.cs
new file mode 100644
index 0000000000..9b30899df4
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/ExpectedToolCall.cs
@@ -0,0 +1,20 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Describes a single tool call that an agent is expected to make.
+/// </summary>
+/// <remarks>
+/// Carried by <see cref="EvalItem.ExpectedToolCalls"/> and used with <c>EvaluateAsync</c> to assert that the agent called the correct tools.
+/// The evaluator decides matching semantics (order, extras, argument checking);
+/// this type is pure data.
+/// </remarks>
+/// <param name="Name">The tool/function name (e.g. <c>"get_weather"</c>).</param>
+/// <param name="Arguments">
+/// Expected arguments. <c>null</c> (the default) means "don't check arguments".
+/// When provided, evaluators typically do subset matching (all expected keys must be present).
+/// </param>
+public record ExpectedToolCall(string Name, IReadOnlyDictionary<string, object>? Arguments = null);
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/FunctionEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/FunctionEvaluator.cs
new file mode 100644
index 0000000000..a9024c7750
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/FunctionEvaluator.cs
@@ -0,0 +1,68 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Factory methods that turn typed lambda functions into <see cref="EvalCheck"/> delegates.
+/// </summary>
+public static class FunctionEvaluator
+{
+    /// <summary>
+    /// Builds a check from a predicate over the response text.
+    /// </summary>
+    /// <param name="name">Check name for reporting.</param>
+    /// <param name="check">Predicate that returns true when the response passes.</param>
+    public static EvalCheck Create(string name, Func<string, bool> check)
+    {
+        // Only the response text is handed to the predicate.
+        return item => FromVerdict(name, check(item.Response));
+    }
+
+    /// <summary>
+    /// Builds a check from a predicate over the response text and the expected output.
+    /// </summary>
+    /// <param name="name">Check name for reporting.</param>
+    /// <param name="check">Predicate that returns true when the response passes.</param>
+    public static EvalCheck Create(string name, Func<string, string?, bool> check)
+    {
+        // The expected output is passed through as-is and may be null.
+        return item => FromVerdict(name, check(item.Response, item.ExpectedOutput));
+    }
+
+    /// <summary>
+    /// Builds a check from a predicate over the whole <see cref="EvalItem"/>.
+    /// </summary>
+    /// <param name="name">Check name for reporting.</param>
+    /// <param name="check">Predicate that returns true when the item passes.</param>
+    public static EvalCheck Create(string name, Func<EvalItem, bool> check)
+    {
+        // The predicate sees the complete item.
+        return item => FromVerdict(name, check(item));
+    }
+
+    /// <summary>
+    /// Builds a check from a function over the whole <see cref="EvalItem"/>
+    /// that produces its own <see cref="EvalCheckResult"/>.
+    /// </summary>
+    /// <param name="name">Check name (used as fallback if the result has no name).</param>
+    /// <param name="check">Function that returns a full check result.</param>
+    public static EvalCheck Create(string name, Func<EvalItem, EvalCheckResult> check)
+    {
+        // Keep the name the check supplied; otherwise fall back to the given name.
+        return item =>
+        {
+            EvalCheckResult produced = check(item);
+            return produced with { CheckName = produced.CheckName ?? name };
+        };
+    }
+
+    /// <summary>
+    /// Wraps a boolean verdict in a result whose reason is "Passed" or "Failed".
+    /// </summary>
+    private static EvalCheckResult FromVerdict(string name, bool passed)
+    {
+        return new EvalCheckResult(passed, passed ? "Passed" : "Failed", name);
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/IAgentEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/IAgentEvaluator.cs
new file mode 100644
index 0000000000..2dc84e35eb
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/IAgentEvaluator.cs
@@ -0,0 +1,33 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Batch-oriented evaluator interface for agent evaluation.
+/// </summary>
+/// <remarks>
+/// Unlike MEAI's <c>IEvaluator</c>, which evaluates one item at a time,
+/// <see cref="IAgentEvaluator"/> evaluates a whole batch of items. This enables
+/// efficient cloud-based evaluation (e.g., Foundry) and aggregate result computation.
+/// </remarks>
+public interface IAgentEvaluator
+{
+    /// <summary>Gets the name that identifies this evaluator.</summary>
+    string Name { get; }
+
+    /// <summary>
+    /// Evaluates a batch of items and returns aggregate results.
+    /// </summary>
+    /// <param name="items">The items to evaluate.</param>
+    /// <param name="evalName">A display name for this evaluation run; defaults to <c>"Agent Framework Eval"</c> when called through this interface.</param>
+    /// <param name="cancellationToken">A token that can be used to cancel the evaluation.</param>
+    /// <returns>A task that produces the aggregate evaluation results for the batch.</returns>
+    Task<AgentEvaluationResults> EvaluateAsync(
+        IReadOnlyList<EvalItem> items,
+        string evalName = "Agent Framework Eval",
+        CancellationToken cancellationToken = default);
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/IConversationSplitter.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/IConversationSplitter.cs
new file mode 100644
index 0000000000..f07282e4de
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/IConversationSplitter.cs
@@ -0,0 +1,103 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Linq;
+using Microsoft.Extensions.AI;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Strategy that splits a conversation into a query part and a response part for evaluation.
+/// </summary>
+/// <remarks>
+/// Use one of the built-in splitters from <see cref="ConversationSplitters"/>, or implement
+/// your own for domain-specific splitting logic (e.g., splitting before a memory-retrieval
+/// tool call to evaluate recall quality).
+/// </remarks>
+public interface IConversationSplitter
+{
+    /// <summary>
+    /// Splits a conversation into query messages and response messages.
+    /// </summary>
+    /// <param name="conversation">The full conversation to split.</param>
+    /// <returns>A tuple whose <c>QueryMessages</c> element is the query side and whose <c>ResponseMessages</c> element is the response side.</returns>
+    (IReadOnlyList<ChatMessage> QueryMessages, IReadOnlyList<ChatMessage> ResponseMessages) Split(
+        IReadOnlyList<ChatMessage> conversation);
+}
+
+/// <summary>
+/// Ready-made conversation splitters covering the common evaluation patterns.
+/// </summary>
+/// <remarks>
+/// <list type="bullet">
+///   <item><see cref="LastTurn"/>: Evaluates whether the agent answered the <em>latest</em> question well.</item>
+///   <item><see cref="Full"/>: Evaluates whether the <em>whole conversation trajectory</em> served the original request.</item>
+/// </list>
+/// For any other split, implement <see cref="IConversationSplitter"/> directly.
+/// </remarks>
+public static class ConversationSplitters
+{
+    /// <summary>
+    /// Splits at the last user message: everything up to and including that message
+    /// is the query, and everything after it is the response. This is the default strategy.
+    /// </summary>
+    public static IConversationSplitter LastTurn { get; } = new LastTurnSplitter();
+
+    /// <summary>
+    /// Splits at the first user message: that message (and anything preceding it)
+    /// is the query, and the entire remainder of the conversation is the response.
+    /// Evaluates the overall conversation trajectory.
+    /// </summary>
+    public static IConversationSplitter Full { get; } = new FullSplitter();
+
+    private sealed class LastTurnSplitter : IConversationSplitter
+    {
+        public (IReadOnlyList<ChatMessage>, IReadOnlyList<ChatMessage>) Split(
+            IReadOnlyList<ChatMessage> conversation)
+        {
+            // Walk backwards so the first hit is the final user message.
+            int cut = conversation.Count - 1;
+            while (cut >= 0 && conversation[cut].Role != ChatRole.User)
+            {
+                cut--;
+            }
+
+            // No user message at all: nothing is query, everything is response.
+            if (cut < 0)
+            {
+                return (new List<ChatMessage>(), conversation.ToList());
+            }
+
+            int queryLength = cut + 1;
+            var query = conversation.Take(queryLength).ToList();
+            var response = conversation.Skip(queryLength).ToList();
+            return (query, response);
+        }
+    }
+
+    private sealed class FullSplitter : IConversationSplitter
+    {
+        public (IReadOnlyList<ChatMessage>, IReadOnlyList<ChatMessage>) Split(
+            IReadOnlyList<ChatMessage> conversation)
+        {
+            // Advance until the first user message (or the end of the list).
+            int cut = 0;
+            while (cut < conversation.Count && conversation[cut].Role != ChatRole.User)
+            {
+                cut++;
+            }
+
+            // No user message at all: nothing is query, everything is response.
+            if (cut == conversation.Count)
+            {
+                return (new List<ChatMessage>(), conversation.ToList());
+            }
+
+            // The query ends with the first user message; the rest is the response.
+            int queryLength = cut + 1;
+            var query = conversation.Take(queryLength).ToList();
+            var response = conversation.Skip(queryLength).ToList();
+            return (query, response);
+        }
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/LocalEvaluator.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/LocalEvaluator.cs
new file mode 100644
index 0000000000..2b664b0e3b
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/LocalEvaluator.cs
@@ -0,0 +1,66 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Evaluator that runs check functions locally, in-process, without API calls.
+/// </summary>
+public sealed class LocalEvaluator : IAgentEvaluator
+{
+    private readonly EvalCheck[] _checks;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="LocalEvaluator"/> class.
+    /// </summary>
+    /// <param name="checks">The check functions to run, in order, on each item.</param>
+    public LocalEvaluator(params EvalCheck[] checks) => this._checks = checks;
+
+    /// <inheritdoc />
+    public string Name => "LocalEvaluator";
+
+    /// <inheritdoc />
+    public Task<AgentEvaluationResults> EvaluateAsync(
+        IReadOnlyList<EvalItem> items,
+        string evalName = "Local Eval",
+        CancellationToken cancellationToken = default)
+    {
+        var results = new List<EvaluationResult>(items.Count);
+
+        foreach (var item in items)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            var evalResult = new EvaluationResult();
+
+            foreach (var check in this._checks)
+            {
+                var checkResult = check(item);
+
+                // Metrics are keyed by name; suffix repeats ("x", "x_2", ...) so no result is overwritten.
+                var metricName = checkResult.CheckName;
+                for (int n = 2; evalResult.Metrics.ContainsKey(metricName); n++)
+                {
+                    metricName = $"{checkResult.CheckName}_{n}";
+                }
+
+                evalResult.Metrics[metricName] = new BooleanMetric(metricName, checkResult.Passed, reason: checkResult.Reason)
+                {
+                    Interpretation = new EvaluationMetricInterpretation
+                    {
+                        Rating = checkResult.Passed ? EvaluationRating.Good : EvaluationRating.Unacceptable,
+                        Failed = !checkResult.Passed,
+                    },
+                };
+            }
+
+            results.Add(evalResult);
+        }
+
+        return Task.FromResult(new AgentEvaluationResults(this.Name, results, inputItems: items));
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Evaluation/MeaiEvaluatorAdapter.cs b/dotnet/src/Microsoft.Agents.AI/Evaluation/MeaiEvaluatorAdapter.cs
new file mode 100644
index 0000000000..e2a6ea67e4
--- /dev/null
+++ b/dotnet/src/Microsoft.Agents.AI/Evaluation/MeaiEvaluatorAdapter.cs
@@ -0,0 +1,63 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI;
+
+/// <summary>
+/// Adapter that wraps an MEAI <see cref="IEvaluator"/> into an <see cref="IAgentEvaluator"/>.
+/// Runs the MEAI evaluator once per item, in order, and collects the per-item results.
+/// </summary>
+internal sealed class MeaiEvaluatorAdapter : IAgentEvaluator
+{
+    private readonly IEvaluator _evaluator;
+    private readonly ChatConfiguration _chatConfiguration;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="MeaiEvaluatorAdapter"/> class.
+    /// </summary>
+    /// <param name="evaluator">The MEAI evaluator to wrap.</param>
+    /// <param name="chatConfiguration">Chat configuration for the evaluator (includes the judge model).</param>
+    public MeaiEvaluatorAdapter(IEvaluator evaluator, ChatConfiguration chatConfiguration)
+    {
+        this._evaluator = evaluator;
+        this._chatConfiguration = chatConfiguration;
+    }
+
+    /// <inheritdoc />
+    public string Name => this._evaluator.GetType().Name;
+
+    /// <inheritdoc />
+    public async Task<AgentEvaluationResults> EvaluateAsync(
+        IReadOnlyList<EvalItem> items,
+        string evalName = "MEAI Eval",
+        CancellationToken cancellationToken = default)
+    {
+        var results = new List<EvaluationResult>(items.Count);
+
+        foreach (var item in items)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            // Query side comes from the item's splitter; a missing raw response is rebuilt from the text.
+            var messages = item.Split().QueryMessages.ToList();
+            var chatResponse = item.RawResponse
+                ?? new ChatResponse(new ChatMessage(ChatRole.Assistant, item.Response));
+
+            var result = await this._evaluator.EvaluateAsync(
+                messages,
+                chatResponse,
+                this._chatConfiguration,
+                cancellationToken: cancellationToken).ConfigureAwait(false);
+
+            results.Add(result);
+        }
+
+        return new AgentEvaluationResults(this.Name, results, inputItems: items);
+    }
+}
diff --git a/dotnet/src/Microsoft.Agents.AI/Microsoft.Agents.AI.csproj b/dotnet/src/Microsoft.Agents.AI/Microsoft.Agents.AI.csproj
index 70da404a61..a111ce8c2d 100644
--- a/dotnet/src/Microsoft.Agents.AI/Microsoft.Agents.AI.csproj
+++ b/dotnet/src/Microsoft.Agents.AI/Microsoft.Agents.AI.csproj
@@ -31,6 +31,14 @@
     <PackageReference Include="System.Diagnostics.DiagnosticSource" />
   </ItemGroup>
 
+  <!-- Evaluation support requires net8.0+ (MEAI.Evaluation does not support legacy TFMs) -->
+  <ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <PackageReference Include="Microsoft.Extensions.AI.Evaluation" />
+  </ItemGroup>
+  <ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <Compile Remove="Evaluation\**\*.cs" />
+  </ItemGroup>
+
   <PropertyGroup>
     <!-- NuGet Package Settings -->
     <Title>Microsoft Agent Framework</Title>
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/EvaluationTests.cs b/dotnet/tests/Microsoft.Agents.AI.UnitTests/EvaluationTests.cs
new file mode 100644
index 0000000000..00c3519f3f
--- /dev/null
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/EvaluationTests.cs
@@ -0,0 +1,1112 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading.Tasks;
+using Microsoft.Extensions.AI;
+using Microsoft.Extensions.AI.Evaluation;
+
+namespace Microsoft.Agents.AI.UnitTests;
+
+/// <summary>
+/// Tests for the evaluation types: <see cref="LocalEvaluator"/>, <see cref="FunctionEvaluator"/>,
+/// <see cref="EvalChecks"/>, and <see cref="AgentEvaluationResults"/>.
+/// </summary>
+public sealed class EvaluationTests
+{
+    private static EvalItem CreateItem(
+        string query = "What is the weather?",
+        string response = "The weather in Seattle is sunny and 72°F.",
+        IReadOnlyList<ChatMessage>? conversation = null)
+    {
+        // Without an explicit conversation, use a two-message user/assistant exchange.
+        if (conversation is null)
+        {
+            conversation = new List<ChatMessage> { new(ChatRole.User, query), new(ChatRole.Assistant, response) };
+        }
+
+        return new EvalItem(query, response, conversation);
+    }
+
+    // ---------------------------------------------------------------
+    // EvalItem tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void EvalItem_Constructor_SetsProperties()
+    {
+        // Arrange & Act: build an item from the helper's defaults.
+        EvalItem sample = CreateItem();
+
+        // Assert: required values are stored, optional ones start out null.
+        Assert.Equal("What is the weather?", sample.Query);
+        Assert.Equal("The weather in Seattle is sunny and 72°F.", sample.Response);
+        Assert.Equal(2, sample.Conversation.Count);
+        Assert.Null(sample.Tools);
+        Assert.Null(sample.Context);
+        Assert.Null(sample.ExpectedOutput);
+    }
+
+    [Fact]
+    public void EvalItem_OptionalProperties_CanBeSet()
+    {
+        // Arrange & Act: assign the optional properties after construction.
+        EvalItem sample = CreateItem();
+        sample.Context = "Weather data for Seattle";
+        sample.ExpectedOutput = "sunny";
+
+        // Assert: both assignments are readable back.
+        Assert.Equal("Weather data for Seattle", sample.Context);
+        Assert.Equal("sunny", sample.ExpectedOutput);
+    }
+
+    // ---------------------------------------------------------------
+    // LocalEvaluator tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public async Task LocalEvaluator_WithPassingCheck_ReturnsPassedResultAsync()
+    {
+        // Arrange: one check that accepts every response.
+        EvalCheck alwaysPass = FunctionEvaluator.Create("always_pass", (string _) => true);
+        var sut = new LocalEvaluator(alwaysPass);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.Equal("LocalEvaluator", outcome.Provider);
+        Assert.Equal(1, outcome.Total);
+        Assert.Equal(1, outcome.Passed);
+        Assert.Equal(0, outcome.Failed);
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task LocalEvaluator_WithFailingCheck_ReturnsFailedResultAsync()
+    {
+        // Arrange: one check that rejects every response.
+        EvalCheck alwaysFail = FunctionEvaluator.Create("always_fail", (string _) => false);
+        var sut = new LocalEvaluator(alwaysFail);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.Equal(1, outcome.Total);
+        Assert.Equal(0, outcome.Passed);
+        Assert.Equal(1, outcome.Failed);
+        Assert.False(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task LocalEvaluator_WithMultipleChecks_AllChecksRunAsync()
+    {
+        // Arrange: two distinctly named checks that both pass.
+        EvalCheck first = FunctionEvaluator.Create("check1", (string _) => true);
+        EvalCheck second = FunctionEvaluator.Create("check2", (string _) => true);
+        var sut = new LocalEvaluator(first, second);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert: one item, carrying one metric per check.
+        Assert.Equal(1, outcome.Total);
+        Assert.True(outcome.AllPassed);
+        var onlyItem = outcome.Items[0];
+        Assert.Equal(2, onlyItem.Metrics.Count);
+        Assert.True(onlyItem.Metrics.ContainsKey("check1"));
+        Assert.True(onlyItem.Metrics.ContainsKey("check2"));
+    }
+
+    [Fact]
+    public async Task LocalEvaluator_WithMultipleItems_EvaluatesAllAsync()
+    {
+        // Arrange: one response mentions the keyword, the other does not.
+        var mentionsWeather = EvalChecks.KeywordCheck("weather");
+        var sut = new LocalEvaluator(mentionsWeather);
+
+        var batch = new List<EvalItem>
+        {
+            CreateItem(response: "The weather is sunny."),
+            CreateItem(response: "I don't know about that topic."),
+        };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.Equal(2, outcome.Total);
+        Assert.Equal(1, outcome.Passed);
+        Assert.Equal(1, outcome.Failed);
+    }
+
+    // ---------------------------------------------------------------
+    // FunctionEvaluator tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public async Task FunctionEvaluator_ResponseOnly_PassesResponseAsync()
+    {
+        // Arrange: the predicate only looks at the response text.
+        EvalCheck lengthCheck = FunctionEvaluator.Create(
+            "length_check", (string text) => text.Length > 10);
+
+        var batch = new List<EvalItem> { CreateItem() };
+        var sut = new LocalEvaluator(lengthCheck);
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task FunctionEvaluator_WithExpected_PassesExpectedAsync()
+    {
+        // Arrange: the predicate receives both the response and the item's expected output.
+        EvalCheck containsExpected = FunctionEvaluator.Create(
+            "contains_expected",
+            (string actual, string? wanted) => wanted is not null && actual.Contains(wanted, StringComparison.OrdinalIgnoreCase));
+
+        EvalItem sample = CreateItem();
+        sample.ExpectedOutput = "sunny";
+        var batch = new List<EvalItem> { sample };
+        var sut = new LocalEvaluator(containsExpected);
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task FunctionEvaluator_FullItem_AccessesAllFieldsAsync()
+    {
+        // Arrange: the predicate inspects both the query and the response of the item.
+        EvalCheck fullCheck = FunctionEvaluator.Create(
+            "full_check",
+            (EvalItem candidate) => candidate.Query.Contains("weather", StringComparison.OrdinalIgnoreCase) && candidate.Response.Length > 0);
+
+        var batch = new List<EvalItem> { CreateItem() };
+        var sut = new LocalEvaluator(fullCheck);
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task FunctionEvaluator_WithCheckResult_ReturnsCustomReasonAsync()
+    {
+        // Arrange: the check builds its own result, including a custom reason.
+        EvalCheck customCheck = FunctionEvaluator.Create(
+            "custom_check", (EvalItem _) => new EvalCheckResult(true, "Custom reason", "custom_check"));
+
+        var batch = new List<EvalItem> { CreateItem() };
+        var sut = new LocalEvaluator(customCheck);
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert: the custom reason survives into the reported metric.
+        Assert.True(outcome.AllPassed);
+        BooleanMetric reported = outcome.Items[0].Get<BooleanMetric>("custom_check");
+        Assert.Equal("Custom reason", reported.Reason);
+    }
+
+    // ---------------------------------------------------------------
+    // EvalChecks tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public async Task KeywordCheck_AllKeywordsPresent_PassesAsync()
+    {
+        // Arrange: both keywords occur in the helper's default response.
+        var bothKeywords = EvalChecks.KeywordCheck("weather", "sunny");
+        var sut = new LocalEvaluator(bothKeywords);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task KeywordCheck_MissingKeyword_FailsAsync()
+    {
+        // Arrange: the keyword does not occur in the helper's default response.
+        var mentionsSnow = EvalChecks.KeywordCheck("snow");
+        var sut = new LocalEvaluator(mentionsSnow);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.False(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task KeywordCheck_CaseInsensitiveByDefault_PassesAsync()
+    {
+        // Arrange: upper-case keywords against the lower-case default response.
+        var upperCaseKeywords = EvalChecks.KeywordCheck("WEATHER", "SUNNY");
+        var sut = new LocalEvaluator(upperCaseKeywords);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task KeywordCheck_CaseSensitive_FailsOnWrongCaseAsync()
+    {
+        // Arrange: case-sensitive matching of an upper-case keyword.
+        var exactCaseKeyword = EvalChecks.KeywordCheck(caseSensitive: true, "WEATHER");
+        var sut = new LocalEvaluator(exactCaseKeyword);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.False(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task ToolCalledCheck_ToolPresent_PassesAsync()
+    {
+        // Arrange: a conversation in which the assistant calls get_weather before answering.
+        var toolCall = new FunctionCallContent(
+            "call1",
+            "get_weather",
+            new Dictionary<string, object?> { ["city"] = "Seattle" });
+        var toolResult = new FunctionResultContent("call1", "72°F and sunny");
+
+        var transcript = new List<ChatMessage>
+        {
+            new(ChatRole.User, "What is the weather?"),
+            new(ChatRole.Assistant, new List<AIContent> { toolCall }),
+            new(ChatRole.Tool, new List<AIContent> { toolResult }),
+            new(ChatRole.Assistant, "The weather is sunny and 72°F."),
+        };
+
+        EvalItem sample = CreateItem(conversation: transcript);
+        var sut = new LocalEvaluator(EvalChecks.ToolCalledCheck("get_weather"));
+        var batch = new List<EvalItem> { sample };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.True(outcome.AllPassed);
+    }
+
+    [Fact]
+    public async Task ToolCalledCheck_ToolMissing_FailsAsync()
+    {
+        // Arrange: the helper's default conversation contains no tool calls.
+        var requiresWeatherTool = EvalChecks.ToolCalledCheck("get_weather");
+        var sut = new LocalEvaluator(requiresWeatherTool);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert
+        Assert.False(outcome.AllPassed);
+    }
+
+    // ---------------------------------------------------------------
+    // AgentEvaluationResults tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void AgentEvaluationResults_AllPassed_WhenAllMetricsGood()
+    {
+        // Arrange — one boolean metric explicitly interpreted as a non-failure
+        var goodMetric = new BooleanMetric("check", true)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = false,
+                Rating = EvaluationRating.Good,
+            },
+        };
+        var itemResult = new EvaluationResult { Metrics = { ["check"] = goodMetric } };
+
+        // Act
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Assert — the single item is counted as passed
+        Assert.True(summary.AllPassed);
+        Assert.Equal(1, summary.Passed);
+        Assert.Equal(0, summary.Failed);
+    }
+
+    [Fact]
+    public void AgentEvaluationResults_NotAllPassed_WhenMetricFailed()
+    {
+        // Arrange — one boolean metric explicitly interpreted as a failure
+        var failedMetric = new BooleanMetric("check", false)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = true,
+                Rating = EvaluationRating.Unacceptable,
+            },
+        };
+        var itemResult = new EvaluationResult { Metrics = { ["check"] = failedMetric } };
+
+        // Act
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Assert — the single item is counted as failed
+        Assert.False(summary.AllPassed);
+        Assert.Equal(0, summary.Passed);
+        Assert.Equal(1, summary.Failed);
+    }
+
+    [Fact]
+    public void AssertAllPassed_ThrowsOnFailure()
+    {
+        // Arrange — a result set whose only item carries a failed metric
+        var failedMetric = new BooleanMetric("check", false)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = true,
+                Rating = EvaluationRating.Unacceptable,
+            },
+        };
+        var itemResult = new EvaluationResult { Metrics = { ["check"] = failedMetric } };
+
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Act & Assert — the exception message reports both the passed and failed counts
+        var thrown = Assert.Throws<InvalidOperationException>(() => summary.AssertAllPassed());
+        Assert.Contains("0 passed", thrown.Message);
+        Assert.Contains("1 failed", thrown.Message);
+    }
+
+    [Fact]
+    public void AssertAllPassed_DoesNotThrowOnSuccess()
+    {
+        // Arrange — a result set whose only item carries a passing metric
+        var goodMetric = new BooleanMetric("check", true)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = false,
+                Rating = EvaluationRating.Good,
+            },
+        };
+        var itemResult = new EvaluationResult { Metrics = { ["check"] = goodMetric } };
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Act & Assert — capture any exception so the "no throw" expectation is explicit
+        var thrown = Record.Exception(() => summary.AssertAllPassed());
+        Assert.Null(thrown);
+    }
+
+    [Fact]
+    public void AgentEvaluationResults_NumericMetric_HighScorePasses()
+    {
+        // Arrange — a numeric metric with no explicit interpretation attached
+        var relevance = new NumericMetric("relevance", 4.5);
+        var itemResult = new EvaluationResult { Metrics = { ["relevance"] = relevance } };
+
+        // Act
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Assert — a score of 4.5 is expected to count as passing
+        Assert.True(summary.AllPassed);
+    }
+
+    [Fact]
+    public void AgentEvaluationResults_NumericMetric_LowScoreFails()
+    {
+        // Arrange — a numeric metric with no explicit interpretation attached
+        var relevance = new NumericMetric("relevance", 2.0);
+        var itemResult = new EvaluationResult { Metrics = { ["relevance"] = relevance } };
+
+        // Act
+        var summary = new AgentEvaluationResults("test", new[] { itemResult });
+
+        // Assert — a score of 2.0 is expected to count as failing
+        Assert.False(summary.AllPassed);
+    }
+
+    [Fact]
+    public void AgentEvaluationResults_SubResults_AllPassedChecksChildren()
+    {
+        // Arrange — two child result sets: one passing, one failing
+        var goodMetric = new BooleanMetric("check", true)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = false,
+                Rating = EvaluationRating.Good,
+            },
+        };
+        var passingItem = new EvaluationResult { Metrics = { ["check"] = goodMetric } };
+
+        var badMetric = new BooleanMetric("check", false)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = true,
+                Rating = EvaluationRating.Unacceptable,
+            },
+        };
+        var failingItem = new EvaluationResult { Metrics = { ["check"] = badMetric } };
+
+        var children = new Dictionary<string, AgentEvaluationResults>
+        {
+            ["agent1"] = new AgentEvaluationResults("test", new[] { passingItem }),
+            ["agent2"] = new AgentEvaluationResults("test", new[] { failingItem }),
+        };
+
+        // Act — the parent has no items of its own, only sub-results
+        var parent = new AgentEvaluationResults("test", Array.Empty<EvaluationResult>()) { SubResults = children };
+
+        // Assert — one failing child is enough for the parent to report failure
+        Assert.False(parent.AllPassed);
+    }
+
+    // ---------------------------------------------------------------
+    // Mixed evaluator tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public async Task LocalEvaluator_MixedChecks_ReportsCorrectCountsAsync()
+    {
+        // Arrange — three checks run against one item: two keyword checks and a length check
+        var hasWeather = EvalChecks.KeywordCheck("weather");
+        var hasSnow = EvalChecks.KeywordCheck("snow");
+        var isLong = FunctionEvaluator.Create("is_long", (string r) => r.Length > 5);
+        var sut = new LocalEvaluator(hasWeather, hasSnow, isLong);
+
+        var batch = new List<EvalItem> { CreateItem() };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert — exactly one item was evaluated
+        Assert.Equal(1, outcome.Total);
+
+        // Expected per check: "weather" passes, "snow" fails, "is_long" passes.
+        // A single failed metric is enough for the whole item to count as failed.
+        Assert.Equal(0, outcome.Passed);
+        Assert.Equal(1, outcome.Failed);
+    }
+
+    // ---------------------------------------------------------------
+    // Conversation Split tests
+    // ---------------------------------------------------------------
+
+    // Shared fixture for the split and per-turn tests: three user turns, each followed
+    // by exactly one assistant reply (six messages in total).
+    private static List<ChatMessage> CreateMultiTurnConversation() =>
+        new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.User, "What's the weather in Seattle?"),
+            new ChatMessage(ChatRole.Assistant, "Seattle is 62°F and cloudy."),
+            new ChatMessage(ChatRole.User, "And Paris?"),
+            new ChatMessage(ChatRole.Assistant, "Paris is 68°F and partly sunny."),
+            new ChatMessage(ChatRole.User, "Compare them."),
+            new ChatMessage(ChatRole.Assistant, "Seattle is cooler; Paris is warmer and sunnier."),
+        };
+
+    [Fact]
+    public void Split_LastTurn_SplitsAtLastUserMessage()
+    {
+        // Arrange
+        var item = new EvalItem("Compare them.", "Seattle is cooler; Paris is warmer and sunnier.", CreateMultiTurnConversation());
+
+        // Act
+        var (queryMessages, responseMessages) = item.Split(ConversationSplitters.LastTurn);
+
+        // Assert — five query messages, the last of which is the "Compare them." user turn
+        Assert.Equal(5, queryMessages.Count);
+        var lastQueryMessage = queryMessages[queryMessages.Count - 1];
+        Assert.Equal(ChatRole.User, lastQueryMessage.Role);
+        Assert.Contains("Compare", lastQueryMessage.Text);
+
+        // The response side is exactly one assistant message
+        var onlyResponse = Assert.Single(responseMessages);
+        Assert.Equal(ChatRole.Assistant, onlyResponse.Role);
+    }
+
+    [Fact]
+    public void Split_Full_SplitsAtFirstUserMessage()
+    {
+        // Arrange
+        var turns = CreateMultiTurnConversation();
+        var item = new EvalItem("What's the weather in Seattle?", "Full trajectory", turns);
+
+        // Act
+        var (queryMessages, responseMessages) = item.Split(ConversationSplitters.Full);
+
+        // Assert — the query side is only the opening user message
+        var openingMessage = Assert.Single(queryMessages);
+        Assert.Contains("Seattle", openingMessage.Text);
+
+        // The remaining five messages all land on the response side
+        Assert.Equal(5, responseMessages.Count);
+    }
+
+    [Fact]
+    public void Split_Full_IncludesSystemMessagesInQuery()
+    {
+        // Arrange — a leading system prompt precedes the single user/assistant exchange
+        var messages = new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.System, "You are a weather assistant."),
+            new ChatMessage(ChatRole.User, "What's the weather?"),
+            new ChatMessage(ChatRole.Assistant, "It's sunny."),
+        };
+
+        var item = new EvalItem("What's the weather?", "It's sunny.", messages);
+
+        // Act
+        var (queryMessages, responseMessages) = item.Split(ConversationSplitters.Full);
+
+        // Assert — query side is the system prompt followed by the first user message
+        Assert.Equal(2, queryMessages.Count);
+        Assert.Equal(ChatRole.System, queryMessages[0].Role);
+        Assert.Equal(ChatRole.User, queryMessages[1].Role);
+        Assert.Single(responseMessages);
+    }
+
+    [Fact]
+    public void Split_DefaultIsLastTurn()
+    {
+        // Arrange — no Splitter is configured on the item
+        var turns = CreateMultiTurnConversation();
+        var item = new EvalItem("Compare them.", "response", turns);
+
+        // Act — call Split() without arguments
+        var (queryMessages, responseMessages) = item.Split();
+
+        // Assert — the same 5/1 partition the explicit LastTurn tests expect
+        Assert.Equal(5, queryMessages.Count);
+        Assert.Single(responseMessages);
+    }
+
+    [Fact]
+    public void Split_SplitterProperty_UsedWhenNoExplicitSplit()
+    {
+        // Arrange — the item itself is configured with the Full splitter
+        var turns = CreateMultiTurnConversation();
+        var item = new EvalItem("query", "response", turns)
+        {
+            Splitter = ConversationSplitters.Full,
+        };
+
+        // Act — Split() with no argument, so the item's Splitter should apply
+        var (queryMessages, responseMessages) = item.Split();
+
+        // Assert — 1/5 partition (Full), not the 5/1 partition LastTurn would give
+        Assert.Single(queryMessages);
+        Assert.Equal(5, responseMessages.Count);
+    }
+
+    [Fact]
+    public void Split_ExplicitSplitter_OverridesSplitterProperty()
+    {
+        // Arrange — the item prefers Full, but the caller will ask for LastTurn
+        var turns = CreateMultiTurnConversation();
+        var item = new EvalItem("query", "response", turns)
+        {
+            Splitter = ConversationSplitters.Full,
+        };
+
+        // Act — pass a splitter explicitly
+        var (queryMessages, responseMessages) = item.Split(ConversationSplitters.LastTurn);
+
+        // Assert — 5/1 partition: the argument wins over the item's Splitter property
+        Assert.Equal(5, queryMessages.Count);
+        Assert.Single(responseMessages);
+    }
+
+    [Fact]
+    public void Split_WithToolMessages_PreservesToolPairs()
+    {
+        // Arrange — the first turn includes a tool round-trip (call + result);
+        // the second turn is a plain text exchange
+        var callArguments = new Dictionary<string, object?> { ["city"] = "Seattle" };
+        var toolCall = new FunctionCallContent("c1", "get_weather", callArguments);
+        var toolResult = new FunctionResultContent("c1", "62°F, cloudy");
+
+        var messages = new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.User, "What's the weather?"),
+            new ChatMessage(ChatRole.Assistant, new List<AIContent> { toolCall }),
+            new ChatMessage(ChatRole.Tool, new List<AIContent> { toolResult }),
+            new ChatMessage(ChatRole.Assistant, "Seattle is 62°F and cloudy."),
+            new ChatMessage(ChatRole.User, "Thanks!"),
+            new ChatMessage(ChatRole.Assistant, "You're welcome!"),
+        };
+
+        var item = new EvalItem("Thanks!", "You're welcome!", messages);
+
+        // Act
+        var (queryMessages, responseMessages) = item.Split(ConversationSplitters.LastTurn);
+
+        // Assert — all five messages up to "Thanks!" are query context, and the tool
+        // result keeps its original position (index 2) within that context
+        Assert.Equal(5, queryMessages.Count);
+        Assert.Equal(ChatRole.Tool, queryMessages[2].Role);
+        Assert.Single(responseMessages);
+    }
+
+    [Fact]
+    public void ConversationSplitters_LastTurn_CanBeUsedAsCustomFallback()
+    {
+        // Arrange
+        var turns = CreateMultiTurnConversation();
+
+        // Act — invoke the built-in splitter directly, without going through EvalItem
+        var (queryMessages, responseMessages) = ConversationSplitters.LastTurn.Split(turns);
+
+        // Assert — five messages on the query side, one on the response side
+        Assert.Equal(5, queryMessages.Count);
+        Assert.Single(responseMessages);
+    }
+
+    // ---------------------------------------------------------------
+    // PerTurnItems tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void PerTurnItems_SplitsMultiTurnConversation()
+    {
+        // Arrange
+        var turns = CreateMultiTurnConversation();
+
+        // Act
+        var perTurn = EvalItem.PerTurnItems(turns);
+
+        // Assert — one item per user message; each item's conversation grows by one exchange
+        Assert.Equal(3, perTurn.Count);
+
+        var first = perTurn[0]; // turn: "What's the weather in Seattle?"
+        Assert.Contains("Seattle", first.Query);
+        Assert.Contains("62°F", first.Response);
+        Assert.Equal(2, first.Conversation.Count);
+
+        var second = perTurn[1]; // turn: "And Paris?"
+        Assert.Contains("Paris", second.Query);
+        Assert.Contains("68°F", second.Response);
+        Assert.Equal(4, second.Conversation.Count);
+
+        var third = perTurn[2]; // turn: "Compare them."
+        Assert.Contains("Compare", third.Query);
+        Assert.Contains("cooler", third.Response);
+        Assert.Equal(6, third.Conversation.Count);
+    }
+
+    [Fact]
+    public void PerTurnItems_PropagatesToolsAndContext()
+    {
+        // Arrange
+        var conversation = CreateMultiTurnConversation();
+
+        // Act — only context is supplied here; no tools are passed to PerTurnItems
+        var items = EvalItem.PerTurnItems(conversation, context: "Weather database");
+
+        // Assert — Assert.All succeeds on an empty collection, so first make sure items
+        // were actually produced; otherwise this test could pass without checking anything
+        Assert.NotEmpty(items);
+        Assert.All(items, item => Assert.Equal("Weather database", item.Context));
+    }
+
+    [Fact]
+    public void PerTurnItems_SingleTurn_ReturnsOneItem()
+    {
+        // Arrange — the smallest conversation used here: one user message, one reply
+        var messages = new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.User, "Hello"),
+            new ChatMessage(ChatRole.Assistant, "Hi there!"),
+        };
+
+        // Act
+        var perTurn = EvalItem.PerTurnItems(messages);
+
+        // Assert — exactly one item, carrying the user text and the assistant text
+        var onlyItem = Assert.Single(perTurn);
+        Assert.Equal("Hello", onlyItem.Query);
+        Assert.Equal("Hi there!", onlyItem.Response);
+    }
+
+    // ---------------------------------------------------------------
+    // Custom IConversationSplitter tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void Split_CustomSplitter_IsUsed()
+    {
+        // Arrange — the second turn is answered through a retrieve_memory tool call.
+        // MemorySplitter cuts the conversation immediately before the assistant message
+        // that carries that call.
+        var memoryCall = new FunctionCallContent("c1", "retrieve_memory");
+        var memoryResult = new FunctionResultContent("c1", "You said: Remember this");
+
+        var messages = new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.User, "Remember this"),
+            new ChatMessage(ChatRole.Assistant, "Storing..."),
+            new ChatMessage(ChatRole.User, "What did I say?"),
+            new ChatMessage(ChatRole.Assistant, new List<AIContent> { memoryCall }),
+            new ChatMessage(ChatRole.Tool, new List<AIContent> { memoryResult }),
+            new ChatMessage(ChatRole.Assistant, "You said 'Remember this'."),
+        };
+
+        var item = new EvalItem("What did I say?", "You said 'Remember this'.", messages);
+        var customSplitter = new MemorySplitter();
+
+        // Act
+        var (queryMessages, responseMessages) = item.Split(customSplitter);
+
+        // Assert — three messages precede the tool call; the call, its result and the
+        // final answer make up the response side
+        Assert.Equal(3, queryMessages.Count);
+        Assert.Equal(3, responseMessages.Count);
+    }
+
+    [Fact]
+    public void Split_CustomSplitter_WorksAsItemProperty()
+    {
+        // Arrange — the second turn is answered through a retrieve_memory tool call.
+        // Here the custom splitter is attached to the item itself rather than being
+        // passed to Split, simulating a call-site override.
+        var memoryCall = new FunctionCallContent("c1", "retrieve_memory");
+        var memoryResult = new FunctionResultContent("c1", "You said: Remember this");
+
+        var messages = new List<ChatMessage>
+        {
+            new ChatMessage(ChatRole.User, "Remember this"),
+            new ChatMessage(ChatRole.Assistant, "Storing..."),
+            new ChatMessage(ChatRole.User, "What did I say?"),
+            new ChatMessage(ChatRole.Assistant, new List<AIContent> { memoryCall }),
+            new ChatMessage(ChatRole.Tool, new List<AIContent> { memoryResult }),
+            new ChatMessage(ChatRole.Assistant, "You said 'Remember this'."),
+        };
+
+        var item = new EvalItem("What did I say?", "You said 'Remember this'.", messages)
+        {
+            Splitter = new MemorySplitter(),
+        };
+
+        // Act — no argument, so the item's own Splitter decides
+        var (queryMessages, responseMessages) = item.Split();
+
+        // Assert — 3/3 partition, which is what a cut at the retrieve_memory call
+        // produces for this conversation
+        Assert.Equal(3, queryMessages.Count);
+        Assert.Equal(3, responseMessages.Count);
+    }
+
+    private sealed class MemorySplitter : IConversationSplitter
+    {
+        // Cuts immediately before the first assistant message that calls "retrieve_memory";
+        // when no such message exists, defers to the built-in last-turn splitter.
+        public (IReadOnlyList<ChatMessage> QueryMessages, IReadOnlyList<ChatMessage> ResponseMessages) Split(
+            IReadOnlyList<ChatMessage> conversation)
+        {
+            int cutIndex = 0;
+            while (cutIndex < conversation.Count && !CallsMemoryTool(conversation[cutIndex]))
+            {
+                cutIndex++;
+            }
+
+            if (cutIndex == conversation.Count)
+            {
+                return ConversationSplitters.LastTurn.Split(conversation);
+            }
+
+            return (conversation.Take(cutIndex).ToList(), conversation.Skip(cutIndex).ToList());
+        }
+
+        private static bool CallsMemoryTool(ChatMessage message) =>
+            message.Role == ChatRole.Assistant
+            && message.Contents is not null
+            && message.Contents.OfType<FunctionCallContent>().Any(call => call.Name == "retrieve_memory");
+    }
+
+    // ---------------------------------------------------------------
+    // ExpectedToolCall tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void ExpectedToolCall_NameOnly()
+    {
+        var expectation = new ExpectedToolCall("get_weather"); // name only, no arguments supplied
+        Assert.Null(expectation.Arguments);
+        Assert.Equal("get_weather", expectation.Name);
+    }
+
+    [Fact]
+    public void ExpectedToolCall_NameAndArgs()
+    {
+        var expectedArguments = new Dictionary<string, object> { ["location"] = "NYC" };
+        var expectation = new ExpectedToolCall("get_weather", expectedArguments);
+        Assert.Equal("get_weather", expectation.Name); // name is kept as given
+        Assert.NotNull(expectation.Arguments);
+        Assert.Equal("NYC", expectation.Arguments["location"]); // argument is readable by key
+    }
+
+    [Fact]
+    public void EvalItem_ExpectedToolCalls_DefaultNull()
+    {
+        // An item straight from CreateItem() is expected to carry no tool-call expectations
+        Assert.Null(CreateItem().ExpectedToolCalls);
+    }
+
+    [Fact]
+    public void EvalItem_ExpectedToolCalls_CanBeSet()
+    {
+        var weatherCall = new ExpectedToolCall("get_weather", new Dictionary<string, object> { ["location"] = "NYC" });
+        var flightCall = new ExpectedToolCall("book_flight");
+
+        var item = CreateItem();
+        item.ExpectedToolCalls = new List<ExpectedToolCall> { weatherCall, flightCall };
+
+        // Both expectations are readable back in insertion order; the second has no arguments
+        Assert.NotNull(item.ExpectedToolCalls);
+        Assert.Equal(2, item.ExpectedToolCalls.Count);
+        Assert.Equal("get_weather", item.ExpectedToolCalls[0].Name);
+        Assert.Null(item.ExpectedToolCalls[1].Arguments);
+    }
+
+    [Fact]
+    public async Task LocalEvaluator_PopulatesInputItems_ForAuditingAsync()
+    {
+        // Arrange — two items; whether each one passes the check is not what is under test
+        var sunnyCheck = FunctionEvaluator.Create(
+            "is_sunny",
+            (string response) => response.Contains("sunny", StringComparison.OrdinalIgnoreCase));
+        var sut = new LocalEvaluator(sunnyCheck);
+
+        var first = CreateItem(query: "Weather?", response: "It's sunny!");
+        var second = CreateItem(query: "Temp?", response: "72 degrees");
+        var batch = new List<EvalItem> { first, second };
+
+        // Act
+        var outcome = await sut.EvaluateAsync(batch);
+
+        // Assert — InputItems echoes the original query/response pairs, in order
+        var audited = outcome.InputItems;
+        Assert.NotNull(audited);
+        Assert.Equal(2, audited.Count);
+        Assert.Equal("Weather?", audited[0].Query);
+        Assert.Equal("It's sunny!", audited[0].Response);
+        Assert.Equal("Temp?", audited[1].Query);
+        Assert.Equal("72 degrees", audited[1].Response);
+
+        // One evaluation result per input item, so the two lists line up by index
+        Assert.Equal(outcome.Items.Count, audited.Count);
+    }
+
+    // ---------------------------------------------------------------
+    // AgentEvaluationResults.AllPassed edge-case tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void AllPassed_EmptyItems_NoSubResults_ReturnsFalseAsync()
+    {
+        var empty = new AgentEvaluationResults("test", Array.Empty<EvaluationResult>());
+        Assert.Equal(0, empty.Total); // no items and no sub-results were supplied
+        Assert.False(empty.AllPassed);
+    }
+
+    [Fact]
+    public void AllPassed_SubResultsAllPass_OverallFails_ReturnsFalseAsync()
+    {
+        // Arrange — the top-level result set owns one failing item
+        var badMetric = new BooleanMetric("check", false)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = true,
+                Rating = EvaluationRating.Unacceptable,
+            },
+        };
+        var failingItem = new EvaluationResult { Metrics = { ["check"] = badMetric } };
+
+        var overall = new AgentEvaluationResults("test", new[] { failingItem });
+
+        // ...while its only sub-result contains nothing but a passing item
+        var goodMetric = new BooleanMetric("check", true)
+        {
+            Interpretation = new EvaluationMetricInterpretation
+            {
+                Failed = false,
+                Rating = EvaluationRating.Good,
+            },
+        };
+        var passingItem = new EvaluationResult { Metrics = { ["check"] = goodMetric } };
+
+        var children = new Dictionary<string, AgentEvaluationResults>
+        {
+            ["agent1"] = new AgentEvaluationResults("sub", new[] { passingItem }),
+        };
+        overall.SubResults = children;
+
+        // Assert — passing sub-results must not mask a failure among the parent's
+        // own items, so AllPassed is expected to be false
+        Assert.False(overall.AllPassed);
+    }
+
+    // ---------------------------------------------------------------
+    // BuildEvalItem tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void BuildEvalItem_SetsPropertiesCorrectly()
+    {
+        // Arrange — one user input message and a single-message agent response
+        var inputMessages = new List<ChatMessage> { new ChatMessage(ChatRole.User, "test query") };
+        var response = new AgentResponse(new ChatMessage(ChatRole.Assistant, "response"));
+
+        // Act
+        var item = AgentEvaluationExtensions.BuildEvalItem("test query", response, inputMessages, null!);
+
+        Assert.Equal("test query", item.Query);
+        Assert.NotNull(item.RawResponse);
+    }
+
+    [Fact]
+    public void BuildEvalItem_DoesNotMutateInputMessages()
+    {
+        // Arrange
+        var userMsg = new ChatMessage(ChatRole.User, "hello");
+        var assistantMsg = new ChatMessage(ChatRole.Assistant, "world");
+        var inputMessages = new List<ChatMessage> { userMsg };
+        var response = new AgentResponse(assistantMsg);
+
+        // Act
+        var item = AgentEvaluationExtensions.BuildEvalItem("hello", response, inputMessages, null!);
+
+        // Assert — the caller's list still holds exactly the original instance; Assert.Same
+        // pins reference identity instead of relying on ChatMessage's equality semantics
+        Assert.Same(userMsg, Assert.Single(inputMessages));
+
+        // But the EvalItem's conversation includes the response message
+        Assert.Equal(2, item.Conversation.Count);
+    }
+
+    // ---------------------------------------------------------------
+    // BuildItemsFromResponses validation tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void BuildItemsFromResponses_MismatchedQueryAndResponseCount_Throws()
+    {
+        var responses = new[] { new AgentResponse(new ChatMessage(ChatRole.Assistant, "a1")) };
+        var queries = new[] { "q1", "q2" }; // two queries for a single response
+
+        var thrown = Assert.Throws<ArgumentException>(
+            () => AgentEvaluationExtensions.BuildItemsFromResponses(null!, responses, queries, null, null));
+        Assert.Contains("queries", thrown.Message);
+        Assert.Contains("responses", thrown.Message);
+    }
+
+    [Fact]
+    public void BuildItemsFromResponses_MismatchedExpectedOutput_Throws()
+    {
+        var responses = new[] { new AgentResponse(new ChatMessage(ChatRole.Assistant, "a1")) };
+        var queries = new[] { "q1" };
+        var expectedOutput = new[] { "e1", "e2" }; // two expected outputs for one query/response pair
+
+        var thrown = Assert.Throws<ArgumentException>(
+            () => AgentEvaluationExtensions.BuildItemsFromResponses(null!, responses, queries, expectedOutput, null));
+        Assert.Contains("expectedOutput", thrown.Message);
+    }
+
+    [Fact]
+    public void BuildItemsFromResponses_MismatchedExpectedToolCalls_Throws()
+    {
+        // One query/response pair, but two lists of expected tool calls
+        var responses = new[] { new AgentResponse(new ChatMessage(ChatRole.Assistant, "a1")) };
+        var queries = new[] { "q1" };
+        var expectedToolCalls = new[] { new[] { new ExpectedToolCall("t1") }, new[] { new ExpectedToolCall("t2") } };
+
+        var thrown = Assert.Throws<ArgumentException>(
+            () => AgentEvaluationExtensions.BuildItemsFromResponses(null!, responses, queries, null, expectedToolCalls));
+        Assert.Contains("expectedToolCalls", thrown.Message);
+    }
+
+    // ---------------------------------------------------------------
+    // FoundryEvals.BuildEvaluators tests
+    // ---------------------------------------------------------------
+
+    [Fact]
+    public void BuildEvaluators_QualityNames_ReturnsDistinctEvaluators()
+    {
+        // Two quality metric names are expected to yield two evaluators
+        var names = new[] { AzureAI.FoundryEvals.Relevance, AzureAI.FoundryEvals.Coherence };
+        var evaluators = AzureAI.FoundryEvals.BuildEvaluators(names);
+        Assert.Equal(2, evaluators.Count);
+    }
+
+    [Fact]
+    public void BuildEvaluators_MultipleSafetyNames_SingleContentHarmEvaluator()
+    {
+        // Arrange — four safety category names in one request
+        var safetyNames = new[]
+        {
+            AzureAI.FoundryEvals.Violence,
+            AzureAI.FoundryEvals.Sexual,
+            AzureAI.FoundryEvals.SelfHarm,
+            AzureAI.FoundryEvals.HateUnfairness,
+        };
+
+        // Act & Assert — the four names are expected to collapse into a single evaluator
+        Assert.Single(AzureAI.FoundryEvals.BuildEvaluators(safetyNames));
+    }
+
+    [Fact]
+    public void BuildEvaluators_UnknownName_ThrowsArgumentException()
+    {
+        // An unrecognised name must be rejected, and the message must echo it back
+        var unknownNames = new[] { "gobblygook" };
+        var thrown = Assert.Throws<ArgumentException>(() => AzureAI.FoundryEvals.BuildEvaluators(unknownNames));
+        Assert.Contains("gobblygook", thrown.Message);
+        Assert.Contains("not supported", thrown.Message, StringComparison.OrdinalIgnoreCase);
+    }
+
+    [Fact]
+    public void BuildEvaluators_DefaultSelection_ReturnsRelevanceAndCoherence()
+    {
+        // NOTE(review): the names are passed explicitly, so this does not exercise any
+        // constructor defaulting; it only pins that Relevance + Coherence yield two evaluators.
+        var evaluators = AzureAI.FoundryEvals.BuildEvaluators(
+            new[] { AzureAI.FoundryEvals.Relevance, AzureAI.FoundryEvals.Coherence });
+        Assert.Equal(2, evaluators.Count);
+    }
+}
diff --git a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Microsoft.Agents.AI.UnitTests.csproj b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Microsoft.Agents.AI.UnitTests.csproj
index ffa4417f34..8e1dba18bd 100644
--- a/dotnet/tests/Microsoft.Agents.AI.UnitTests/Microsoft.Agents.AI.UnitTests.csproj
+++ b/dotnet/tests/Microsoft.Agents.AI.UnitTests/Microsoft.Agents.AI.UnitTests.csproj
@@ -13,6 +13,16 @@
     <ProjectReference Include="..\..\src\Microsoft.Agents.AI.CopilotStudio\Microsoft.Agents.AI.CopilotStudio.csproj" />
   </ItemGroup>
 
+  <!-- AzureAI reference for FoundryEvals tests (evaluation types require net8.0+) -->
+  <ItemGroup Condition="$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <ProjectReference Include="..\..\src\Microsoft.Agents.AI.AzureAI\Microsoft.Agents.AI.AzureAI.csproj" />
+  </ItemGroup>
+
+  <!-- Evaluation tests require net8.0+ (MEAI.Evaluation does not support legacy TFMs) -->
+  <ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <Compile Remove="EvaluationTests.cs" />
+  </ItemGroup>
+
   <ItemGroup>
     <PackageReference Include="Microsoft.Extensions.DependencyInjection" />
     <PackageReference Include="Microsoft.Extensions.Logging" />
diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Microsoft.Agents.AI.Workflows.UnitTests.csproj b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Microsoft.Agents.AI.Workflows.UnitTests.csproj
index 58979a4f1b..6adedab6c3 100644
--- a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Microsoft.Agents.AI.Workflows.UnitTests.csproj
+++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/Microsoft.Agents.AI.Workflows.UnitTests.csproj
@@ -4,6 +4,11 @@
     <NoWarn>$(NoWarn);MEAI001</NoWarn>
   </PropertyGroup>
 
+  <!-- Evaluation tests require net8.0+ (MEAI.Evaluation does not support legacy TFMs) -->
+  <ItemGroup Condition="!$([MSBuild]::IsTargetFrameworkCompatible('$(TargetFramework)', 'net8.0'))">
+    <Compile Remove="WorkflowEvaluationTests.cs" />
+  </ItemGroup>
+
   <ItemGroup>
     <ProjectReference Include="..\..\src\Microsoft.Agents.AI\Microsoft.Agents.AI.csproj" />
     <ProjectReference Include="..\..\src\Microsoft.Agents.AI.Workflows\Microsoft.Agents.AI.Workflows.csproj" />
diff --git a/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/WorkflowEvaluationTests.cs b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/WorkflowEvaluationTests.cs
new file mode 100644
index 0000000000..1ab7e71a82
--- /dev/null
+++ b/dotnet/tests/Microsoft.Agents.AI.Workflows.UnitTests/WorkflowEvaluationTests.cs
@@ -0,0 +1,156 @@
+﻿// Copyright (c) Microsoft. All rights reserved.
+
+using System.Collections.Generic;
+
+namespace Microsoft.Agents.AI.Workflows.UnitTests;
+
+/// <summary>
+/// Exercises <see cref="WorkflowEvaluationExtensions.ExtractAgentData"/> with invoked/completed event sequences.
+/// </summary>
+public sealed class WorkflowEvaluationTests
+{
+    private const string AgentOne = "agent-1";
+    private const string AgentTwo = "agent-2";
+
+    [Fact]
+    public void ExtractAgentData_EmptyEvents_ReturnsEmpty()
+    {
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(new List<WorkflowEvent>(), splitter: null);
+
+        Assert.Empty(extracted);
+    }
+
+    [Fact]
+    public void ExtractAgentData_MatchedPair_ReturnsItem()
+    {
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, "What is the weather?"),
+            new ExecutorCompletedEvent(AgentOne, "It's sunny."),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Single(extracted);
+        Assert.True(extracted.ContainsKey(AgentOne));
+        var item = Assert.Single(extracted[AgentOne]);
+        Assert.Equal("What is the weather?", item.Query);
+        Assert.Equal("It's sunny.", item.Response);
+        Assert.Equal(2, item.Conversation.Count);
+    }
+
+    [Fact]
+    public void ExtractAgentData_UnmatchedInvocation_NotIncluded()
+    {
+        // Only an invocation is recorded (no completion follows), so nothing should be extracted
+        var timeline = new List<WorkflowEvent> { new ExecutorInvokedEvent(AgentOne, "Hello") };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Empty(extracted);
+    }
+
+    [Fact]
+    public void ExtractAgentData_CompletionWithoutInvocation_NotIncluded()
+    {
+        // Only a completion is recorded (no invocation precedes it), so nothing should be extracted
+        var timeline = new List<WorkflowEvent> { new ExecutorCompletedEvent(AgentOne, "Response") };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Empty(extracted);
+    }
+
+    [Fact]
+    public void ExtractAgentData_MultipleAgents_SeparatedByExecutorId()
+    {
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, "Q1"),
+            new ExecutorInvokedEvent(AgentTwo, "Q2"),
+            new ExecutorCompletedEvent(AgentOne, "A1"),
+            new ExecutorCompletedEvent(AgentTwo, "A2"),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Equal(2, extracted.Count);
+        var fromFirst = extracted[AgentOne][0];
+        Assert.Equal("Q1", fromFirst.Query);
+        Assert.Equal("A1", fromFirst.Response);
+        var fromSecond = extracted[AgentTwo][0];
+        Assert.Equal("Q2", fromSecond.Query);
+        Assert.Equal("A2", fromSecond.Response);
+    }
+
+    [Fact]
+    public void ExtractAgentData_DuplicateExecutorId_LastInvocationUsed()
+    {
+        // Two invocations of one executor precede a single completion;
+        // the later invocation is the one expected to be paired with it
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, "First question"),
+            new ExecutorInvokedEvent(AgentOne, "Second question"),
+            new ExecutorCompletedEvent(AgentOne, "Answer"),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Single(extracted);
+        var item = Assert.Single(extracted[AgentOne]);
+        Assert.Equal("Second question", item.Query);
+    }
+
+    [Fact]
+    public void ExtractAgentData_MultipleRoundsForSameExecutor_AllCaptured()
+    {
+        // One executor goes through two complete invoked→completed rounds, one after the other
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, "Q1"),
+            new ExecutorCompletedEvent(AgentOne, "A1"),
+            new ExecutorInvokedEvent(AgentOne, "Q2"),
+            new ExecutorCompletedEvent(AgentOne, "A2"),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Single(extracted); // a single executor key
+        var rounds = extracted[AgentOne];
+        Assert.Equal(2, rounds.Count); // one item per round
+        Assert.Equal("Q1", rounds[0].Query);
+        Assert.Equal("Q2", rounds[1].Query);
+    }
+
+    [Fact]
+    public void ExtractAgentData_NullData_UsesEmptyString()
+    {
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, null!),
+            new ExecutorCompletedEvent(AgentOne, null),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, splitter: null);
+
+        Assert.Single(extracted);
+        Assert.Equal(string.Empty, extracted[AgentOne][0].Query);
+        Assert.Equal(string.Empty, extracted[AgentOne][0].Response);
+    }
+
+    [Fact]
+    public void ExtractAgentData_WithSplitter_SetOnItems()
+    {
+        var lastTurn = ConversationSplitters.LastTurn;
+        var timeline = new List<WorkflowEvent>
+        {
+            new ExecutorInvokedEvent(AgentOne, "Q"),
+            new ExecutorCompletedEvent(AgentOne, "A"),
+        };
+
+        var extracted = WorkflowEvaluationExtensions.ExtractAgentData(timeline, lastTurn);
+
+        Assert.Equal(lastTurn, extracted[AgentOne][0].Splitter);
+    }
+}