Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
0f0e9db
Foundry Evals integration for Python
Mar 17, 2026
999d163
fix: resolve mypy redundant-cast errors while keeping pyright happy
Mar 20, 2026
41357a5
fix: CI failures — pyupgrade, evaluator overloads, sample API, reset …
Mar 20, 2026
ce21c83
fix: skip gRPC-dependent observability test
Mar 23, 2026
f296726
fix: add nosec B101 for bandit assert check
Mar 23, 2026
a6e3462
style: align eval samples with repo conventions
Mar 23, 2026
9f1341f
Address PR review feedback: async fixes, sample bugs, deprecation war…
Mar 23, 2026
fbaf285
Add test coverage for review feedback items
Mar 23, 2026
b8936cc
Fix ruff ISC004 lint error and apply formatter
Mar 23, 2026
5b51f00
Remove core type changes (extracted to fix/workflow-stale-session bra…
Mar 23, 2026
bf8c50a
Address PR review round 2: bugs, tests, and architecture
Mar 23, 2026
426cce3
Fix CI: revert test_full_conversation, fix pyright errors
Mar 24, 2026
0dcac91
Address PR review round 3: reliability, test gaps, cleanup
Mar 24, 2026
93a47ab
Fix CI: ruff S101 assert, pyright and mypy arg-type errors
Mar 24, 2026
af25546
Address PR review round 4: bugs, reliability, test fixes
Mar 24, 2026
39ade2f
Address PR review round 5: reliability, docs, test coverage
Mar 24, 2026
e1e232b
Fix error message: evaluate_responses() → evaluate_traces(response_id…
Mar 24, 2026
c4de7e0
Remove dead to_eval_data() method, fix docstring claims
Mar 24, 2026
834fd07
Reduce default eval timeout from 600s to 180s (3 minutes)
Mar 24, 2026
b00412a
Remove dead _evaluate_via_responses method from FoundryEvals
Mar 25, 2026
b568898
Revert unrelated formatting changes to get-started samples
Mar 25, 2026
1391e29
Fix pyright: remove phantom FoundryMemoryProvider import, apply ruff …
Mar 25, 2026
0f91238
Fix eval samples: use FoundryChatClient for Agent()
Mar 25, 2026
1438dc8
Simplify self-reflection sample using FoundryEvals
Mar 25, 2026
2ff2761
Update eval samples to FoundryChatClient and FOUNDRY_PROJECT_ENDPOINT
Mar 25, 2026
641c25a
Fix lint errors in eval samples (E501, ASYNC240, formatting)
Mar 25, 2026
8288bd9
Remove evaluate_all_patterns_sample.py (redundant with focused samples)
Mar 25, 2026
e820a54
Fix async credential mismatch: use azure.identity.aio for async AIPro…
Mar 25, 2026
9c050ef
Revert test_observability.py to upstream/main (not our test)
Mar 25, 2026
966f5a1
Address moonbox3 review: sphinx docstrings, pagination, isinstance check
Mar 26, 2026
9268b65
Fix test failures and address remaining moonbox3 review comments
Mar 26, 2026
1a36627
Address Tao's PR review comments on Foundry Evals
Mar 26, 2026
1af02d0
Remove unnecessary Any union from run type annotations
Mar 26, 2026
0252c5b
Accept FoundryChatClient instead of raw AsyncOpenAI
Mar 26, 2026
1156a34
Remove implicit 60s upper bound on poll interval
Mar 27, 2026
b5142f1
Remove 1s floor on poll interval — let the developer control it
Mar 27, 2026
d0a57ef
Update python/samples/05-end-to-end/evaluation/foundry_evals/.env.exa…
alliscode Mar 27, 2026
2d4fb5f
Update python/samples/02-agents/evaluation/evaluate_agent.py
alliscode Mar 27, 2026
b67f22c
Address eavanvalkenburg review (round 2) on Python eval PR
Mar 27, 2026
b63dd34
Move FoundryEvals to foundry package, split tool eval sample
Mar 27, 2026
47d05e2
Auto-create FoundryChatClient from env vars when no client provided
Mar 27, 2026
fde1bb9
Fix pyright errors: remove dead _normalize_queries, suppress EvalAPIE…
Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/decisions/0020-foundry-evals-integration.md
Original file line number Diff line number Diff line change
Expand Up @@ -462,7 +462,7 @@ class FoundryEvals:
### Azure AI: FoundryEvals Constants

```python
from agent_framework_azure_ai import FoundryEvals
from agent_framework.foundry import FoundryEvals

evaluators = [FoundryEvals.RELEVANCE, FoundryEvals.TOOL_CALL_ACCURACY]
```
Expand Down
40 changes: 40 additions & 0 deletions python/packages/core/agent_framework/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,27 @@
included_messages,
included_token_count,
)
from ._evaluation import (
AgentEvalConverter,
CheckResult,
ConversationSplit,
ConversationSplitter,
EvalItem,
EvalItemResult,
EvalNotPassedError,
EvalResults,
EvalScoreResult,
Evaluator,
ExpectedToolCall,
LocalEvaluator,
evaluate_agent,
evaluate_workflow,
evaluator,
keyword_check,
tool_call_args_match,
tool_called_check,
tool_calls_present,
)
from ._mcp import MCPStdioTool, MCPStreamableHTTPTool, MCPWebsocketTool
from ._middleware import (
AgentContext,
Expand Down Expand Up @@ -242,6 +263,7 @@
"USER_AGENT_TELEMETRY_DISABLED_ENV_VAR",
"Agent",
"AgentContext",
"AgentEvalConverter",
"AgentExecutor",
"AgentExecutorRequest",
"AgentExecutorResponse",
Expand All @@ -268,11 +290,14 @@
"ChatOptions",
"ChatResponse",
"ChatResponseUpdate",
"CheckResult",
"CheckpointStorage",
"CompactionProvider",
"CompactionStrategy",
"Content",
"ContinuationToken",
"ConversationSplit",
"ConversationSplitter",
"Default",
"Edge",
"EdgeCondition",
Expand All @@ -281,7 +306,14 @@
"EmbeddingGenerationOptions",
"EmbeddingInputT",
"EmbeddingT",
"EvalItem",
"EvalItemResult",
"EvalNotPassedError",
"EvalResults",
"EvalScoreResult",
"Evaluator",
"Executor",
"ExpectedToolCall",
"FanInEdgeGroup",
"FanOutEdgeGroup",
"FileCheckpointStorage",
Expand All @@ -300,6 +332,7 @@
"InMemoryCheckpointStorage",
"InMemoryHistoryProvider",
"InProcRunnerContext",
"LocalEvaluator",
"MCPStdioTool",
"MCPStreamableHTTPTool",
"MCPWebsocketTool",
Expand Down Expand Up @@ -379,11 +412,15 @@
"chat_middleware",
"create_edge_runner",
"detect_media_type_from_base64",
"evaluate_agent",
"evaluate_workflow",
"evaluator",
"executor",
"function_middleware",
"handler",
"included_messages",
"included_token_count",
"keyword_check",
"load_settings",
"map_chat_to_agent_update",
"merge_chat_options",
Expand All @@ -396,6 +433,9 @@
"resolve_agent_id",
"response_handler",
"tool",
"tool_call_args_match",
"tool_called_check",
"tool_calls_present",
"validate_chat_options",
"validate_tool_mode",
"validate_tools",
Expand Down
Loading
Loading