From d0ec9d05165f2818ca103f0496eb4b23153cd3b7 Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Mon, 8 Dec 2025 23:19:57 +0000 Subject: [PATCH 01/10] feat: Implement Enterprise Chat Agent with Azure Functions - Added demo HTTP requests for the Enterprise Chat Agent API. - Developed the main function app with HTTP triggers for thread management, message handling, and health checks. - Integrated Cosmos DB for persistent storage of threads and messages. - Created tools for weather information, calculations, and knowledge base searches. - Configured infrastructure using Bicep templates for Azure resources including Function App, Cosmos DB, Application Insights, and Log Analytics. - Added example local settings for development and updated requirements for necessary packages. --- .../demos/enterprise-chat-agent/.gitignore | 61 +++ .../demos/enterprise-chat-agent/DESIGN.md | 478 ++++++++++++++++++ .../demos/enterprise-chat-agent/README.md | 202 ++++++++ .../demos/enterprise-chat-agent/azure.yaml | 12 + .../enterprise-chat-agent/cosmos_store.py | 232 +++++++++ .../demos/enterprise-chat-agent/demo.http | 83 +++ .../enterprise-chat-agent/function_app.py | 369 ++++++++++++++ .../demos/enterprise-chat-agent/host.json | 15 + .../infra/abbreviations.json | 8 + .../infra/core/database/cosmos-nosql.bicep | 118 +++++ .../infra/core/host/function-app.bicep | 188 +++++++ .../infra/core/monitor/monitoring.bicep | 55 ++ .../infra/core/storage/storage-account.bicep | 59 +++ .../enterprise-chat-agent/infra/main.bicep | 140 +++++ .../infra/main.parameters.json | 18 + .../local.settings.json.example | 13 + .../enterprise-chat-agent/requirements.txt | 17 + .../enterprise-chat-agent/tools/__init__.py | 16 + .../enterprise-chat-agent/tools/calculator.py | 81 +++ .../tools/knowledge_base.py | 90 ++++ .../enterprise-chat-agent/tools/weather.py | 34 ++ 21 files changed, 2289 insertions(+) create mode 100644 
python/samples/demos/enterprise-chat-agent/.gitignore create mode 100644 python/samples/demos/enterprise-chat-agent/DESIGN.md create mode 100644 python/samples/demos/enterprise-chat-agent/README.md create mode 100644 python/samples/demos/enterprise-chat-agent/azure.yaml create mode 100644 python/samples/demos/enterprise-chat-agent/cosmos_store.py create mode 100644 python/samples/demos/enterprise-chat-agent/demo.http create mode 100644 python/samples/demos/enterprise-chat-agent/function_app.py create mode 100644 python/samples/demos/enterprise-chat-agent/host.json create mode 100644 python/samples/demos/enterprise-chat-agent/infra/abbreviations.json create mode 100644 python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep create mode 100644 python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep create mode 100644 python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep create mode 100644 python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep create mode 100644 python/samples/demos/enterprise-chat-agent/infra/main.bicep create mode 100644 python/samples/demos/enterprise-chat-agent/infra/main.parameters.json create mode 100644 python/samples/demos/enterprise-chat-agent/local.settings.json.example create mode 100644 python/samples/demos/enterprise-chat-agent/requirements.txt create mode 100644 python/samples/demos/enterprise-chat-agent/tools/__init__.py create mode 100644 python/samples/demos/enterprise-chat-agent/tools/calculator.py create mode 100644 python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py create mode 100644 python/samples/demos/enterprise-chat-agent/tools/weather.py diff --git a/python/samples/demos/enterprise-chat-agent/.gitignore b/python/samples/demos/enterprise-chat-agent/.gitignore new file mode 100644 index 0000000000..02fc31821c --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/.gitignore @@ -0,0 +1,61 @@ +# Python 
+__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# Azure Functions +local.settings.json +.python_packages/ +.func/ + +# Azure Developer CLI +.azure/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ +.nox/ + +# Logs +*.log +logs/ + +# Environment variables +.env +.env.local diff --git a/python/samples/demos/enterprise-chat-agent/DESIGN.md b/python/samples/demos/enterprise-chat-agent/DESIGN.md new file mode 100644 index 0000000000..611cc8a611 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/DESIGN.md @@ -0,0 +1,478 @@ +--- +status: proposed +contact: @vj-msft +date: 2024-12-06 +deciders: TBD +consulted: TBD +informed: TBD +--- + +# Production Chat API with Azure Functions, Cosmos DB & Agent Framework + +## References + +- **GitHub Issue**: [#2436 - Python: [Sample Request] Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) +- **Microsoft Documentation**: + - [Create and run a durable agent (Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) + - [Agent Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) + - [Multi-agent Reference Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) + - [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) + +## What is the goal of this feature? + +Provide a **production-ready sample** demonstrating how to build a scalable Chat API using the Microsoft Agent Framework with: + +1. **Azure Functions** for serverless, scalable hosting +2. 
**Azure Cosmos DB** for durable conversation persistence +3. **Function Tools** showcasing runtime tool selection by the agent + +### Value Proposition + +- Developers can use this sample as a reference architecture for deploying Agent Framework in production +- Demonstrates enterprise patterns: state persistence, observability, and thread-based conversations +- Shows the power of **agent autonomy** - the agent decides which tools to invoke at runtime based on conversation context + +### Success Metrics + +1. Sample is referenced in at least 3 external blog posts/tutorials within 6 months +2. Sample serves as the canonical reference for the "Agent Framework + Azure Functions + Cosmos DB" stack + +## What is the problem being solved? + +### Current Pain Points + +1. **No production-ready Python sample exists** - Existing samples focus on getting-started scenarios, not production deployment +2. **Gap in persistence guidance** - .NET has a `CosmosNoSql` package, but Python has no equivalent sample or implementation +3. **Tool selection patterns unclear** - Developers need to see how agents autonomously select tools at runtime + +### Why is this hard today? + +- Developers must piece together patterns from multiple sources +- No reference implementation for Cosmos DB persistence in Python +- Azure Functions + Agent Framework integration patterns are spread across docs + +## Architecture Overview + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ Client Applications │ +│ (Web, Mobile, CLI, Postman, etc.) 
│ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────────────────────────────────────┐ +│ Azure Functions (Flex Consumption) │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ HTTP Trigger Endpoints │ │ +│ │ POST /api/threads/{thread_id}/messages - Send message │ │ +│ │ GET /api/threads/{thread_id}/messages - Get thread history │ │ +│ │ POST /api/threads - Create new thread │ │ +│ │ DELETE /api/threads/{id} - Delete thread │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ ChatAgent │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ +│ │ │ WeatherTool │ │ SearchTool │ │ CalculatorTool │ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │ +│ │ │ │ +│ │ Agent autonomously selects tools based on user intent │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ + │ + ┌────────────────┼────────────────┐ + ▼ ▼ ▼ + ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ + │ Azure OpenAI │ │ Cosmos DB │ │ App │ + │ (GPT-4o) │ │ (NoSQL) │ │ Insights │ + │ │ │ │ │ │ + │ Chat model │ │ Threads & │ │ Telemetry │ + │ completions │ │ Messages │ │ & Tracing │ + └──────────────┘ └──────────────┘ └──────────────┘ +``` + +## Key Design Decisions + +### 1. Runtime Tool Selection (Agent Autonomy) + +The agent is configured with multiple tools but **decides at runtime** which tool(s) to invoke: + +```python +# Tools are registered, but agent decides when to use them +agent = ChatAgent( + chat_client=azure_openai_client, + instructions="You are a helpful assistant. 
Use available tools when needed.", + tools=[ + get_weather, # Weather information + search_web, # Web search + calculate, # Math operations + get_stock_price, # Stock quotes + ] +) + +# User asks: "What's the weather in Seattle and what's 15% tip on $85?" +# Agent autonomously invokes: get_weather("Seattle") AND calculate("85 * 0.15") +``` + +### 2. Cosmos DB Persistence Strategy + +**Data Model**: One document per message (optimized for append-heavy workloads) + +```json +{ + "id": "msg_abc123", + "thread_id": "thread_xyz789", + "role": "user", + "content": "What's the weather in Seattle?", + "timestamp": "2024-12-06T10:30:00Z", + "metadata": { + "tool_calls": null, + "model": null + } +} +``` + +**Partition Strategy**: + +- **Partition Key**: `/thread_id` (optimal for retrieving all messages in a conversation) +- All messages for a thread are stored together, enabling efficient queries + +### 3. Azure Functions Hosting + +Using **HTTP Triggers** for a familiar REST API pattern: + +- Standard HTTP trigger endpoints (POST, GET, DELETE) +- Explicit state management via Cosmos DB +- Flex Consumption plan for serverless scaling (0 to thousands of instances) +- Simple deployment model using Azure Functions Core Tools or `azd` + +### 4. 
Simple Thread-Based Architecture + +```python +# Thread isolation via partition key +async def get_thread_messages(thread_id: str): + query = "SELECT * FROM c WHERE c.thread_id = @thread_id ORDER BY c.timestamp" + return await container.query_items( + query=query, + parameters=[{"name": "@thread_id", "value": thread_id}], + partition_key=thread_id # Scoped to thread's partition + ) +``` + +## API Design + +### Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/threads` | Create a new conversation thread | +| `GET` | `/api/threads/{thread_id}` | Get thread metadata | +| `DELETE` | `/api/threads/{thread_id}` | Delete a thread and its messages | +| `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | +| `GET` | `/api/threads/{thread_id}/messages` | Get conversation history | + +### Request/Response Examples + +**Create Thread** + +```http +POST /api/threads + +{ + "metadata": { + "user_id": "user_123", + "session_type": "support" + } +} +``` + +**Send Message** + +```http +POST /api/threads/thread_xyz789/messages +Content-Type: application/json + +{ + "content": "What's the weather in Seattle and calculate 15% tip on $85?" +} +``` + +**Response** (with tool usage) + +```json +{ + "id": "msg_resp_456", + "thread_id": "thread_xyz789", + "role": "assistant", + "content": "The weather in Seattle is 52°F with light rain. 
A 15% tip on $85 is $12.75.", + "tool_calls": [ + { + "tool": "get_weather", + "arguments": {"location": "Seattle"}, + "result": {"temp": 52, "condition": "light rain"} + }, + { + "tool": "calculate", + "arguments": {"expression": "85 * 0.15"}, + "result": 12.75 + } + ], + "timestamp": "2024-12-06T10:30:05Z" +} +``` + +## E2E Code Samples + +### Basic Usage (Phase 1) + +```python +from azure.identity import DefaultAzureCredential +from microsoft.agents.ai.azure import AzureOpenAIChatClient +from microsoft.agents.core import ChatAgent + +# Initialize Azure OpenAI client +credential = DefaultAzureCredential() +chat_client = AzureOpenAIChatClient( + endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], + model=os.environ["AZURE_OPENAI_MODEL"], + credential=credential, +) + +# Define tools - agent will decide when to use them +@ai_function +def get_weather(location: str) -> dict: + """Get current weather for a location.""" + # Implementation here + return {"temp": 52, "condition": "light rain", "location": location} + +@ai_function +def calculate(expression: str) -> float: + """Evaluate a mathematical expression.""" + # Safe evaluation implementation + return eval(expression) # Use safe_eval in production + +@ai_function +def search_knowledge_base(query: str) -> list[dict]: + """Search the knowledge base for relevant information.""" + # Could connect to Azure AI Search, Cosmos DB, etc. + return [{"title": "...", "content": "..."}] + +# Create agent with multiple tools +agent = ChatAgent( + chat_client=chat_client, + instructions="""You are a helpful assistant. + Use the available tools when they can help answer the user's question. 
+ You can use multiple tools in a single response if needed.""", + tools=[get_weather, calculate, search_knowledge_base], +) + +# Agent autonomously decides which tools to use +response = await agent.run("What's the weather in NYC and what's 20% of 150?") +# Agent will call: get_weather("NYC") AND calculate("150 * 0.20") +``` + +### With Cosmos DB Persistence (Phase 2) + +```python +from microsoft.agents.stores.cosmosdb import CosmosDBChatMessageStore + +# Initialize Cosmos DB store +message_store = CosmosDBChatMessageStore( + endpoint=os.environ["COSMOS_ENDPOINT"], + database_name="chat_db", + container_name="messages", + credential=DefaultAzureCredential(), +) + +# Create agent with persistent storage +agent = ChatAgent( + chat_client=chat_client, + instructions="...", + tools=[get_weather, calculate, search_knowledge_base], + message_store=message_store, # Persistent storage +) + +# Messages are automatically persisted +thread_id = "thread_abc123" +response = await agent.run( + "What's the weather?", + thread_id=thread_id, +) +``` + +### Azure Functions Integration (Phase 3) + +```python +# function_app.py +import azure.functions as func +from microsoft.agents.ai.azure import AzureOpenAIChatClient +from microsoft.agents.core import ChatAgent +from microsoft.agents.stores.cosmosdb import CosmosDBChatMessageStore + +app = func.FunctionApp() + +# Singleton instances (reused across invocations) +chat_client = None +message_store = None +agent = None + +def get_agent(): + global chat_client, message_store, agent + if agent is None: + chat_client = AzureOpenAIChatClient(...) + message_store = CosmosDBChatMessageStore(...) 
+ agent = ChatAgent( + chat_client=chat_client, + tools=[get_weather, calculate, search_knowledge_base], + message_store=message_store, + ) + return agent + +@app.route(route="threads/{thread_id}/messages", methods=["POST"]) +async def send_message(req: func.HttpRequest) -> func.HttpResponse: + thread_id = req.route_params.get("thread_id") + body = req.get_json() + + agent = get_agent() + response = await agent.run( + body["content"], + thread_id=thread_id, + ) + + return func.HttpResponse( + body=json.dumps(response.to_dict()), + mimetype="application/json", + ) +``` + +## Phased Implementation Plan + +### Phase 1: Core Chat API with Cosmos DB Persistence ✅ + +**Goal**: Demonstrate runtime tool selection with persistent storage + +- [x] Azure Functions HTTP triggers +- [x] Function tools (weather, calculator, knowledge base) +- [x] Cosmos DB thread and message persistence +- [x] `demo.http` file for testing +- [x] README with setup instructions +- [x] Infrastructure as Code (Bicep + azd) + +**Files**: + +```text +python/samples/demos/enterprise-chat-agent/ +├── README.md +├── requirements.txt +├── local.settings.json.example +├── host.json +├── function_app.py +├── cosmos_store.py # Cosmos DB conversation store +├── tools/ +│ ├── __init__.py +│ ├── weather.py +│ ├── calculator.py +│ └── knowledge_base.py +├── demo.http +├── azure.yaml # azd configuration +└── infra/ + ├── main.bicep + ├── abbreviations.json + └── core/ + ├── database/cosmos-nosql.bicep + ├── host/function-app.bicep + ├── monitor/monitoring.bicep + └── storage/storage-account.bicep +``` + +### Phase 2: Agent Framework Integration (PR #2) + +**Goal**: Integrate with Microsoft Agent Framework + +- [ ] Replace placeholder logic with `ChatAgent` +- [ ] Azure OpenAI integration via Agent Framework +- [ ] Conversation history passed to agent for context +- [ ] Tool execution via Agent Framework runtime + +### Phase 3: Production Hardening (PR #3) + +**Goal**: Enterprise-ready patterns + +- [ ] Managed 
Identity authentication +- [ ] OpenTelemetry tracing integration +- [ ] Structured logging +- [ ] Health check endpoint +- [ ] Retry policies and error handling + +### Phase 4: Observability Dashboard (PR #4) + +**Goal**: Operational visibility + +- [ ] Application Insights integration +- [ ] Custom metrics (tokens, latency, tool usage) +- [ ] Sample Kusto queries +- [ ] Azure Dashboard template (optional) + +### Phase 5: Redis Caching Extension (Future) + +**Goal**: High-frequency access optimization + +- [ ] Redis session cache +- [ ] Recent messages caching +- [ ] Rate limiting support + +## Security Considerations + +| Concern | Mitigation | +|---------|------------| +| **Authentication** | Azure AD / API Key via `X-API-Key` header | +| **Thread Isolation** | Cosmos DB partition key on `thread_id` | +| **Secrets Management** | Azure Key Vault for connection strings | +| **Network Security** | Private Endpoints for Cosmos DB & OpenAI | +| **Input Validation** | Pydantic models for request validation | + +## Testing Strategy + +1. **Unit Tests**: Tool functions, message store operations +2. **Integration Tests**: Cosmos DB emulator, Azure OpenAI mock +3. **E2E Tests**: Full API flow with `demo.http` +4. **Load Tests**: Azure Load Testing for scale validation + +## Open Questions + +1. **Package location**: Should `CosmosDBChatMessageStore` be a new package or part of existing `stores` package? +2. **Streaming support**: Should Phase 1 include SSE streaming responses? + +## Appendix: Tool Selection Examples + +### Example 1: Single Tool + +```text +User: "What's the weather in Tokyo?" +Agent Decision: → get_weather("Tokyo") +Response: "The weather in Tokyo is 68°F and sunny." +``` + +### Example 2: Multiple Tools + +```text +User: "What's the weather in Paris and what's 18% tip on €75?" +Agent Decision: → get_weather("Paris") + calculate("75 * 0.18") +Response: "Paris is 55°F with clouds. An 18% tip on €75 is €13.50." 
+``` + +### Example 3: No Tools Needed + +```text +User: "Tell me a joke" +Agent Decision: → No tools (direct response) +Response: "Why don't scientists trust atoms? Because they make up everything!" +``` + +### Example 4: Tool Selection Based on Context + +```text +User: "I need help with my order" +Agent Decision: → search_knowledge_base("order help support FAQ") +Response: "Based on our FAQ, here's how to check your order status..." +``` diff --git a/python/samples/demos/enterprise-chat-agent/README.md b/python/samples/demos/enterprise-chat-agent/README.md new file mode 100644 index 0000000000..9500386b96 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/README.md @@ -0,0 +1,202 @@ +# Enterprise Chat Agent + +A production-ready sample demonstrating how to build a scalable Chat API using Microsoft Agent Framework with Azure Functions and Cosmos DB. + +## Overview + +This sample showcases: + +- **Azure Functions HTTP Triggers** - Serverless REST API endpoints +- **Runtime Tool Selection** - Agent autonomously decides which tools to invoke based on user intent +- **Cosmos DB Persistence** - Durable thread and message storage with thread_id partition key +- **Production Patterns** - Error handling, observability, and security best practices +- **One-command deployment** - `azd up` deploys all infrastructure + +## Architecture + +```text +Client → Azure Functions (HTTP Triggers) → ChatAgent → Azure OpenAI + ↓ + [Tools] + ┌────────┼────────┐ + ↓ ↓ ↓ + Weather Calculator Search + ↓ + Cosmos DB (Persistence) +``` + +## Prerequisites + +- Python 3.11+ +- [Azure Developer CLI (azd)](https://learn.microsoft.com/azure/developer/azure-developer-cli/install-azd) +- [Azure Functions Core Tools v4](https://learn.microsoft.com/azure/azure-functions/functions-run-local) +- Azure subscription with: + - Azure OpenAI resource (GPT-4o recommended) + +## Quick Start + +### Option 1: Deploy to Azure (Recommended) + +Deploy the complete infrastructure with a single command: 
+ +```bash +cd python/samples/demos/enterprise-chat-agent + +# Login to Azure +azd auth login + +# Deploy infrastructure and application +azd up +``` + +This deploys: +- **Azure Function App** (Flex Consumption) - Serverless hosting +- **Azure Cosmos DB** (Serverless) - Conversation persistence +- **Azure Storage** - Function App state +- **Application Insights** - Monitoring and observability + +#### Configuration + +When you run `azd up`, you'll be prompted for: + +| Parameter | Description | +|-----------|-------------| +| `AZURE_ENV_NAME` | Environment name (e.g., `dev`, `prod`) | +| `AZURE_LOCATION` | Azure region (e.g., `eastus2`) | +| `AZURE_OPENAI_ENDPOINT` | Your Azure OpenAI endpoint URL | +| `AZURE_OPENAI_MODEL` | Model deployment name (default: `gpt-4o`) | + +#### Other azd Commands + +```bash +# Provision infrastructure only (no deployment) +azd provision + +# Deploy application code only +azd deploy + +# View deployed resources +azd show + +# Delete all resources +azd down +``` + +### Option 2: Run Locally + +```bash +cd python/samples/demos/enterprise-chat-agent +pip install -r requirements.txt +``` + +Copy `local.settings.json.example` to `local.settings.json` and update: + +```json +{ + "IsEncrypted": false, + "Values": { + "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "FUNCTIONS_WORKER_RUNTIME": "python", + "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", + "AZURE_OPENAI_MODEL": "gpt-4o", + "AZURE_OPENAI_API_VERSION": "2024-10-21", + "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", + "AZURE_COSMOS_DATABASE_NAME": "chat_db", + "AZURE_COSMOS_CONTAINER_NAME": "messages" + } +} +``` + +Run locally: + +```bash +func start +``` + +### Test the API + +Use the included `demo.http` file or: + +```bash +# Create a thread +curl -X POST http://localhost:7071/api/threads + +# Send a message +curl -X POST http://localhost:7071/api/threads/{thread_id}/messages \ + -H "Content-Type: application/json" \ 
+ -d '{"content": "What is the weather in Seattle and what is 15% tip on $85?"}' +``` + +## API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/threads` | Create a new conversation thread | +| `GET` | `/api/threads/{thread_id}` | Get thread metadata | +| `DELETE` | `/api/threads/{thread_id}` | Delete a thread and its messages | +| `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | +| `GET` | `/api/threads/{thread_id}/messages` | Get conversation history | + +## Tool Selection Demo + +The agent is configured with multiple tools and **decides at runtime** which to use: + +```text +User: "What's the weather in Tokyo?" +→ Agent calls: get_weather("Tokyo") + +User: "What's the weather in Paris and what's 18% tip on €75?" +→ Agent calls: get_weather("Paris") AND calculate("75 * 0.18") + +User: "Tell me a joke" +→ Agent responds directly (no tools needed) +``` + +## Project Structure + +```text +enterprise-chat-agent/ +├── azure.yaml # Azure Developer CLI configuration +├── DESIGN.md # Detailed design specification +├── README.md # This file +├── requirements.txt # Python dependencies +├── local.settings.json.example +├── host.json # Azure Functions host config +├── function_app.py # HTTP trigger endpoints +├── cosmos_store.py # Cosmos DB conversation store +├── tools/ +│ ├── __init__.py +│ ├── weather.py # Weather tool +│ ├── calculator.py # Calculator tool +│ └── knowledge_base.py # Knowledge base search tool +├── infra/ # Infrastructure as Code (Bicep) +│ ├── main.bicep # Main deployment template +│ ├── main.parameters.json # Parameter file +│ ├── abbreviations.json # Resource naming abbreviations +│ └── core/ +│ ├── database/ +│ │ └── cosmos-nosql.bicep +│ ├── host/ +│ │ └── function-app.bicep +│ ├── monitor/ +│ │ └── monitoring.bicep +│ └── storage/ +│ └── storage-account.bicep +└── demo.http # Test requests +``` + +## Design Documentation + +See [DESIGN.md](./DESIGN.md) for: + +- Detailed architecture 
decisions +- Cosmos DB data model and partition strategy +- Thread-based conversation isolation +- Phased implementation plan +- Security considerations + +## Related Resources + +- [GitHub Issue #2436](https://github.com/microsoft/agent-framework/issues/2436) +- [Microsoft Agent Framework Documentation](https://learn.microsoft.com/agent-framework/) +- [Azure Functions Python Developer Guide](https://learn.microsoft.com/azure/azure-functions/functions-reference-python) diff --git a/python/samples/demos/enterprise-chat-agent/azure.yaml b/python/samples/demos/enterprise-chat-agent/azure.yaml new file mode 100644 index 0000000000..64ac0027b6 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/azure.yaml @@ -0,0 +1,12 @@ +# Azure Developer CLI (azd) configuration +# Run `azd up` to provision infrastructure and deploy the application + +name: enterprise-chat-agent +metadata: + template: enterprise-chat-agent@0.0.1 + +services: + api: + project: . + language: python + host: function diff --git a/python/samples/demos/enterprise-chat-agent/cosmos_store.py b/python/samples/demos/enterprise-chat-agent/cosmos_store.py new file mode 100644 index 0000000000..fad9a91917 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/cosmos_store.py @@ -0,0 +1,232 @@ +# Copyright (c) Microsoft. All rights reserved. +""" +Cosmos DB Storage for Threads and Messages + +This module provides persistent storage for conversation threads and messages +using Azure Cosmos DB with thread_id as the partition key. 
+ +Document Types: +- Thread: {"type": "thread", "id": "thread_xxx", "thread_id": "thread_xxx", ...} +- Message: {"type": "message", "id": "msg_xxx", "thread_id": "thread_xxx", ...} +""" + +import logging +import os +from datetime import datetime, timezone +from typing import Any + +from azure.cosmos import CosmosClient, PartitionKey +from azure.cosmos.exceptions import CosmosResourceNotFoundError +from azure.identity import DefaultAzureCredential + + +class CosmosConversationStore: + """ + Manages conversation threads and messages in Azure Cosmos DB. + + Uses a single container with thread_id as partition key. + Documents are differentiated by 'type' field: 'thread' or 'message'. + """ + + def __init__( + self, + endpoint: str | None = None, + database_name: str | None = None, + container_name: str | None = None, + credential: Any | None = None, + ): + """ + Initialize the Cosmos DB conversation store. + + Args: + endpoint: Cosmos DB endpoint URL. Defaults to AZURE_COSMOS_ENDPOINT env var. + database_name: Database name. Defaults to AZURE_COSMOS_DATABASE_NAME env var. + container_name: Container name. Defaults to AZURE_COSMOS_CONTAINER_NAME env var. + credential: Azure credential. Defaults to DefaultAzureCredential. + """ + self.endpoint = endpoint or os.environ.get("AZURE_COSMOS_ENDPOINT") + self.database_name = database_name or os.environ.get( + "AZURE_COSMOS_DATABASE_NAME", "chat_db" + ) + self.container_name = container_name or os.environ.get( + "AZURE_COSMOS_CONTAINER_NAME", "messages" + ) + + if not self.endpoint: + raise ValueError( + "Cosmos DB endpoint is required. " + "Set AZURE_COSMOS_ENDPOINT environment variable." 
+ ) + + self.credential = credential or DefaultAzureCredential() + self._client: CosmosClient | None = None + self._container = None + + @property + def container(self): + """Lazy initialization of Cosmos DB container client.""" + if self._container is None: + self._client = CosmosClient(self.endpoint, credential=self.credential) + database = self._client.get_database_client(self.database_name) + self._container = database.get_container_client(self.container_name) + return self._container + + # ------------------------------------------------------------------------- + # Thread Operations + # ------------------------------------------------------------------------- + + async def create_thread(self, thread_id: str, metadata: dict | None = None) -> dict: + """ + Create a new conversation thread. + + Args: + thread_id: Unique thread identifier. + metadata: Optional metadata (user_id, session_type, etc.). + + Returns: + The created thread document. + """ + thread = { + "id": thread_id, + "thread_id": thread_id, # Partition key + "type": "thread", + "created_at": datetime.now(timezone.utc).isoformat(), + "metadata": metadata or {}, + } + + self.container.create_item(body=thread) + logging.info(f"Created thread {thread_id} in Cosmos DB") + return thread + + async def get_thread(self, thread_id: str) -> dict | None: + """ + Get a thread by ID. + + Args: + thread_id: Thread identifier. + + Returns: + Thread document or None if not found. + """ + try: + thread = self.container.read_item( + item=thread_id, + partition_key=thread_id, + ) + return thread + except CosmosResourceNotFoundError: + return None + + async def delete_thread(self, thread_id: str) -> bool: + """ + Delete a thread and all its messages. + + Args: + thread_id: Thread identifier. + + Returns: + True if deleted, False if not found. 
+ """ + # First, get all items in the partition (thread + messages) + query = "SELECT c.id FROM c WHERE c.thread_id = @thread_id" + items = list( + self.container.query_items( + query=query, + parameters=[{"name": "@thread_id", "value": thread_id}], + partition_key=thread_id, + ) + ) + + if not items: + return False + + # Delete all items in the partition + for item in items: + self.container.delete_item(item=item["id"], partition_key=thread_id) + + logging.info(f"Deleted thread {thread_id} and {len(items)} items from Cosmos DB") + return True + + # ------------------------------------------------------------------------- + # Message Operations + # ------------------------------------------------------------------------- + + async def add_message( + self, + thread_id: str, + message_id: str, + role: str, + content: str, + tool_calls: list[dict] | None = None, + ) -> dict: + """ + Add a message to a thread. + + Args: + thread_id: Thread identifier (partition key). + message_id: Unique message identifier. + role: Message role ('user' or 'assistant'). + content: Message content. + tool_calls: Optional list of tool calls made by the agent. + + Returns: + The created message document. + """ + message = { + "id": message_id, + "thread_id": thread_id, # Partition key + "type": "message", + "role": role, + "content": content, + "tool_calls": tool_calls, + "timestamp": datetime.now(timezone.utc).isoformat(), + } + + self.container.create_item(body=message) + logging.info(f"Added {role} message {message_id} to thread {thread_id}") + return message + + async def get_messages( + self, + thread_id: str, + limit: int = 100, + ) -> list[dict]: + """ + Get all messages in a thread, ordered by timestamp. + + Args: + thread_id: Thread identifier. + limit: Maximum number of messages to return. + + Returns: + List of message documents. 
+ """ + query = """ + SELECT * FROM c + WHERE c.thread_id = @thread_id AND c.type = 'message' + ORDER BY c.timestamp ASC + """ + + messages = list( + self.container.query_items( + query=query, + parameters=[{"name": "@thread_id", "value": thread_id}], + partition_key=thread_id, + max_item_count=limit, + ) + ) + + return messages + + async def thread_exists(self, thread_id: str) -> bool: + """ + Check if a thread exists. + + Args: + thread_id: Thread identifier. + + Returns: + True if thread exists, False otherwise. + """ + thread = await self.get_thread(thread_id) + return thread is not None diff --git a/python/samples/demos/enterprise-chat-agent/demo.http b/python/samples/demos/enterprise-chat-agent/demo.http new file mode 100644 index 0000000000..248a620b0a --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/demo.http @@ -0,0 +1,83 @@ +### Enterprise Chat Agent - Demo Requests +### Use with VS Code REST Client extension or similar + +@baseUrl = http://localhost:7071/api + +### ============================================================ +### Health Check +### ============================================================ + +### Check API health +GET {{baseUrl}}/health + +### ============================================================ +### Thread Management +### ============================================================ + +### Create a new thread +# @name createThread +POST {{baseUrl}}/threads +Content-Type: application/json + +{ + "metadata": { + "user_id": "user_123", + "session_type": "support" + } +} + +### Get thread ID from response +@threadId = {{createThread.response.body.id}} + +### Get thread details +GET {{baseUrl}}/threads/{{threadId}} + +### ============================================================ +### Tool Selection Examples +### ============================================================ + +### Example 1: Single Tool - Weather +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "What's 
"""
Enterprise Chat Agent - Azure Functions HTTP Triggers

This sample demonstrates a production-ready Chat API using Microsoft Agent Framework
with Azure Functions. The agent is configured with multiple tools and autonomously
decides which tools to invoke based on user intent.

Key Features:
- Azure Functions HTTP triggers for REST API endpoints
- ChatAgent with runtime tool selection
- Cosmos DB for persistent thread and message storage
- Partition key on thread_id for optimal query performance
"""

import json
import logging
import os
import re
import uuid
from decimal import Decimal

import azure.functions as func
from azure.identity import DefaultAzureCredential

# TODO: Uncomment when implementing with actual Agent Framework
# from microsoft.agents.ai.azure import AzureOpenAIChatClient
# from microsoft.agents.core import ChatAgent

from tools import get_weather, calculate, search_knowledge_base
from cosmos_store import CosmosConversationStore

app = func.FunctionApp()

# -----------------------------------------------------------------------------
# Cosmos DB Storage (singleton for reuse across invocations)
# -----------------------------------------------------------------------------
_store: CosmosConversationStore | None = None


def get_store() -> CosmosConversationStore:
    """Get or create the Cosmos DB conversation store instance."""
    global _store
    if _store is None:
        _store = CosmosConversationStore()
        logging.info("Initialized Cosmos DB conversation store")
    return _store


# -----------------------------------------------------------------------------
# Agent initialization (singleton pattern for reuse across invocations)
# -----------------------------------------------------------------------------
_agent = None


def get_agent():
    """
    Get or create the ChatAgent instance.
    Uses singleton pattern to reuse across function invocations.

    The agent is not wired up yet: ``_agent`` is never assigned, so this
    currently logs a placeholder message on every call and returns None.
    """
    global _agent
    if _agent is None:
        # TODO: Initialize actual Agent Framework components
        # credential = DefaultAzureCredential()
        # chat_client = AzureOpenAIChatClient(
        #     endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
        #     model=os.environ["AZURE_OPENAI_MODEL"],
        #     credential=credential,
        # )
        # _agent = ChatAgent(
        #     chat_client=chat_client,
        #     instructions="""You are a helpful assistant.
        #     Use the available tools when they can help answer the user's question.
        #     You can use multiple tools in a single response if needed.""",
        #     tools=[get_weather, calculate, search_knowledge_base],
        # )
        logging.info("Agent initialized (placeholder)")
    return _agent


# -----------------------------------------------------------------------------
# Shared helpers
# -----------------------------------------------------------------------------
# First word following a standalone "in" (e.g. "weather in seattle?" -> "seattle").
_LOCATION_RE = re.compile(r"\bin\s+([^\W\d_][\w'-]*)")
# "<number>%" or "<number> percent".
_PERCENT_RE = re.compile(r"(\d+(?:\.\d+)?)\s*(?:%|percent)")
# Amount introduced by "on" / "of", with an optional dollar sign.
_AMOUNT_RE = re.compile(r"\b(?:on|of)\s+\$?\s*(\d+(?:\.\d+)?)")

_DEFAULT_LOCATION = "Seattle"
_DEFAULT_PERCENT = "15"
_DEFAULT_AMOUNT = "85"


def _json_response(payload, status_code: int = 200) -> func.HttpResponse:
    """Serialize *payload* to JSON and wrap it in an HTTP response."""
    return func.HttpResponse(
        body=json.dumps(payload),
        status_code=status_code,
        mimetype="application/json",
    )


def _extract_location(content_lower: str) -> str:
    """Return the title-cased word after the last standalone "in", or the default."""
    matches = _LOCATION_RE.findall(content_lower)
    return matches[-1].title() if matches else _DEFAULT_LOCATION


def _extract_percentage(content_lower: str) -> tuple[str, str] | None:
    """Return ``(percent, amount)`` as digit strings, or None if either is missing."""
    percent = _PERCENT_RE.search(content_lower)
    amount = _AMOUNT_RE.search(content_lower)
    if percent is None or amount is None:
        return None
    return percent.group(1), amount.group(1)


def _run_placeholder_agent(content: str) -> tuple[str, list[dict]]:
    """
    Keyword-based stand-in for the real agent.

    Returns the response text and the list of tool-call records produced
    while building it.
    """
    tool_calls: list[dict] = []
    response_content = ""
    content_lower = content.lower()

    if "weather" in content_lower:
        location = _extract_location(content_lower)
        weather_result = get_weather(location)
        tool_calls.append({
            "tool": "get_weather",
            "arguments": {"location": location},
            "result": weather_result,
        })
        response_content += (
            f"The weather in {location} is {weather_result['temp']}°F "
            f"with {weather_result['condition']}. "
        )

    if any(word in content_lower for word in ("calculate", "tip", "%", "percent")):
        parsed = _extract_percentage(content_lower)
        # Fall back to the original fixed demo (15% tip on $85) when the
        # request does not contain both a percentage and an amount.
        percent, amount = parsed or (_DEFAULT_PERCENT, _DEFAULT_AMOUNT)
        # Decimal keeps "15" -> "0.15" exact; format "f" avoids exponents.
        rate = format(Decimal(percent) / 100, "f")
        expression = f"{amount} * {rate}"
        calc_result = calculate(expression)
        tool_calls.append({
            "tool": "calculate",
            "arguments": {"expression": expression},
            "result": calc_result,
        })
        if parsed is None or "tip" in content_lower:
            response_content += (
                f"A {percent}% tip on ${amount} is ${calc_result:.2f}."
            )
        else:
            response_content += f"{percent}% of {amount} is {calc_result:.2f}."

    if not response_content:
        response_content = (
            f"I received your message: '{content}'. How can I help you further?"
        )

    return response_content, tool_calls


# -----------------------------------------------------------------------------
# HTTP Trigger: Create Thread
# -----------------------------------------------------------------------------
@app.route(route="threads", methods=["POST"])
async def create_thread(req: func.HttpRequest) -> func.HttpResponse:
    """
    Create a new conversation thread.

    Request:
        POST /api/threads
        Body: {"metadata": {"user_id": "...", "session_type": "..."}}

    Response:
        201 Created
        {"id": "thread_xxx", "created_at": "...", "metadata": {...}}

    A missing, invalid or non-object JSON body is treated as an empty body.
    """
    try:
        body = req.get_json() if req.get_body() else {}
    except ValueError:
        body = {}
    # Valid JSON that is not an object (list, string, null, ...) has no
    # .get(); treat it like an empty body instead of failing with a 500.
    if not isinstance(body, dict):
        body = {}

    thread_id = f"thread_{uuid.uuid4().hex[:12]}"
    metadata = body.get("metadata", {})

    # Store thread in Cosmos DB
    store = get_store()
    thread = await store.create_thread(thread_id, metadata)

    logging.info(f"Created thread {thread_id}")

    return _json_response(thread, status_code=201)


# -----------------------------------------------------------------------------
# HTTP Trigger: Get Thread
# -----------------------------------------------------------------------------
@app.route(route="threads/{thread_id}", methods=["GET"])
async def get_thread(req: func.HttpRequest) -> func.HttpResponse:
    """
    Get thread metadata.

    Request:
        GET /api/threads/{thread_id}

    Response:
        200 OK
        {"id": "thread_xxx", "created_at": "...", "metadata": {...}}

        404 Not Found when the thread does not exist.
    """
    thread_id = req.route_params.get("thread_id")

    store = get_store()
    thread = await store.get_thread(thread_id)

    if thread is None:
        return _json_response({"error": "Thread not found"}, status_code=404)

    return _json_response(thread)


# -----------------------------------------------------------------------------
# HTTP Trigger: Delete Thread
# -----------------------------------------------------------------------------
@app.route(route="threads/{thread_id}", methods=["DELETE"])
async def delete_thread(req: func.HttpRequest) -> func.HttpResponse:
    """
    Delete a thread and its messages.

    Request:
        DELETE /api/threads/{thread_id}

    Response:
        204 No Content

        404 Not Found when the thread does not exist.
    """
    thread_id = req.route_params.get("thread_id")

    store = get_store()
    deleted = await store.delete_thread(thread_id)

    if not deleted:
        return _json_response({"error": "Thread not found"}, status_code=404)

    logging.info(f"Deleted thread {thread_id}")

    return func.HttpResponse(status_code=204)


# -----------------------------------------------------------------------------
# HTTP Trigger: Send Message
# -----------------------------------------------------------------------------
@app.route(route="threads/{thread_id}/messages", methods=["POST"])
async def send_message(req: func.HttpRequest) -> func.HttpResponse:
    """
    Send a message to the agent and get a response.

    The agent will autonomously decide which tools to use based on the message content.

    Request:
        POST /api/threads/{thread_id}/messages
        Body: {"content": "What's the weather in Seattle?"}

    Response:
        200 OK
        {
            "id": "msg_xxx",
            "thread_id": "thread_xxx",
            "role": "assistant",
            "content": "The weather in Seattle is...",
            "tool_calls": [...],
            "timestamp": "..."
        }

        404 Not Found when the thread does not exist.
        400 Bad Request when the body is not a JSON object or has no
        non-empty string "content".
    """
    thread_id = req.route_params.get("thread_id")

    store = get_store()

    # Check if thread exists
    if not await store.thread_exists(thread_id):
        return _json_response({"error": "Thread not found"}, status_code=404)

    try:
        body = req.get_json()
    except ValueError:
        return _json_response({"error": "Invalid JSON body"}, status_code=400)
    # A JSON array/string/number parses fine but is not a valid request.
    if not isinstance(body, dict):
        return _json_response({"error": "Invalid JSON body"}, status_code=400)

    content = body.get("content")
    # Non-string content would crash the text handling below.
    if not content or not isinstance(content, str):
        return _json_response(
            {"error": "Missing 'content' in request body"},
            status_code=400,
        )

    # Store user message in Cosmos DB
    user_message_id = f"msg_{uuid.uuid4().hex[:12]}"
    await store.add_message(
        thread_id=thread_id,
        message_id=user_message_id,
        role="user",
        content=content,
    )

    # TODO: Replace with actual agent invocation
    # agent = get_agent()
    # response = await agent.run(content, thread_id=thread_id)

    # Placeholder response (demonstrates tool selection pattern)
    response_content, tool_calls = _run_placeholder_agent(content)

    # Store assistant response in Cosmos DB
    assistant_message_id = f"msg_{uuid.uuid4().hex[:12]}"
    assistant_message = await store.add_message(
        thread_id=thread_id,
        message_id=assistant_message_id,
        role="assistant",
        content=response_content.strip(),
        tool_calls=tool_calls if tool_calls else None,
    )

    logging.info(
        f"Processed message for thread {thread_id}, "
        f"tools used: {[t['tool'] for t in tool_calls]}"
    )

    return _json_response(assistant_message)


# -----------------------------------------------------------------------------
# HTTP Trigger: Get Messages
# -----------------------------------------------------------------------------
@app.route(route="threads/{thread_id}/messages", methods=["GET"])
async def get_messages(req: func.HttpRequest) -> func.HttpResponse:
    """
    Get conversation history for a thread.

    Request:
        GET /api/threads/{thread_id}/messages

    Response:
        200 OK
        {"messages": [...]}

        404 Not Found when the thread does not exist.
    """
    thread_id = req.route_params.get("thread_id")

    store = get_store()

    # Check if thread exists
    if not await store.thread_exists(thread_id):
        return _json_response({"error": "Thread not found"}, status_code=404)

    messages = await store.get_messages(thread_id)

    return _json_response({"messages": messages})


# -----------------------------------------------------------------------------
# HTTP Trigger: Health Check
# -----------------------------------------------------------------------------
@app.route(route="health", methods=["GET"])
async def health_check(req: func.HttpRequest) -> func.HttpResponse:
    """
    Health check endpoint for monitoring.

    Request:
        GET /api/health

    Response:
        200 OK
        {"status": "healthy", "version": "1.0.0", "cosmos_connected": true}

    ``cosmos_connected`` reports whether the store's container client could
    be obtained without raising; ``status`` is always "healthy".
    """
    cosmos_connected = False
    try:
        store = get_store()
        # Accessing the property builds the client on first use; later calls
        # reuse the cached client.
        # NOTE(review): this does not look like a live round-trip to Cosmos DB
        # on every call -- confirm whether a cheap read is wanted here.
        _ = store.container
        cosmos_connected = True
    except Exception as e:  # top-level boundary: report, never fail the probe
        logging.warning(f"Cosmos DB connectivity check failed: {e}")

    return _json_response({
        "status": "healthy",
        "version": "1.0.0",
        "cosmos_connected": cosmos_connected,
    })
} +} diff --git a/python/samples/demos/enterprise-chat-agent/infra/abbreviations.json b/python/samples/demos/enterprise-chat-agent/infra/abbreviations.json new file mode 100644 index 0000000000..e078caf0d6 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/abbreviations.json @@ -0,0 +1,8 @@ +{ + "documentDBDatabaseAccounts": "cosmos-", + "insightsComponents": "appi-", + "operationalInsightsWorkspaces": "log-", + "resourcesResourceGroups": "rg-", + "storageStorageAccounts": "st", + "webSitesFunctions": "func-" +} diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep b/python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep new file mode 100644 index 0000000000..3e7759101d --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep @@ -0,0 +1,118 @@ +// Copyright (c) Microsoft. All rights reserved. +// Azure Cosmos DB NoSQL account, database, and container + +@description('Name of the Cosmos DB account') +param accountName string + +@description('Location for the Cosmos DB account') +param location string + +@description('Tags to apply to the Cosmos DB account') +param tags object = {} + +@description('Name of the database') +param databaseName string + +@description('Name of the container') +param containerName string + +@description('Partition key path for the container') +param partitionKeyPath string = '/thread_id' + +@description('Enable free tier (only one per subscription)') +param enableFreeTier bool = false + +@description('Default consistency level') +@allowed(['Eventual', 'ConsistentPrefix', 'Session', 'BoundedStaleness', 'Strong']) +param defaultConsistencyLevel string = 'Session' + +// ============================================================================ +// Cosmos DB Account +// ============================================================================ + +resource cosmosAccount 
'Microsoft.DocumentDB/databaseAccounts@2023-11-15' = { + name: accountName + location: location + tags: tags + kind: 'GlobalDocumentDB' + properties: { + databaseAccountOfferType: 'Standard' + enableFreeTier: enableFreeTier + consistencyPolicy: { + defaultConsistencyLevel: defaultConsistencyLevel + } + locations: [ + { + locationName: location + failoverPriority: 0 + isZoneRedundant: false + } + ] + capabilities: [ + { + name: 'EnableServerless' + } + ] + // Security settings + publicNetworkAccess: 'Enabled' + disableLocalAuth: false + } +} + +// ============================================================================ +// Database +// ============================================================================ + +resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2023-11-15' = { + parent: cosmosAccount + name: databaseName + properties: { + resource: { + id: databaseName + } + } +} + +// ============================================================================ +// Container +// ============================================================================ + +resource container 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2023-11-15' = { + parent: database + name: containerName + properties: { + resource: { + id: containerName + partitionKey: { + paths: [partitionKeyPath] + kind: 'Hash' + } + indexingPolicy: { + automatic: true + indexingMode: 'consistent' + includedPaths: [ + { + path: '/*' + } + ] + excludedPaths: [ + { + path: '/"_etag"/?' 
+ } + ] + } + // Default TTL: -1 means items don't expire unless specified + defaultTtl: -1 + } + } +} + +// ============================================================================ +// Outputs +// ============================================================================ + +output accountId string = cosmosAccount.id +output accountName string = cosmosAccount.name +output endpoint string = cosmosAccount.properties.documentEndpoint +output databaseName string = database.name +output containerName string = container.name diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep b/python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep new file mode 100644 index 0000000000..87bd5abf4c --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep @@ -0,0 +1,188 @@ +// Copyright (c) Microsoft. All rights reserved. +// Azure Function App (Flex Consumption Plan) + +@description('Name of the Function App') +param name string + +@description('Location for the Function App') +param location string + +@description('Tags to apply to the Function App') +param tags object = {} + +@description('Name of the storage account') +param storageAccountName string + +@description('Name of the Application Insights resource') +param applicationInsightsName string + +@description('Name of the Cosmos DB account') +param cosmosAccountName string + +@description('Cosmos DB database name') +param cosmosDatabaseName string + +@description('Cosmos DB container name') +param cosmosContainerName string + +@description('Azure OpenAI endpoint URL') +param azureOpenAiEndpoint string = '' + +@description('Azure OpenAI model deployment name') +param azureOpenAiModel string = 'gpt-4o' + +// ============================================================================ +// References to existing resources +// ============================================================================ + +resource storageAccount 
'Microsoft.Storage/storageAccounts@2023-01-01' existing = { + name: storageAccountName +} + +resource applicationInsights 'Microsoft.Insights/components@2020-02-02' existing = { + name: applicationInsightsName +} + +resource cosmosAccount 'Microsoft.DocumentDB/databaseAccounts@2023-11-15' existing = { + name: cosmosAccountName +} + +// ============================================================================ +// App Service Plan (Flex Consumption) +// ============================================================================ + +resource flexPlan 'Microsoft.Web/serverfarms@2023-12-01' = { + name: '${name}-plan' + location: location + tags: tags + sku: { + tier: 'FlexConsumption' + name: 'FC1' + } + kind: 'functionapp' + properties: { + reserved: true // Required for Linux + } +} + +// ============================================================================ +// Function App +// ============================================================================ + +resource functionApp 'Microsoft.Web/sites@2023-12-01' = { + name: name + location: location + tags: union(tags, { 'azd-service-name': 'api' }) + kind: 'functionapp,linux' + identity: { + type: 'SystemAssigned' + } + properties: { + serverFarmId: flexPlan.id + httpsOnly: true + publicNetworkAccess: 'Enabled' + functionAppConfig: { + deployment: { + storage: { + type: 'blobContainer' + value: '${storageAccount.properties.primaryEndpoints.blob}deploymentpackage' + authentication: { + type: 'SystemAssignedIdentity' + } + } + } + scaleAndConcurrency: { + maximumInstanceCount: 100 + instanceMemoryMB: 2048 + } + runtime: { + name: 'python' + version: '3.11' + } + } + siteConfig: { + appSettings: [ + { + name: 'AzureWebJobsStorage__accountName' + value: storageAccount.name + } + { + name: 'FUNCTIONS_EXTENSION_VERSION' + value: '~4' + } + { + name: 'APPLICATIONINSIGHTS_CONNECTION_STRING' + value: applicationInsights.properties.ConnectionString + } + { + name: 'AZURE_OPENAI_ENDPOINT' + value: azureOpenAiEndpoint + } 
+ { + name: 'AZURE_OPENAI_MODEL' + value: azureOpenAiModel + } + { + name: 'COSMOS_ENDPOINT' + value: cosmosAccount.properties.documentEndpoint + } + { + name: 'COSMOS_DATABASE_NAME' + value: cosmosDatabaseName + } + { + name: 'COSMOS_CONTAINER_NAME' + value: cosmosContainerName + } + ] + cors: { + allowedOrigins: [ + 'https://portal.azure.com' + ] + } + } + } +} + +// ============================================================================ +// Role Assignments for Storage (required for MI-based deployment) +// ============================================================================ + +// Storage Blob Data Owner role for Function App managed identity +var storageBlobDataOwnerRoleId = 'b7e6dc6d-f1e8-4753-8033-0f276bb0955b' + +resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = { + name: guid(storageAccount.id, functionApp.id, storageBlobDataOwnerRoleId) + scope: storageAccount + properties: { + roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', storageBlobDataOwnerRoleId) + principalId: functionApp.identity.principalId + principalType: 'ServicePrincipal' + } +} + +// ============================================================================ +// Role Assignments +// ============================================================================ + +// Cosmos DB Data Contributor role for Function App managed identity +var cosmosDataContributorRoleId = '00000000-0000-0000-0000-000000000002' // Cosmos DB Built-in Data Contributor + +resource cosmosRoleAssignment 'Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-11-15' = { + parent: cosmosAccount + name: guid(cosmosAccount.id, functionApp.id, cosmosDataContributorRoleId) + properties: { + roleDefinitionId: '${cosmosAccount.id}/sqlRoleDefinitions/${cosmosDataContributorRoleId}' + principalId: functionApp.identity.principalId + scope: cosmosAccount.id + } +} + +// 
============================================================================ +// Outputs +// ============================================================================ + +output id string = functionApp.id +output name string = functionApp.name +output url string = 'https://${functionApp.properties.defaultHostName}' +output principalId string = functionApp.identity.principalId diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep b/python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep new file mode 100644 index 0000000000..e39693e8ba --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft. All rights reserved. +// Monitoring resources: Log Analytics Workspace and Application Insights + +@description('Location for all resources') +param location string + +@description('Tags to apply to all resources') +param tags object = {} + +@description('Name of the Log Analytics workspace') +param logAnalyticsName string + +@description('Name of the Application Insights resource') +param applicationInsightsName string + +// ============================================================================ +// Log Analytics Workspace +// ============================================================================ + +resource logAnalytics 'Microsoft.OperationalInsights/workspaces@2022-10-01' = { + name: logAnalyticsName + location: location + tags: tags + properties: { + sku: { + name: 'PerGB2018' + } + retentionInDays: 30 + } +} + +// ============================================================================ +// Application Insights +// ============================================================================ + +resource applicationInsights 'Microsoft.Insights/components@2020-02-02' = { + name: applicationInsightsName + location: location + tags: tags + kind: 'web' + properties: { + Application_Type: 'web' + WorkspaceResourceId: 
logAnalytics.id + } +} + +// ============================================================================ +// Outputs +// ============================================================================ + +output logAnalyticsWorkspaceId string = logAnalytics.id +output logAnalyticsWorkspaceName string = logAnalytics.name +output applicationInsightsId string = applicationInsights.id +output applicationInsightsName string = applicationInsights.name +output applicationInsightsConnectionString string = applicationInsights.properties.ConnectionString diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep b/python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep new file mode 100644 index 0000000000..6149c555a6 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep @@ -0,0 +1,59 @@ +// Copyright (c) Microsoft. All rights reserved. +// Azure Storage Account for Function App + +@description('Name of the storage account') +param name string + +@description('Location for the storage account') +param location string + +@description('Tags to apply to the storage account') +param tags object = {} + +@description('Storage account SKU') +@allowed(['Standard_LRS', 'Standard_GRS', 'Standard_ZRS']) +param sku string = 'Standard_LRS' + +// ============================================================================ +// Storage Account +// ============================================================================ + +resource storageAccount 'Microsoft.Storage/storageAccounts@2023-01-01' = { + name: name + location: location + tags: tags + sku: { + name: sku + } + kind: 'StorageV2' + properties: { + minimumTlsVersion: 'TLS1_2' + allowBlobPublicAccess: false + supportsHttpsTrafficOnly: true + allowSharedKeyAccess: true // Required for Function App deployment + accessTier: 'Hot' + } +} + +// Blob service for Function App deployment packages +resource blobService 
'Microsoft.Storage/storageAccounts/blobServices@2023-01-01' = { + parent: storageAccount + name: 'default' +} + +// Container for deployment packages (required for Flex Consumption) +resource deploymentContainer 'Microsoft.Storage/storageAccounts/blobServices/containers@2023-01-01' = { + parent: blobService + name: 'deploymentpackage' + properties: { + publicAccess: 'None' + } +} + +// ============================================================================ +// Outputs +// ============================================================================ + +output id string = storageAccount.id +output name string = storageAccount.name +output primaryEndpoints object = storageAccount.properties.primaryEndpoints diff --git a/python/samples/demos/enterprise-chat-agent/infra/main.bicep b/python/samples/demos/enterprise-chat-agent/infra/main.bicep new file mode 100644 index 0000000000..46884b0988 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/main.bicep @@ -0,0 +1,140 @@ +// Copyright (c) Microsoft. All rights reserved. 
+// Enterprise Chat Agent - Infrastructure as Code +// +// This Bicep template deploys: +// - Azure Function App (Flex Consumption) +// - Azure Cosmos DB (NoSQL) +// - Azure OpenAI (optional, can use existing) +// - Supporting resources (Storage, App Insights, Log Analytics) + +targetScope = 'subscription' + +// ============================================================================ +// Parameters +// ============================================================================ + +@minLength(1) +@maxLength(64) +@description('Name of the environment (e.g., dev, staging, prod)') +param environmentName string + +@minLength(1) +@description('Primary location for all resources') +param location string + +@description('Name of the resource group') +param resourceGroupName string = '' + +@description('Azure OpenAI endpoint URL (leave empty to create new)') +param azureOpenAiEndpoint string = '' + +@description('Azure OpenAI model deployment name') +param azureOpenAiModel string = 'gpt-4o' + +@description('Cosmos DB database name') +param cosmosDatabaseName string = 'chat_db' + +@description('Cosmos DB container name for messages') +param cosmosContainerName string = 'messages' + +// ============================================================================ +// Variables +// ============================================================================ + +var abbrs = loadJsonContent('./abbreviations.json') +var tags = { 'azd-env-name': environmentName } +var resourceToken = toLower(uniqueString(subscription().id, environmentName, location)) + +// ============================================================================ +// Resource Group +// ============================================================================ + +resource rg 'Microsoft.Resources/resourceGroups@2022-09-01' = { + name: !empty(resourceGroupName) ? 
resourceGroupName : '${abbrs.resourcesResourceGroups}${environmentName}' + location: location + tags: tags +} + +// ============================================================================ +// Monitoring (Log Analytics + App Insights) +// ============================================================================ + +module monitoring './core/monitor/monitoring.bicep' = { + name: 'monitoring' + scope: rg + params: { + location: location + tags: tags + logAnalyticsName: '${abbrs.operationalInsightsWorkspaces}${resourceToken}' + applicationInsightsName: '${abbrs.insightsComponents}${resourceToken}' + } +} + +// ============================================================================ +// Storage Account (for Function App) +// ============================================================================ + +module storage './core/storage/storage-account.bicep' = { + name: 'storage' + scope: rg + params: { + name: '${abbrs.storageStorageAccounts}${resourceToken}' + location: location + tags: tags + } +} + +// ============================================================================ +// Cosmos DB +// ============================================================================ + +module cosmos './core/database/cosmos-nosql.bicep' = { + name: 'cosmos' + scope: rg + params: { + accountName: '${abbrs.documentDBDatabaseAccounts}${resourceToken}' + location: location + tags: tags + databaseName: cosmosDatabaseName + containerName: cosmosContainerName + partitionKeyPath: '/thread_id' + } +} + +// ============================================================================ +// Function App +// ============================================================================ + +module functionApp './core/host/function-app.bicep' = { + name: 'functionApp' + scope: rg + params: { + name: '${abbrs.webSitesFunctions}${resourceToken}' + location: location + tags: tags + storageAccountName: storage.outputs.name + applicationInsightsName: monitoring.outputs.applicationInsightsName + 
cosmosAccountName: cosmos.outputs.accountName + cosmosDatabaseName: cosmosDatabaseName + cosmosContainerName: cosmosContainerName + azureOpenAiEndpoint: azureOpenAiEndpoint + azureOpenAiModel: azureOpenAiModel + } +} + +// ============================================================================ +// Outputs +// ============================================================================ + +output AZURE_LOCATION string = location +output AZURE_TENANT_ID string = tenant().tenantId +output AZURE_RESOURCE_GROUP string = rg.name + +output AZURE_FUNCTION_APP_NAME string = functionApp.outputs.name +output AZURE_FUNCTION_APP_URL string = functionApp.outputs.url + +output AZURE_COSMOS_ENDPOINT string = cosmos.outputs.endpoint +output AZURE_COSMOS_DATABASE_NAME string = cosmosDatabaseName +output AZURE_COSMOS_CONTAINER_NAME string = cosmosContainerName + +output APPLICATIONINSIGHTS_CONNECTION_STRING string = monitoring.outputs.applicationInsightsConnectionString diff --git a/python/samples/demos/enterprise-chat-agent/infra/main.parameters.json b/python/samples/demos/enterprise-chat-agent/infra/main.parameters.json new file mode 100644 index 0000000000..24e79cf2e9 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/infra/main.parameters.json @@ -0,0 +1,18 @@ +{ + "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentParameters.json#", + "contentVersion": "1.0.0.0", + "parameters": { + "environmentName": { + "value": "${AZURE_ENV_NAME}" + }, + "location": { + "value": "${AZURE_LOCATION}" + }, + "azureOpenAiEndpoint": { + "value": "${AZURE_OPENAI_ENDPOINT}" + }, + "azureOpenAiModel": { + "value": "${AZURE_OPENAI_MODEL=gpt-4o}" + } + } +} diff --git a/python/samples/demos/enterprise-chat-agent/local.settings.json.example b/python/samples/demos/enterprise-chat-agent/local.settings.json.example new file mode 100644 index 0000000000..4ffbb3ca42 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/local.settings.json.example @@ -0,0 
+1,13 @@ +{ + "IsEncrypted": false, + "Values": { + "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "FUNCTIONS_WORKER_RUNTIME": "python", + "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", + "AZURE_OPENAI_MODEL": "gpt-4o", + "AZURE_OPENAI_API_VERSION": "2024-10-21", + "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", + "AZURE_COSMOS_DATABASE_NAME": "chat_db", + "AZURE_COSMOS_CONTAINER_NAME": "messages" + } +} diff --git a/python/samples/demos/enterprise-chat-agent/requirements.txt b/python/samples/demos/enterprise-chat-agent/requirements.txt new file mode 100644 index 0000000000..0fcf2bd020 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/requirements.txt @@ -0,0 +1,17 @@ +# Azure Functions +azure-functions>=1.21.0 + +# Microsoft Agent Framework +# TODO: Uncomment when packages are published to PyPI +# microsoft-agents-core +# microsoft-agents-ai-azure + +# Azure SDK +azure-identity>=1.15.0 +azure-cosmos>=4.7.0 + +# Azure OpenAI (used directly until Agent Framework packages are published) +openai>=1.0.0 + +# Utilities +pydantic>=2.0.0 diff --git a/python/samples/demos/enterprise-chat-agent/tools/__init__.py b/python/samples/demos/enterprise-chat-agent/tools/__init__.py new file mode 100644 index 0000000000..11b66644f0 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/tools/__init__.py @@ -0,0 +1,16 @@ +""" +Enterprise Chat Agent - Function Tools + +This module contains the tools that the ChatAgent can invoke at runtime. +The agent autonomously decides which tools to use based on the user's message. 
+"""
+
+from tools.weather import get_weather
+from tools.calculator import calculate
+from tools.knowledge_base import search_knowledge_base
+
+__all__ = [
+    "get_weather",
+    "calculate",
+    "search_knowledge_base",
+]
diff --git a/python/samples/demos/enterprise-chat-agent/tools/calculator.py b/python/samples/demos/enterprise-chat-agent/tools/calculator.py
new file mode 100644
index 0000000000..4d7227c1a1
--- /dev/null
+++ b/python/samples/demos/enterprise-chat-agent/tools/calculator.py
@@ -0,0 +1,109 @@
+"""
+Calculator Tool
+
+Provides safe mathematical expression evaluation.
+"""
+
+import ast
+import operator
+from typing import Union
+
+# TODO: Uncomment when implementing with actual Agent Framework
+# from microsoft.agents.core import ai_function
+
+# Safe operators for expression evaluation
+SAFE_OPERATORS = {
+    ast.Add: operator.add,
+    ast.Sub: operator.sub,
+    ast.Mult: operator.mul,
+    ast.Div: operator.truediv,
+    ast.Pow: operator.pow,
+    ast.USub: operator.neg,
+    ast.UAdd: operator.pos,
+}
+
+# Upper bound (in bits) for integer results of "**"; stops inputs such as
+# "9 ** 9 ** 9" from exhausting CPU and memory.
+MAX_POWER_BITS = 4096
+
+
+def _safe_eval(node: ast.AST) -> Union[int, float]:
+    """
+    Safely evaluate an AST node containing only numeric operations.
+
+    Args:
+        node: The parsed expression or one of its sub-nodes.
+
+    Returns:
+        The numeric value of the node.
+
+    Raises:
+        ValueError: If the node uses an unsupported construct, or an integer
+            power would produce more than MAX_POWER_BITS bits.
+    """
+    if isinstance(node, ast.Constant):
+        # bool is a subclass of int, so True/False must be rejected explicitly.
+        if isinstance(node.value, (int, float)) and not isinstance(node.value, bool):
+            return node.value
+        raise ValueError(f"Unsupported constant type: {type(node.value)}")
+
+    if isinstance(node, ast.BinOp):
+        left = _safe_eval(node.left)
+        right = _safe_eval(node.right)
+        op_type = type(node.op)
+        if op_type not in SAFE_OPERATORS:
+            raise ValueError(f"Unsupported operator: {op_type.__name__}")
+        if (
+            op_type is ast.Pow
+            and isinstance(left, int)
+            and isinstance(right, int)
+            and right > 0
+            and left.bit_length() * right > MAX_POWER_BITS
+        ):
+            raise ValueError("Exponentiation result is too large")
+        return SAFE_OPERATORS[op_type](left, right)
+
+    if isinstance(node, ast.UnaryOp):
+        operand = _safe_eval(node.operand)
+        op_type = type(node.op)
+        if op_type in SAFE_OPERATORS:
+            return SAFE_OPERATORS[op_type](operand)
+        raise ValueError(f"Unsupported unary operator: {op_type.__name__}")
+
+    if isinstance(node, ast.Expression):
+        return _safe_eval(node.body)
+
+    raise ValueError(f"Unsupported AST node type: {type(node).__name__}")
+
+
+# @ai_function
+def calculate(expression: str) -> float:
+    """
+    Evaluate a mathematical expression safely.
+
+    Supports: +, -, *, /, ** (power), parentheses
+
+    Args:
+        expression: A mathematical expression string (e.g., "85 * 0.15")
+
+    Returns:
+        The result of the calculation.
+
+    Raises:
+        ValueError: If the expression is malformed, uses unsupported operations,
+            hits an arithmetic error (e.g. division by zero), or is complex-valued.
+    """
+    try:
+        # Parse the expression into an AST
+        tree = ast.parse(expression, mode="eval")
+
+        # Safely evaluate the AST
+        result = _safe_eval(tree)
+        if isinstance(result, complex):
+            # e.g. "(-8) ** 0.5" evaluates to a complex number
+            raise ValueError("result is not a real number")
+
+        return float(result)
+    except (SyntaxError, ValueError, ArithmeticError, RecursionError) as e:
+        # ArithmeticError covers ZeroDivisionError and OverflowError
+        raise ValueError(f"Invalid expression '{expression}': {e}") from e
diff --git a/python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py b/python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py
new file mode 100644
index 0000000000..9610229aab
--- /dev/null
+++ b/python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py
@@ -0,0 +1,90 @@
+"""
+Knowledge Base Search Tool
+
+Provides search functionality over a knowledge base.
+In a production scenario, this would integrate with Azure AI Search,
+Cosmos DB, or another search service.
+"""
+
+from typing import Optional
+
+# TODO: Uncomment when implementing with actual Agent Framework
+# from microsoft.agents.core import ai_function
+
+# Simulated knowledge base entries
+KNOWLEDGE_BASE = [
+    {
+        "id": "kb_001",
+        "title": "Order Status FAQ",
+        "content": "To check your order status, log into your account and visit the 'My Orders' section. You can also track your package using the tracking number sent to your email.",
+        "category": "orders",
+    },
+    {
+        "id": "kb_002",
+        "title": "Return Policy",
+        "content": "Items can be returned within 30 days of purchase. Items must be unused and in original packaging. Refunds are processed within 5-7 business days.",
+        "category": "returns",
+    },
+    {
+        "id": "kb_003",
+        "title": "Shipping Information",
+        "content": "Standard shipping takes 5-7 business days. Express shipping (2-3 days) is available for an additional fee. Free shipping on orders over $50.",
+        "category": "shipping",
+    },
+    {
+        "id": "kb_004",
+        "title": "Payment Methods",
+        "content": "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay. All transactions are securely processed.",
+        "category": "payments",
+    },
+    {
+        "id": "kb_005",
+        "title": "Account Management",
+        "content": "To update your account information, go to Settings > Profile. You can change your email, password, and notification preferences there.",
+        "category": "account",
+    },
+]
+
+
+# @ai_function
+def search_knowledge_base(
+    query: str,
+    category: Optional[str] = None,
+    max_results: int = 3,
+) -> list[dict]:
+    """
+    Search the knowledge base with simple case-insensitive keyword matching.
+
+    Args:
+        query: The search query; a blank query matches nothing.
+        category: Optional category to filter results (e.g., "orders", "returns").
+        max_results: Maximum number of results to return (<= 0 returns none).
+
+    Returns:
+        A list of matching entries, each with a simulated "relevance_score".
+    """
+    # A blank query would otherwise match every entry ("" is in any string).
+    words = query.lower().split()
+    if not words or max_results <= 0:
+        return []
+    wanted_category = category.lower() if category else None
+    results = []
+    for entry in KNOWLEDGE_BASE:
+        # Filter by category if specified
+        if wanted_category and entry["category"] != wanted_category:
+            continue
+
+        # Keyword matching over title and content (vector search in production)
+        haystack = f'{entry["title"]} {entry["content"]}'.lower()
+        if any(word in haystack for word in words):
+            results.append({
+                "id": entry["id"],
+                "title": entry["title"],
+                "content": entry["content"],
+                "category": entry["category"],
+                "relevance_score": 0.85,  # Simulated score
+            })
+
+    # Sort by relevance (simulated) and limit results
+    results.sort(key=lambda x: x["relevance_score"], reverse=True)
+    return results[:max_results]
diff --git a/python/samples/demos/enterprise-chat-agent/tools/weather.py b/python/samples/demos/enterprise-chat-agent/tools/weather.py
new file mode 100644
index 0000000000..b53689eec8
--- /dev/null
+++ b/python/samples/demos/enterprise-chat-agent/tools/weather.py
@@ -0,0 +1,34 @@
+"""
+Weather Tool
+
+Provides weather information for a given location.
+In a production scenario, this would integrate with a weather API.
+"""
+
+import random
+
+# TODO: Uncomment when implementing with actual Agent Framework
+# from microsoft.agents.core import ai_function
+
+
+# @ai_function
+def get_weather(location: str) -> dict:
+    """
+    Get current weather for a location.
+
+    Args:
+        location: The city or location to get weather for.
+
+    Returns:
+        A dictionary with "location", "temp", "condition", "humidity" and "unit".
+    """
+    # Simulated weather data (replace with actual API call in production)
+    conditions = ["sunny", "cloudy", "light rain", "partly cloudy", "overcast"]
+
+    return {
+        "location": location,
+        "temp": random.randint(32, 85),
+        "condition": random.choice(conditions),
+        "humidity": random.randint(30, 90),
+        "unit": "fahrenheit",
+    }
From 1e3ec835fd85e761c4b20a1f8b6a79b8845d92c7 Mon Sep 17 00:00:00 2001
From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com>
Date: Wed, 10 Dec 2025 00:23:49 +0000
Subject: [PATCH 02/10] Implement observability design for Enterprise Chat
 Agent with OpenTelemetry instrumentation

- Added observability design documentation outlining principles and framework integration.
- Created route blueprints for threads, messages, and health check endpoints.
- Implemented health check endpoint to verify Cosmos DB connectivity.
- Developed message handling endpoints for sending and retrieving messages in conversation threads.
- Established thread management endpoints for creating, retrieving, and deleting threads.
- Integrated Cosmos DB storage for threads and messages with lazy initialization.
- Added observability module to manage custom spans for HTTP requests, Cosmos DB operations, and validation checks.
- Configured OpenTelemetry setup for tracing and metrics collection.
- Updated requirements for OpenTelemetry and Azure Monitor dependencies.
- Documented environment variables and Azure Functions configuration for observability.
--- .../demos/enterprise-chat-agent/demo.http | 8 +- .../{ => docs}/DESIGN.md | 0 .../docs/observability-design.md | 243 ++++++++++++ .../enterprise-chat-agent/function_app.py | 364 +----------------- .../local.settings.json.example | 6 +- .../enterprise-chat-agent/requirements.txt | 15 +- .../enterprise-chat-agent/routes/__init__.py | 16 + .../enterprise-chat-agent/routes/health.py | 43 +++ .../enterprise-chat-agent/routes/messages.py | 208 ++++++++++ .../enterprise-chat-agent/routes/threads.py | 139 +++++++ .../services/__init__.py | 27 ++ .../{ => services}/cosmos_store.py | 89 ++++- .../services/observability.py | 177 +++++++++ 13 files changed, 969 insertions(+), 366 deletions(-) rename python/samples/demos/enterprise-chat-agent/{ => docs}/DESIGN.md (100%) create mode 100644 python/samples/demos/enterprise-chat-agent/docs/observability-design.md create mode 100644 python/samples/demos/enterprise-chat-agent/routes/__init__.py create mode 100644 python/samples/demos/enterprise-chat-agent/routes/health.py create mode 100644 python/samples/demos/enterprise-chat-agent/routes/messages.py create mode 100644 python/samples/demos/enterprise-chat-agent/routes/threads.py create mode 100644 python/samples/demos/enterprise-chat-agent/services/__init__.py rename python/samples/demos/enterprise-chat-agent/{ => services}/cosmos_store.py (70%) create mode 100644 python/samples/demos/enterprise-chat-agent/services/observability.py diff --git a/python/samples/demos/enterprise-chat-agent/demo.http b/python/samples/demos/enterprise-chat-agent/demo.http index 248a620b0a..7b45da810b 100644 --- a/python/samples/demos/enterprise-chat-agent/demo.http +++ b/python/samples/demos/enterprise-chat-agent/demo.http @@ -20,9 +20,11 @@ POST {{baseUrl}}/threads Content-Type: application/json { + "user_id": "user_123", + "title": "Customer Support Chat", "metadata": { - "user_id": "user_123", - "session_type": "support" + "session_type": "support", + "department": "technical" } } @@ -41,7 +43,7 
@@ POST {{baseUrl}}/threads/{{threadId}}/messages Content-Type: application/json { - "content": "What's the weather in Seattle?" + "content": "What's the weather in New York?" } ### Example 2: Single Tool - Calculator diff --git a/python/samples/demos/enterprise-chat-agent/DESIGN.md b/python/samples/demos/enterprise-chat-agent/docs/DESIGN.md similarity index 100% rename from python/samples/demos/enterprise-chat-agent/DESIGN.md rename to python/samples/demos/enterprise-chat-agent/docs/DESIGN.md diff --git a/python/samples/demos/enterprise-chat-agent/docs/observability-design.md b/python/samples/demos/enterprise-chat-agent/docs/observability-design.md new file mode 100644 index 0000000000..b10f934062 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/docs/observability-design.md @@ -0,0 +1,243 @@ +# Observability Design — Enterprise Chat Agent + +This document describes the OpenTelemetry observability design for the Enterprise Chat Agent, aligned with the Microsoft Agent Framework's built-in instrumentation. + +## Design Principles + +1. **Don't duplicate framework instrumentation** — Use the Agent Framework's automatic spans for agent/LLM/tool tracing +2. **Fill the gaps** — Add manual spans only for layers the framework cannot see (HTTP, Cosmos DB, validation) +3. 
**Use framework APIs** — Leverage `setup_observability()`, `get_tracer()`, and `get_meter()` from `agent_framework` + +--- + +## Framework's Built-in Instrumentation (Automatic) + +The Microsoft Agent Framework automatically creates these spans when you call agent/chat client methods: + +| Span Name Pattern | When Created | Key Attributes | +|---|---|---| +| `invoke_agent {agent_name}` | `agent.run()` / `agent.run_stream()` | `gen_ai.agent.id`, `gen_ai.agent.name`, `gen_ai.conversation.id` | +| `chat {model_id}` | `chat_client.get_response()` / `get_streaming_response()` | `gen_ai.request.model`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` | +| `execute_tool {function_name}` | Tool function invocations via `AIFunction` | `gen_ai.tool.name`, `gen_ai.tool.call.id`, `gen_ai.tool.type` | + +### Framework-Provided Functions + +```python +from agent_framework import setup_observability, get_tracer, get_meter +``` + +- **`setup_observability()`** — Configures TracerProvider, MeterProvider, LoggerProvider with OTLP/Azure Monitor exporters +- **`get_tracer()`** — Returns the configured tracer for custom spans +- **`get_meter()`** — Returns the configured meter for custom metrics + +### Automatic Metrics + +| Metric Name | Description | +|---|---| +| `gen_ai.client.operation.duration` | Duration of LLM operations | +| `gen_ai.client.token.usage` | Token usage (input/output) | +| `agent_framework.function.invocation.duration` | Tool function execution duration | + +--- + +## Tool vs Non-Tool Service Calls + +Whether you need manual spans depends on **how** a service is invoked: + +| Scenario | Manual Span Needed? 
| Why | +|----------|---------------------|-----| +| AI Search **as agent tool** | ❌ No | Framework creates `execute_tool` span automatically | +| Redis **as agent tool** | ❌ No | Framework creates `execute_tool` span automatically | +| AI Search **outside agent** (pre/post processing) | ✅ Yes | Framework doesn't see calls outside `agent.run()` | +| Redis **outside agent** (caching layer) | ✅ Yes | Framework doesn't see calls outside `agent.run()` | +| Cosmos DB (thread storage) | ✅ Yes | Always called outside agent context | + +### Example: Tool vs Direct Call + +```python +# AS A TOOL - Framework handles instrumentation automatically +@ai_function +async def search_knowledge_base(query: str) -> str: + return await ai_search_client.search(query) # No manual span needed + +response = await agent.run(message, tools=[search_knowledge_base]) +# Framework creates: invoke_agent → execute_tool search_knowledge_base + +# OUTSIDE AGENT - Manual span required +async with redis_span("get", "session_cache"): + cached_context = await redis.get(f"context:{thread_id}") # Before agent call + +async with ai_search_span("index", "conversation_logs"): + await ai_search.index_document(log) # After agent call for analytics +``` + +--- + +## Enterprise Chat Agent Custom Spans (Manual) + +The framework doesn't know about HTTP requests, Cosmos DB operations, or services called outside the agent. 
We add spans for these layers: + +| Layer | Span Name Pattern | Purpose | +|---|---|---| +| HTTP Request | `http.request {method} {path}` | Track request lifecycle | +| Cosmos DB | `cosmos.{operation} {container}` | Track database operations | +| Redis | `redis.{operation} {key_pattern}` | Track caching operations | +| AI Search | `ai_search.{operation} {index}` | Track search operations | +| Validation | `request.validate {operation}` | Track authorization checks | + +--- + +## Span Hierarchy + +```text +http.request POST /threads/{thread_id}/messages ← MANUAL (HTTP layer) +├── cosmos.read threads ← MANUAL (Cosmos layer) +├── request.validate verify_thread_ownership ← MANUAL (Validation) +├── invoke_agent ChatAgent ← FRAMEWORK (automatic) +│ ├── chat gpt-4o ← FRAMEWORK (automatic) +│ │ └── (internal LLM call spans) +│ └── execute_tool get_weather ← FRAMEWORK (automatic) +├── cosmos.upsert threads ← MANUAL (Cosmos layer) +└── http.response ← MANUAL (optional) +``` + +--- + +## Implementation + +The observability module (`services/observability.py`) provides async context managers: + +- **`init_observability()`** — Wraps `setup_observability()`, call once at startup +- **`http_request_span(method, path, thread_id, user_id)`** — Top-level HTTP span +- **`cosmos_span(operation, container, partition_key)`** — Cosmos DB operation span +- **`redis_span(operation, key_pattern)`** — Redis caching span +- **`ai_search_span(operation, index)`** — AI Search span +- **`validation_span(operation)`** — Request validation span + +### Usage Pattern + +```python +from services.observability import init_observability, http_request_span, cosmos_span + +init_observability() # Once at startup + +@app.route(route="threads/{thread_id}/messages", methods=["POST"]) +async def send_message(req: func.HttpRequest) -> func.HttpResponse: + async with http_request_span("POST", "/threads/{thread_id}/messages", thread_id, user_id): + async with cosmos_span("read", "threads", thread_id): + thread = await
cosmos_store.get_thread(thread_id) + + # Agent invocation - NO manual span needed (framework handles it) + response = await agent.run(message, thread=agent_thread) + + async with cosmos_span("upsert", "threads", thread_id): + await cosmos_store.save_thread_state(thread_id, thread) +``` + +--- + +## Dependencies + +Add to `requirements.txt`: + +```txt +# OpenTelemetry Core +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 + +# Exporters +opentelemetry-exporter-otlp>=1.25.0 +azure-monitor-opentelemetry-exporter>=1.0.0b41 + +# Semantic Conventions +opentelemetry-semantic-conventions-ai>=0.4.13 +``` + +--- + +## Environment Variables + +Configure these in `local.settings.json` or Azure Function App settings: + +| Variable | Description | Example | +|---|---|---| +| `ENABLE_OTEL` | Enable OpenTelemetry | `true` | +| `ENABLE_SENSITIVE_DATA` | Log message contents (dev only!) | `false` | +| `OTLP_ENDPOINT` | OTLP collector endpoint | `http://localhost:4317` | +| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Azure Monitor connection | `InstrumentationKey=...` | +| `OTEL_SERVICE_NAME` | Service name for traces | `enterprise-chat-agent` | + +### Example `local.settings.json` + +```json +{ + "IsEncrypted": false, + "Values": { + "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "FUNCTIONS_WORKER_RUNTIME": "python", + "AZURE_COSMOS_ENDPOINT": "https://your-cosmos.documents.azure.com:443/", + "AZURE_COSMOS_DATABASE_NAME": "chat-database", + "AZURE_OPENAI_ENDPOINT": "https://your-openai.openai.azure.com/", + "AZURE_OPENAI_MODEL": "gpt-4o", + "ENABLE_OTEL": "true", + "ENABLE_SENSITIVE_DATA": "false", + "OTLP_ENDPOINT": "http://localhost:4317", + "OTEL_SERVICE_NAME": "enterprise-chat-agent" + } +} +``` + +### Azure Functions `host.json` Configuration + +Enable OpenTelemetry mode for Azure Functions: + +```json +{ + "version": "2.0", + "logging": { + "applicationInsights": { + "samplingSettings": { + "isEnabled": true, + "excludedTypes": "Request" + } + } + }, + "telemetryMode":
"OpenTelemetry" +} +``` + +--- + +## Viewing Traces + +### Local Development + +Use one of these OTLP-compatible backends: + +1. **Jaeger**: `docker run -p 16686:16686 -p 4317:4317 jaegertracing/all-in-one` +2. **Aspire Dashboard**: Part of .NET Aspire, provides a nice UI +3. **AI Toolkit Extension**: Set `VS_CODE_EXTENSION_PORT` for VS Code integration + +### Azure Production + +Traces are sent to Azure Monitor Application Insights. View in: + +1. **Azure Portal** → Application Insights → Transaction Search +2. **Azure Portal** → Application Insights → Application Map (for distributed tracing) + +--- + +## Summary + +| Layer | Span | Instrumented By | +|-------|------|-----------------| +| HTTP Request | `http.request {method} {path}` | Enterprise Agent (manual) | +| Cosmos DB | `cosmos.{operation} {container}` | Enterprise Agent (manual) | +| Validation | `request.validate {operation}` | Enterprise Agent (manual) | +| Redis (outside agent) | `redis.{operation} {key}` | Enterprise Agent (manual) | +| AI Search (outside agent) | `ai_search.{operation} {index}` | Enterprise Agent (manual) | +| Agent Invocation | `invoke_agent {name}` | Agent Framework (automatic) | +| LLM Calls | `chat {model}` | Agent Framework (automatic) | +| Tool Execution | `execute_tool {function}` | Agent Framework (automatic) | + +**Key Insight**: If a service (Redis, AI Search, etc.) is invoked **as a tool** through `agent.run()`, the framework instruments it automatically. Only add manual spans for services called **outside** the agent context. diff --git a/python/samples/demos/enterprise-chat-agent/function_app.py b/python/samples/demos/enterprise-chat-agent/function_app.py index eecd0f67ea..026182835d 100644 --- a/python/samples/demos/enterprise-chat-agent/function_app.py +++ b/python/samples/demos/enterprise-chat-agent/function_app.py @@ -1,5 +1,7 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ """ -Enterprise Chat Agent - Azure Functions HTTP Triggers +Enterprise Chat Agent - Azure Functions Application This sample demonstrates a production-ready Chat API using Microsoft Agent Framework with Azure Functions. The agent is configured with multiple tools and autonomously @@ -9,361 +11,19 @@ - Azure Functions HTTP triggers for REST API endpoints - ChatAgent with runtime tool selection - Cosmos DB for persistent thread and message storage -- Partition key on thread_id for optimal query performance +- OpenTelemetry observability with automatic and custom spans """ -import json -import logging -import os -import uuid - import azure.functions as func -from azure.identity import DefaultAzureCredential -# TODO: Uncomment when implementing with actual Agent Framework -# from microsoft.agents.ai.azure import AzureOpenAIChatClient -# from microsoft.agents.core import ChatAgent +from services import init_observability +from routes import threads_bp, messages_bp, health_bp -from tools import get_weather, calculate, search_knowledge_base -from cosmos_store import CosmosConversationStore +# Initialize observability once at startup +init_observability() +# Create the Function App and register blueprints app = func.FunctionApp() - -# ----------------------------------------------------------------------------- -# Cosmos DB Storage (singleton for reuse across invocations) -# ----------------------------------------------------------------------------- -_store: CosmosConversationStore | None = None - - -def get_store() -> CosmosConversationStore: - """Get or create the Cosmos DB conversation store instance.""" - global _store - if _store is None: - _store = CosmosConversationStore() - logging.info("Initialized Cosmos DB conversation store") - return _store - - -# ----------------------------------------------------------------------------- -# Agent initialization (singleton pattern for reuse across invocations) -# 
----------------------------------------------------------------------------- -_agent = None - - -def get_agent(): - """ - Get or create the ChatAgent instance. - Uses singleton pattern to reuse across function invocations. - """ - global _agent - if _agent is None: - # TODO: Initialize actual Agent Framework components - # credential = DefaultAzureCredential() - # chat_client = AzureOpenAIChatClient( - # endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - # model=os.environ["AZURE_OPENAI_MODEL"], - # credential=credential, - # ) - # _agent = ChatAgent( - # chat_client=chat_client, - # instructions="""You are a helpful assistant. - # Use the available tools when they can help answer the user's question. - # You can use multiple tools in a single response if needed.""", - # tools=[get_weather, calculate, search_knowledge_base], - # ) - logging.info("Agent initialized (placeholder)") - return _agent - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Create Thread -# ----------------------------------------------------------------------------- -@app.route(route="threads", methods=["POST"]) -async def create_thread(req: func.HttpRequest) -> func.HttpResponse: - """ - Create a new conversation thread. 
- - Request: - POST /api/threads - Body: {"metadata": {"user_id": "...", "session_type": "..."}} - - Response: - 201 Created - {"id": "thread_xxx", "created_at": "...", "metadata": {...}} - """ - try: - body = req.get_json() if req.get_body() else {} - except ValueError: - body = {} - - thread_id = f"thread_{uuid.uuid4().hex[:12]}" - metadata = body.get("metadata", {}) - - # Store thread in Cosmos DB - store = get_store() - thread = await store.create_thread(thread_id, metadata) - - logging.info(f"Created thread {thread_id}") - - return func.HttpResponse( - body=json.dumps(thread), - status_code=201, - mimetype="application/json", - ) - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Get Thread -# ----------------------------------------------------------------------------- -@app.route(route="threads/{thread_id}", methods=["GET"]) -async def get_thread(req: func.HttpRequest) -> func.HttpResponse: - """ - Get thread metadata. - - Request: - GET /api/threads/{thread_id} - - Response: - 200 OK - {"id": "thread_xxx", "created_at": "...", "metadata": {...}} - """ - thread_id = req.route_params.get("thread_id") - - store = get_store() - thread = await store.get_thread(thread_id) - - if thread is None: - return func.HttpResponse( - body=json.dumps({"error": "Thread not found"}), - status_code=404, - mimetype="application/json", - ) - - return func.HttpResponse( - body=json.dumps(thread), - mimetype="application/json", - ) - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Delete Thread -# ----------------------------------------------------------------------------- -@app.route(route="threads/{thread_id}", methods=["DELETE"]) -async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: - """ - Delete a thread and its messages. 
- - Request: - DELETE /api/threads/{thread_id} - - Response: - 204 No Content - """ - thread_id = req.route_params.get("thread_id") - - store = get_store() - deleted = await store.delete_thread(thread_id) - - if not deleted: - return func.HttpResponse( - body=json.dumps({"error": "Thread not found"}), - status_code=404, - mimetype="application/json", - ) - - logging.info(f"Deleted thread {thread_id}") - - return func.HttpResponse(status_code=204) - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Send Message -# ----------------------------------------------------------------------------- -@app.route(route="threads/{thread_id}/messages", methods=["POST"]) -async def send_message(req: func.HttpRequest) -> func.HttpResponse: - """ - Send a message to the agent and get a response. - - The agent will autonomously decide which tools to use based on the message content. - - Request: - POST /api/threads/{thread_id}/messages - Body: {"content": "What's the weather in Seattle?"} - - Response: - 200 OK - { - "id": "msg_xxx", - "thread_id": "thread_xxx", - "role": "assistant", - "content": "The weather in Seattle is...", - "tool_calls": [...], - "timestamp": "..." 
- } - """ - thread_id = req.route_params.get("thread_id") - - store = get_store() - - # Check if thread exists - if not await store.thread_exists(thread_id): - return func.HttpResponse( - body=json.dumps({"error": "Thread not found"}), - status_code=404, - mimetype="application/json", - ) - - try: - body = req.get_json() - content = body.get("content") - if not content: - return func.HttpResponse( - body=json.dumps({"error": "Missing 'content' in request body"}), - status_code=400, - mimetype="application/json", - ) - except ValueError: - return func.HttpResponse( - body=json.dumps({"error": "Invalid JSON body"}), - status_code=400, - mimetype="application/json", - ) - - # Store user message in Cosmos DB - user_message_id = f"msg_{uuid.uuid4().hex[:12]}" - await store.add_message( - thread_id=thread_id, - message_id=user_message_id, - role="user", - content=content, - ) - - # TODO: Replace with actual agent invocation - # agent = get_agent() - # response = await agent.run(content, thread_id=thread_id) - - # Placeholder response (demonstrates tool selection pattern) - tool_calls = [] - response_content = "" - - # Simple keyword-based tool selection demo - content_lower = content.lower() - if "weather" in content_lower: - # Extract location (simplified) - location = "Seattle" # Default - if "in " in content_lower: - location = content_lower.split("in ")[-1].split()[0].title() - weather_result = get_weather(location) - tool_calls.append({ - "tool": "get_weather", - "arguments": {"location": location}, - "result": weather_result, - }) - response_content += ( - f"The weather in {location} is {weather_result['temp']}°F " - f"with {weather_result['condition']}. 
" - ) - - if any(word in content_lower for word in ["calculate", "tip", "%", "percent"]): - # Simplified calculation demo - calc_result = calculate("85 * 0.15") - tool_calls.append({ - "tool": "calculate", - "arguments": {"expression": "85 * 0.15"}, - "result": calc_result, - }) - response_content += f"A 15% tip on $85 is ${calc_result:.2f}." - - if not response_content: - response_content = ( - f"I received your message: '{content}'. How can I help you further?" - ) - - # Store assistant response in Cosmos DB - assistant_message_id = f"msg_{uuid.uuid4().hex[:12]}" - assistant_message = await store.add_message( - thread_id=thread_id, - message_id=assistant_message_id, - role="assistant", - content=response_content.strip(), - tool_calls=tool_calls if tool_calls else None, - ) - - logging.info( - f"Processed message for thread {thread_id}, " - f"tools used: {[t['tool'] for t in tool_calls]}" - ) - - return func.HttpResponse( - body=json.dumps(assistant_message), - mimetype="application/json", - ) - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Get Messages -# ----------------------------------------------------------------------------- -@app.route(route="threads/{thread_id}/messages", methods=["GET"]) -async def get_messages(req: func.HttpRequest) -> func.HttpResponse: - """ - Get conversation history for a thread. 
- - Request: - GET /api/threads/{thread_id}/messages - - Response: - 200 OK - {"messages": [...]} - """ - thread_id = req.route_params.get("thread_id") - - store = get_store() - - # Check if thread exists - if not await store.thread_exists(thread_id): - return func.HttpResponse( - body=json.dumps({"error": "Thread not found"}), - status_code=404, - mimetype="application/json", - ) - - messages = await store.get_messages(thread_id) - - return func.HttpResponse( - body=json.dumps({"messages": messages}), - mimetype="application/json", - ) - - -# ----------------------------------------------------------------------------- -# HTTP Trigger: Health Check -# ----------------------------------------------------------------------------- -@app.route(route="health", methods=["GET"]) -async def health_check(req: func.HttpRequest) -> func.HttpResponse: - """ - Health check endpoint for monitoring. - - Request: - GET /api/health - - Response: - 200 OK - {"status": "healthy", "version": "1.0.0", "cosmos_connected": true} - """ - cosmos_connected = False - try: - store = get_store() - # Simple connectivity check - store.container # This will initialize the connection if not already done - cosmos_connected = True - except Exception as e: - logging.warning(f"Cosmos DB connectivity check failed: {e}") - - return func.HttpResponse( - body=json.dumps({ - "status": "healthy", - "version": "1.0.0", - "cosmos_connected": cosmos_connected, - }), - mimetype="application/json", - ) +app.register_functions(threads_bp) +app.register_functions(messages_bp) +app.register_functions(health_bp) diff --git a/python/samples/demos/enterprise-chat-agent/local.settings.json.example b/python/samples/demos/enterprise-chat-agent/local.settings.json.example index 4ffbb3ca42..8056b3897f 100644 --- a/python/samples/demos/enterprise-chat-agent/local.settings.json.example +++ b/python/samples/demos/enterprise-chat-agent/local.settings.json.example @@ -8,6 +8,10 @@ "AZURE_OPENAI_API_VERSION": "2024-10-21", 
"AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", "AZURE_COSMOS_DATABASE_NAME": "chat_db", - "AZURE_COSMOS_CONTAINER_NAME": "messages" + "AZURE_COSMOS_CONTAINER_NAME": "messages", + "ENABLE_OTEL": "true", + "ENABLE_SENSITIVE_DATA": "false", + "OTLP_ENDPOINT": "http://localhost:4317", + "OTEL_SERVICE_NAME": "enterprise-chat-agent" } } diff --git a/python/samples/demos/enterprise-chat-agent/requirements.txt b/python/samples/demos/enterprise-chat-agent/requirements.txt index 0fcf2bd020..5c7bb3e0ba 100644 --- a/python/samples/demos/enterprise-chat-agent/requirements.txt +++ b/python/samples/demos/enterprise-chat-agent/requirements.txt @@ -2,9 +2,7 @@ azure-functions>=1.21.0 # Microsoft Agent Framework -# TODO: Uncomment when packages are published to PyPI -# microsoft-agents-core -# microsoft-agents-ai-azure +agent-framework --pre # Azure SDK azure-identity>=1.15.0 @@ -13,5 +11,16 @@ azure-cosmos>=4.7.0 # Azure OpenAI (used directly until Agent Framework packages are published) openai>=1.0.0 +# OpenTelemetry Core +opentelemetry-api>=1.25.0 +opentelemetry-sdk>=1.25.0 + +# OpenTelemetry Exporters +opentelemetry-exporter-otlp>=1.25.0 +azure-monitor-opentelemetry-exporter>=1.0.0b41 + +# OpenTelemetry Semantic Conventions (matches agent_framework) +opentelemetry-semantic-conventions-ai>=0.4.13 + # Utilities pydantic>=2.0.0 diff --git a/python/samples/demos/enterprise-chat-agent/routes/__init__.py b/python/samples/demos/enterprise-chat-agent/routes/__init__.py new file mode 100644 index 0000000000..1503963b14 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/routes/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Route blueprints for Enterprise Chat Agent. 
+ +This package contains Azure Functions blueprints organized by resource: +- threads: Thread CRUD operations +- messages: Message send/retrieve operations +- health: Health check endpoint +""" + +from routes.threads import bp as threads_bp +from routes.messages import bp as messages_bp +from routes.health import bp as health_bp + +__all__ = ["threads_bp", "messages_bp", "health_bp"] diff --git a/python/samples/demos/enterprise-chat-agent/routes/health.py b/python/samples/demos/enterprise-chat-agent/routes/health.py new file mode 100644 index 0000000000..623b3ab9f2 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/routes/health.py @@ -0,0 +1,43 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Health check endpoint.""" + +import json +import logging + +import azure.functions as func + +from routes.threads import get_store + +bp = func.Blueprint() + + +@bp.route(route="health", methods=["GET"]) +async def health_check(req: func.HttpRequest) -> func.HttpResponse: + """ + Health check endpoint for monitoring. + + Request: + GET /api/health + + Response: + 200 OK + {"status": "healthy", "version": "1.0.0", "cosmos_connected": true} + """ + cosmos_connected = False + try: + store = get_store() + # Simple connectivity check - initializes connection if needed + store.container + cosmos_connected = True + except Exception as e: + logging.warning(f"Cosmos DB connectivity check failed: {e}") + + return func.HttpResponse( + body=json.dumps({ + "status": "healthy", + "version": "1.0.0", + "cosmos_connected": cosmos_connected, + }), + mimetype="application/json", + ) diff --git a/python/samples/demos/enterprise-chat-agent/routes/messages.py b/python/samples/demos/enterprise-chat-agent/routes/messages.py new file mode 100644 index 0000000000..48350772bc --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/routes/messages.py @@ -0,0 +1,208 @@ +# Copyright (c) Microsoft. All rights reserved. 
+ +"""Message endpoints for thread conversations.""" + +import json +import logging +import uuid + +import azure.functions as func + +from services import http_request_span, cosmos_span +from routes.threads import get_store +from tools import get_weather, calculate, search_knowledge_base + +bp = func.Blueprint() + + +@bp.route(route="threads/{thread_id}/messages", methods=["POST"]) +async def send_message(req: func.HttpRequest) -> func.HttpResponse: + """ + Send a message to the agent and get a response. + + The agent will autonomously decide which tools to use based on + the message content. + + Request: + POST /api/threads/{thread_id}/messages + Body: {"content": "What's the weather in Seattle?"} + + Response: + 200 OK + { + "id": "msg_xxx", + "thread_id": "thread_xxx", + "role": "assistant", + "content": "The weather in Seattle is...", + "tool_calls": [...], + "timestamp": "..." + } + """ + thread_id = req.route_params.get("thread_id") + + async with http_request_span( + "POST", "/threads/{thread_id}/messages", thread_id=thread_id + ) as span: + store = get_store() + + # Check if thread exists + async with cosmos_span("read", "threads", thread_id): + thread_exists = await store.thread_exists(thread_id) + + if not thread_exists: + span.set_attribute("http.status_code", 404) + return func.HttpResponse( + body=json.dumps({"error": "Thread not found"}), + status_code=404, + mimetype="application/json", + ) + + try: + body = req.get_json() + content = body.get("content") + if not content: + span.set_attribute("http.status_code", 400) + return func.HttpResponse( + body=json.dumps( + {"error": "Missing 'content' in request body"} + ), + status_code=400, + mimetype="application/json", + ) + except ValueError: + span.set_attribute("http.status_code", 400) + return func.HttpResponse( + body=json.dumps({"error": "Invalid JSON body"}), + status_code=400, + mimetype="application/json", + ) + + # Store user message in Cosmos DB + user_message_id = 
f"msg_{uuid.uuid4().hex[:12]}" + async with cosmos_span("upsert", "messages", thread_id): + await store.add_message( + thread_id=thread_id, + message_id=user_message_id, + role="user", + content=content, + metadata={"client": "http_api"}, + ) + + # TODO: Replace with actual agent invocation + # agent = get_agent() + # response = await agent.run(content, thread_id=thread_id) + # Framework auto-creates: invoke_agent, chat, execute_tool spans + + # Placeholder response (demonstrates tool selection pattern) + tool_calls = [] + response_content = "" + + # Simple keyword-based tool selection demo + content_lower = content.lower() + if "weather" in content_lower: + location = "Seattle" # Default + if "in " in content_lower: + location = content_lower.split("in ")[-1].split()[0].title() + weather_result = get_weather(location) + tool_calls.append({ + "tool": "get_weather", + "arguments": {"location": location}, + "result": weather_result, + }) + response_content += ( + f"The weather in {location} is {weather_result['temp']}°F " + f"with {weather_result['condition']}. " + ) + + calc_keywords = ["calculate", "tip", "%", "percent"] + if any(word in content_lower for word in calc_keywords): + calc_result = calculate("85 * 0.15") + tool_calls.append({ + "tool": "calculate", + "arguments": {"expression": "85 * 0.15"}, + "result": calc_result, + }) + response_content += f"A 15% tip on $85 is ${calc_result:.2f}." + + if not response_content: + response_content = ( + f"I received your message: '{content}'. " + "How can I help you further?" 
+ ) + + # Store assistant response in Cosmos DB + assistant_message_id = f"msg_{uuid.uuid4().hex[:12]}" + + # Example: Add sources for RAG + sources = None + if "weather" in content_lower: + sources = [ + { + "title": "Weather Service API", + "url": "https://api.weather.example.com", + "snippet": "Real-time weather data", + } + ] + + async with cosmos_span("upsert", "messages", thread_id): + assistant_message = await store.add_message( + thread_id=thread_id, + message_id=assistant_message_id, + role="assistant", + content=response_content.strip(), + tool_calls=tool_calls if tool_calls else None, + sources=sources, + metadata={"model": "placeholder"}, + ) + + logging.info( + f"Processed message for thread {thread_id}, " + f"tools used: {[t['tool'] for t in tool_calls]}" + ) + + span.set_attribute("http.status_code", 200) + return func.HttpResponse( + body=json.dumps(assistant_message), + mimetype="application/json", + ) + + +@bp.route(route="threads/{thread_id}/messages", methods=["GET"]) +async def get_messages(req: func.HttpRequest) -> func.HttpResponse: + """ + Get conversation history for a thread. 
+ + Request: + GET /api/threads/{thread_id}/messages + + Response: + 200 OK + {"messages": [...]} + """ + thread_id = req.route_params.get("thread_id") + + async with http_request_span( + "GET", "/threads/{thread_id}/messages", thread_id=thread_id + ) as span: + store = get_store() + + # Check if thread exists + async with cosmos_span("read", "threads", thread_id): + thread_exists = await store.thread_exists(thread_id) + + if not thread_exists: + span.set_attribute("http.status_code", 404) + return func.HttpResponse( + body=json.dumps({"error": "Thread not found"}), + status_code=404, + mimetype="application/json", + ) + + async with cosmos_span("query", "messages", thread_id): + messages = await store.get_messages(thread_id) + + span.set_attribute("http.status_code", 200) + return func.HttpResponse( + body=json.dumps({"messages": messages}), + mimetype="application/json", + ) diff --git a/python/samples/demos/enterprise-chat-agent/routes/threads.py b/python/samples/demos/enterprise-chat-agent/routes/threads.py new file mode 100644 index 0000000000..4428a317e9 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/routes/threads.py @@ -0,0 +1,139 @@ +# Copyright (c) Microsoft. All rights reserved. + +"""Thread management endpoints.""" + +import json +import logging +import uuid + +import azure.functions as func + +from services import ( + CosmosConversationStore, + http_request_span, + cosmos_span, +) + +bp = func.Blueprint() + +# Cosmos DB store (lazy singleton) +_store: CosmosConversationStore | None = None + + +def get_store() -> CosmosConversationStore: + """Get or create the Cosmos DB conversation store instance.""" + global _store + if _store is None: + _store = CosmosConversationStore() + logging.info("Initialized Cosmos DB conversation store") + return _store + + +@bp.route(route="threads", methods=["POST"]) +async def create_thread(req: func.HttpRequest) -> func.HttpResponse: + """ + Create a new conversation thread. 
+ + Request: + POST /api/threads + Body: {"user_id": "...", "title": "...", "metadata": {...}} + + Response: + 201 Created + {"id": "thread_xxx", "created_at": "...", ...} + """ + try: + body = req.get_json() if req.get_body() else {} + except ValueError: + body = {} + + thread_id = f"thread_{uuid.uuid4().hex[:12]}" + user_id = body.get("user_id", "anonymous") + title = body.get("title") + metadata = body.get("metadata", {}) + + async with http_request_span("POST", "/threads", user_id=user_id) as span: + store = get_store() + async with cosmos_span("create", "threads", thread_id): + thread = await store.create_thread( + thread_id, user_id, title, metadata + ) + + logging.info(f"Created thread {thread_id}") + + span.set_attribute("http.status_code", 201) + return func.HttpResponse( + body=json.dumps(thread), + status_code=201, + mimetype="application/json", + ) + + +@bp.route(route="threads/{thread_id}", methods=["GET"]) +async def get_thread(req: func.HttpRequest) -> func.HttpResponse: + """ + Get thread metadata. + + Request: + GET /api/threads/{thread_id} + + Response: + 200 OK + {"id": "thread_xxx", "created_at": "...", ...} + """ + thread_id = req.route_params.get("thread_id") + + async with http_request_span( + "GET", "/threads/{thread_id}", thread_id=thread_id + ) as span: + store = get_store() + async with cosmos_span("read", "threads", thread_id): + thread = await store.get_thread(thread_id) + + if thread is None: + span.set_attribute("http.status_code", 404) + return func.HttpResponse( + body=json.dumps({"error": "Thread not found"}), + status_code=404, + mimetype="application/json", + ) + + span.set_attribute("http.status_code", 200) + return func.HttpResponse( + body=json.dumps(thread), + mimetype="application/json", + ) + + +@bp.route(route="threads/{thread_id}", methods=["DELETE"]) +async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: + """ + Delete a thread and its messages. 
+ + Request: + DELETE /api/threads/{thread_id} + + Response: + 204 No Content + """ + thread_id = req.route_params.get("thread_id") + + async with http_request_span( + "DELETE", "/threads/{thread_id}", thread_id=thread_id + ) as span: + store = get_store() + async with cosmos_span("delete", "threads", thread_id): + deleted = await store.delete_thread(thread_id) + + if not deleted: + span.set_attribute("http.status_code", 404) + return func.HttpResponse( + body=json.dumps({"error": "Thread not found"}), + status_code=404, + mimetype="application/json", + ) + + logging.info(f"Deleted thread {thread_id}") + + span.set_attribute("http.status_code", 204) + return func.HttpResponse(status_code=204) diff --git a/python/samples/demos/enterprise-chat-agent/services/__init__.py b/python/samples/demos/enterprise-chat-agent/services/__init__.py new file mode 100644 index 0000000000..459ed32b26 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/services/__init__.py @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Core modules for Enterprise Chat Agent. 
+ +This package contains foundational components: +- cosmos_store: Azure Cosmos DB storage for threads and messages +- observability: OpenTelemetry instrumentation for tracing +""" + +from services.cosmos_store import CosmosConversationStore +from services.observability import ( + init_observability, + http_request_span, + cosmos_span, + validation_span, + EnterpriseAgentAttr, +) + +__all__ = [ + "CosmosConversationStore", + "init_observability", + "http_request_span", + "cosmos_span", + "validation_span", + "EnterpriseAgentAttr", +] diff --git a/python/samples/demos/enterprise-chat-agent/cosmos_store.py b/python/samples/demos/enterprise-chat-agent/services/cosmos_store.py similarity index 70% rename from python/samples/demos/enterprise-chat-agent/cosmos_store.py rename to python/samples/demos/enterprise-chat-agent/services/cosmos_store.py index fad9a91917..beed74c1df 100644 --- a/python/samples/demos/enterprise-chat-agent/cosmos_store.py +++ b/python/samples/demos/enterprise-chat-agent/services/cosmos_store.py @@ -75,27 +75,42 @@ def container(self): # Thread Operations # ------------------------------------------------------------------------- - async def create_thread(self, thread_id: str, metadata: dict | None = None) -> dict: + async def create_thread( + self, + thread_id: str, + user_id: str, + title: str | None = None, + metadata: dict | None = None, + ) -> dict: """ Create a new conversation thread. Args: thread_id: Unique thread identifier. - metadata: Optional metadata (user_id, session_type, etc.). + user_id: Owner's user ID. + title: Optional thread title. + metadata: Optional custom metadata. Returns: The created thread document. 
""" + now = datetime.now(timezone.utc).isoformat() thread = { "id": thread_id, "thread_id": thread_id, # Partition key "type": "thread", - "created_at": datetime.now(timezone.utc).isoformat(), + "user_id": user_id, + "title": title, + "status": "active", + "message_count": 0, + "created_at": now, + "updated_at": now, + "last_message_preview": None, "metadata": metadata or {}, } self.container.create_item(body=thread) - logging.info(f"Created thread {thread_id} in Cosmos DB") + logging.info(f"Created thread {thread_id} for user {user_id} in Cosmos DB") return thread async def get_thread(self, thread_id: str) -> dict | None: @@ -158,32 +173,51 @@ async def add_message( role: str, content: str, tool_calls: list[dict] | None = None, + sources: list[dict] | None = None, + metadata: dict | None = None, ) -> dict: """ - Add a message to a thread. + Add a message to a thread and update thread metadata. Args: thread_id: Thread identifier (partition key). message_id: Unique message identifier. - role: Message role ('user' or 'assistant'). + role: Message role ('user', 'assistant', or 'system'). content: Message content. tool_calls: Optional list of tool calls made by the agent. + sources: Optional RAG sources (for assistant messages). + metadata: Optional custom metadata. Returns: The created message document. """ message = { "id": message_id, + "message_id": message_id, "thread_id": thread_id, # Partition key "type": "message", "role": role, "content": content, - "tool_calls": tool_calls, "timestamp": datetime.now(timezone.utc).isoformat(), + "tool_calls": tool_calls, + "sources": sources, + "metadata": metadata or {}, } self.container.create_item(body=message) logging.info(f"Added {role} message {message_id} to thread {thread_id}") + + # Update thread metadata + thread = await self.get_thread(thread_id) + if thread: + # Truncate content for preview (first 100 chars) + preview = content[:100] + "..." 
if len(content) > 100 else content + await self.update_thread( + thread_id=thread_id, + message_count=thread.get("message_count", 0) + 1, + last_message_preview=preview, + ) + return message async def get_messages( @@ -218,6 +252,47 @@ async def get_messages( return messages + async def update_thread( + self, + thread_id: str, + title: str | None = None, + status: str | None = None, + message_count: int | None = None, + last_message_preview: str | None = None, + ) -> dict | None: + """ + Update thread metadata. + + Args: + thread_id: Thread identifier. + title: New title (optional). + status: New status - 'active', 'archived', or 'deleted' (optional). + message_count: New message count (optional). + last_message_preview: Preview of last message (optional). + + Returns: + Updated thread document or None if not found. + """ + thread = await self.get_thread(thread_id) + if thread is None: + return None + + # Update fields + if title is not None: + thread["title"] = title + if status is not None: + thread["status"] = status + if message_count is not None: + thread["message_count"] = message_count + if last_message_preview is not None: + thread["last_message_preview"] = last_message_preview + + thread["updated_at"] = datetime.now(timezone.utc).isoformat() + + updated = self.container.replace_item(item=thread_id, body=thread) + logging.info(f"Updated thread {thread_id}") + return updated + async def thread_exists(self, thread_id: str) -> bool: """ Check if a thread exists. diff --git a/python/samples/demos/enterprise-chat-agent/services/observability.py b/python/samples/demos/enterprise-chat-agent/services/observability.py new file mode 100644 index 0000000000..e6a6956b28 --- /dev/null +++ b/python/samples/demos/enterprise-chat-agent/services/observability.py @@ -0,0 +1,177 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Observability module for Enterprise Chat Agent. 
+ +Provides complementary spans for layers the Agent Framework doesn't instrument: +- HTTP request lifecycle +- Cosmos DB operations +- Request validation + +Uses the framework's setup_observability() and get_tracer() APIs. +""" + +import logging +from contextlib import asynccontextmanager +from typing import AsyncIterator, Optional + +from opentelemetry.trace import Span, SpanKind, Status, StatusCode + +# Import framework's observability - use framework APIs, don't recreate them +from agent_framework.observability import setup_observability, get_tracer + +logger = logging.getLogger(__name__) + + +class EnterpriseAgentAttr: + """Custom semantic attributes for enterprise chat agent.""" + + # Thread/User context + THREAD_ID = "enterprise_agent.thread.id" + USER_ID = "enterprise_agent.user.id" + + # Cosmos DB attributes (following OpenTelemetry DB conventions) + COSMOS_CONTAINER = "db.cosmosdb.container" + COSMOS_OPERATION = "db.operation" + COSMOS_PARTITION_KEY = "db.cosmosdb.partition_key" + + +def init_observability() -> None: + """Initialize observability using the Agent Framework's setup. + + Call once at Azure Functions app startup. 
+ + The framework handles: + - TracerProvider configuration + - MeterProvider configuration + - LoggerProvider configuration + - OTLP and Azure Monitor exporters + + Environment variables used: + - ENABLE_OTEL: Enable OpenTelemetry (default: false) + - ENABLE_SENSITIVE_DATA: Log message contents (default: false) + - OTLP_ENDPOINT: OTLP collector endpoint + - APPLICATIONINSIGHTS_CONNECTION_STRING: Azure Monitor connection + - OTEL_SERVICE_NAME: Service name (default: agent_framework) + """ + try: + setup_observability() + logger.info("Observability initialized successfully") + except Exception as e: + logger.warning(f"Failed to initialize observability: {e}") + + +@asynccontextmanager +async def http_request_span( + method: str, + path: str, + thread_id: Optional[str] = None, + user_id: Optional[str] = None, +) -> AsyncIterator[Span]: + """Create a top-level HTTP request span. + + Wraps the entire request lifecycle. Child spans (Cosmos, agent invocation) + will be nested under this span. + + The span is yielded so callers can set http.status_code before exiting. + + Args: + method: HTTP method (GET, POST, DELETE, etc.) + path: Route pattern (e.g., "/threads/{thread_id}/messages") + thread_id: Thread identifier for correlation + user_id: User identifier for correlation + + Yields: + The active span for setting additional attributes like status code. 
+ """ + tracer = get_tracer("enterprise_chat_agent") + attributes = { + "http.method": method, + "http.route": path, + } + if thread_id: + attributes[EnterpriseAgentAttr.THREAD_ID] = thread_id + if user_id: + attributes[EnterpriseAgentAttr.USER_ID] = user_id + + with tracer.start_as_current_span( + f"http.request {method} {path}", + kind=SpanKind.SERVER, + attributes=attributes, + ) as span: + try: + yield span + # Check if status_code was set; determine success based on it + status_code = span.attributes.get("http.status_code") if hasattr( + span, 'attributes' + ) else None + if status_code and status_code >= 400: + span.set_status(Status(StatusCode.ERROR)) + else: + span.set_status(Status(StatusCode.OK)) + except Exception as e: + span.set_status(Status(StatusCode.ERROR, str(e))) + span.record_exception(e) + raise + + +@asynccontextmanager +async def cosmos_span( + operation: str, + container: str, + partition_key: Optional[str] = None, +) -> AsyncIterator[None]: + """Create a Cosmos DB operation span. + + Tracks database operations with OpenTelemetry database semantic conventions. 
+ + Args: + operation: Database operation (read, query, upsert, delete, create) + container: Cosmos DB container name + partition_key: Partition key value for the operation + """ + tracer = get_tracer("enterprise_chat_agent") + attributes = { + "db.system": "cosmosdb", + EnterpriseAgentAttr.COSMOS_OPERATION: operation, + EnterpriseAgentAttr.COSMOS_CONTAINER: container, + } + if partition_key: + attributes[EnterpriseAgentAttr.COSMOS_PARTITION_KEY] = partition_key + + with tracer.start_as_current_span( + f"cosmos.{operation} {container}", + kind=SpanKind.CLIENT, + attributes=attributes, + ) as span: + try: + yield + span.set_status(Status(StatusCode.OK)) + except Exception as e: + span.set_status(Status(StatusCode.ERROR, str(e))) + span.record_exception(e) + raise + + +@asynccontextmanager +async def validation_span(operation: str) -> AsyncIterator[None]: + """Create a request validation span. + + Tracks authorization and validation checks. + + Args: + operation: Validation operation name (e.g., "verify_thread_ownership") + """ + tracer = get_tracer("enterprise_chat_agent") + + with tracer.start_as_current_span( + f"request.validate {operation}", + kind=SpanKind.INTERNAL, + ) as span: + try: + yield + span.set_status(Status(StatusCode.OK)) + except Exception as e: + span.set_status(Status(StatusCode.ERROR, str(e))) + span.record_exception(e) + raise From ba260838dce8abbfe81933bca00cee28fb32e68f Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Tue, 24 Mar 2026 23:34:59 +0000 Subject: [PATCH 03/10] feat: Implement Enterprise Chat Agent with Cosmos DB and Azure Functions - Added core modules for the Enterprise Chat Agent including services for Cosmos DB storage, observability, and agent functionality. - Implemented CosmosConversationStore for managing conversation thread metadata in Azure Cosmos DB. 
- Developed agent_service to handle chat agent creation with integrated tools for weather, calculation, and knowledge base search. - Introduced observability features using OpenTelemetry for tracking HTTP requests and Cosmos DB operations. - Created local tools for weather, calculation, and knowledge base search, enabling the agent to autonomously select tools based on user input. - Removed outdated design documentation to streamline project structure. --- .../enterprise-chat-agent/.gitignore | 0 .../enterprise-chat-agent/README.md | 49 +- .../enterprise-chat-agent/azure.yaml | 0 .../enterprise-chat-agent/demo.http | 68 ++- .../docs/AGENT_IMPLEMENTATION.md | 224 ++++++++ .../enterprise-chat-agent/docs/DESIGN.md | 249 +++++++++ .../docs/MCP_INTEGRATION.md | 164 ++++++ .../docs/observability-design.md | 0 .../enterprise-chat-agent/function_app.py | 0 .../enterprise-chat-agent/host.json | 0 .../infra/abbreviations.json | 0 .../infra/core/database/cosmos-nosql.bicep | 0 .../infra/core/host/function-app.bicep | 0 .../infra/core/monitor/monitoring.bicep | 0 .../infra/core/storage/storage-account.bicep | 0 .../enterprise-chat-agent/infra/main.bicep | 0 .../infra/main.parameters.json | 0 .../local.settings.json.example | 0 .../prompts/system_prompt.txt | 28 + .../enterprise-chat-agent/requirements.txt | 13 +- .../enterprise-chat-agent/routes/__init__.py | 0 .../enterprise-chat-agent/routes/health.py | 0 .../enterprise-chat-agent/routes/messages.py | 143 +++--- .../enterprise-chat-agent/routes/threads.py | 13 +- .../services/__init__.py | 13 +- .../services/agent_service.py | 189 +++++++ .../services/cosmos_store.py | 155 ++---- .../services/observability.py | 11 +- .../enterprise-chat-agent/tools/__init__.py | 27 + .../enterprise-chat-agent/tools/calculator.py | 5 +- .../tools/knowledge_base.py | 5 +- .../enterprise-chat-agent/tools/weather.py | 5 +- .../enterprise-chat-agent/docs/DESIGN.md | 478 ------------------ .../enterprise-chat-agent/tools/__init__.py | 16 - 34 
files changed, 1131 insertions(+), 724 deletions(-) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/.gitignore (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/README.md (75%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/azure.yaml (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/demo.http (54%) create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/docs/observability-design.md (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/function_app.py (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/host.json (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/abbreviations.json (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/core/host/function-app.bicep (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/core/monitor/monitoring.bicep (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/core/storage/storage-account.bicep (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/main.bicep (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/infra/main.parameters.json (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/local.settings.json.example (100%) create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/prompts/system_prompt.txt rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/requirements.txt 
(54%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/routes/__init__.py (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/routes/health.py (100%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/routes/messages.py (52%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/routes/threads.py (88%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/services/__init__.py (62%) create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/services/cosmos_store.py (55%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/services/observability.py (93%) create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/tools/calculator.py (94%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/tools/knowledge_base.py (95%) rename python/samples/{demos => 05-end-to-end}/enterprise-chat-agent/tools/weather.py (85%) delete mode 100644 python/samples/demos/enterprise-chat-agent/docs/DESIGN.md delete mode 100644 python/samples/demos/enterprise-chat-agent/tools/__init__.py diff --git a/python/samples/demos/enterprise-chat-agent/.gitignore b/python/samples/05-end-to-end/enterprise-chat-agent/.gitignore similarity index 100% rename from python/samples/demos/enterprise-chat-agent/.gitignore rename to python/samples/05-end-to-end/enterprise-chat-agent/.gitignore diff --git a/python/samples/demos/enterprise-chat-agent/README.md b/python/samples/05-end-to-end/enterprise-chat-agent/README.md similarity index 75% rename from python/samples/demos/enterprise-chat-agent/README.md rename to python/samples/05-end-to-end/enterprise-chat-agent/README.md index 9500386b96..45770b5d4d 100644 --- a/python/samples/demos/enterprise-chat-agent/README.md +++ 
b/python/samples/05-end-to-end/enterprise-chat-agent/README.md @@ -18,11 +18,12 @@ This sample showcases: Client → Azure Functions (HTTP Triggers) → ChatAgent → Azure OpenAI ↓ [Tools] - ┌────────┼────────┐ - ↓ ↓ ↓ - Weather Calculator Search - ↓ - Cosmos DB (Persistence) + ┌─────────┼──────────┐ + ↓ ↓ ↓ + Weather Calculator Knowledge Base + ↓ + Microsoft Docs ← → Azure Cosmos DB + (MCP Integration) ``` ## Prerequisites @@ -148,10 +149,31 @@ User: "What's the weather in Tokyo?" User: "What's the weather in Paris and what's 18% tip on €75?" → Agent calls: get_weather("Paris") AND calculate("75 * 0.18") +User: "How do I configure partition keys in Azure Cosmos DB?" +→ Agent calls: search_microsoft_docs("Cosmos DB partition keys") +→ Returns: Official Microsoft documentation with best practices + +User: "Show me Python code for Azure OpenAI chat completion" +→ Agent calls: search_microsoft_code_samples("Azure OpenAI chat", language="python") +→ Returns: Official code examples from Microsoft Learn + +User: "What's your return policy?" 
+→ Agent calls: search_knowledge_base("return policy") + User: "Tell me a joke" → Agent responds directly (no tools needed) ``` +### Available Tools + +| Tool | Description | Example Use | +|------|-------------|-------------| +| `search_microsoft_docs` | Search official Microsoft/Azure docs | Azure services, cloud architecture | +| `search_microsoft_code_samples` | Find code examples from Microsoft Learn | SDK usage, implementation samples | +| `search_knowledge_base` | Internal company knowledge | Policies, FAQs, procedures | +| `get_weather` | Current weather data | Weather queries | +| `calculate` | Safe math evaluation | Calculations, tips, conversions | + ## Project Structure ```text @@ -168,7 +190,8 @@ enterprise-chat-agent/ │ ├── __init__.py │ ├── weather.py # Weather tool │ ├── calculator.py # Calculator tool -│ └── knowledge_base.py # Knowledge base search tool +│ ├── knowledge_base.py # Knowledge base search tool +│ └── microsoft_docs.py # Microsoft Docs MCP integration ├── infra/ # Infrastructure as Code (Bicep) │ ├── main.bicep # Main deployment template │ ├── main.parameters.json # Parameter file @@ -200,3 +223,17 @@ See [DESIGN.md](./DESIGN.md) for: - [GitHub Issue #2436](https://github.com/microsoft/agent-framework/issues/2436) - [Microsoft Agent Framework Documentation](https://learn.microsoft.com/agent-framework/) - [Azure Functions Python Developer Guide](https://learn.microsoft.com/azure/azure-functions/functions-reference-python) + +## Implementation Status + +### ✅ Completed +- ✅ Create tools (weather, calculator, knowledge_base) +- ✅ Create an agent (ChatAgent with Azure OpenAI) +- ✅ Use tools with agents (@ai_function decorators + agent configuration) +- ✅ Cosmos DB persistence +- ✅ OpenTelemetry observability + +### 🔄 Pending +- ⏳ Test agent locally with `func start` +- ⏳ Check the logs in Application Insights +- ⏳ Deploy to Azure with `azd up` diff --git a/python/samples/demos/enterprise-chat-agent/azure.yaml 
b/python/samples/05-end-to-end/enterprise-chat-agent/azure.yaml similarity index 100% rename from python/samples/demos/enterprise-chat-agent/azure.yaml rename to python/samples/05-end-to-end/enterprise-chat-agent/azure.yaml diff --git a/python/samples/demos/enterprise-chat-agent/demo.http b/python/samples/05-end-to-end/enterprise-chat-agent/demo.http similarity index 54% rename from python/samples/demos/enterprise-chat-agent/demo.http rename to python/samples/05-end-to-end/enterprise-chat-agent/demo.http index 7b45da810b..348331cf4b 100644 --- a/python/samples/demos/enterprise-chat-agent/demo.http +++ b/python/samples/05-end-to-end/enterprise-chat-agent/demo.http @@ -20,7 +20,7 @@ POST {{baseUrl}}/threads Content-Type: application/json { - "user_id": "user_123", + "user_id": "user_1234", "title": "Customer Support Chat", "metadata": { "session_type": "support", @@ -62,7 +62,71 @@ Content-Type: application/json "content": "What's the weather in Tokyo and what's 20% of 150?" } -### Example 4: No Tools Needed +### Example 4: Knowledge Base Search +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "What's your return policy?" +} + +### Example 5: Microsoft Docs - Azure Service +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "How do I configure partition keys in Azure Cosmos DB?" +} + +### Example 6: Microsoft Docs - Best Practices +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "What are the best practices for Azure Functions error handling?" 
+} + +### Example 7: Microsoft Code Samples - Python +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "Show me Python code for Azure OpenAI chat completion" +} + +### Example 8: Microsoft Code Samples - General +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "How do I authenticate to Azure using DefaultAzureCredential? Show me code examples." +} + +### Example 9: Complex Query - Multiple Tools +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "What's the weather in Seattle? Also, explain how Azure Functions scales and show me example code for HTTP triggers." +} + +### Example 10: Azure Architecture Question +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "What's the difference between Azure App Service and Azure Container Apps?" +} + +### Example 11: Deployment Question +POST {{baseUrl}}/threads/{{threadId}}/messages +Content-Type: application/json + +{ + "content": "How do I deploy a Python application to Azure using azd?" +} + +### Example 12: No Tools Needed POST {{baseUrl}}/threads/{{threadId}}/messages Content-Type: application/json diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md new file mode 100644 index 0000000000..2f62b12e8d --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md @@ -0,0 +1,224 @@ +# Agent Implementation Guide + +## What Was Implemented + +### 1. 
ChatAgent Service with CosmosHistoryProvider (`services/agent_service.py`) + +Created a new service module that: +- Initializes a singleton `ChatAgent` instance +- Configures `CosmosHistoryProvider` for automatic conversation persistence +- Configures Azure OpenAI client with Azure AD authentication +- Registers 5 tools: `get_weather`, `calculate`, `search_knowledge_base`, `search_microsoft_docs`, `search_microsoft_code_samples` +- Defines system instructions for the agent + +**Key Features:** +```python +from agent_framework_azure_cosmos import CosmosHistoryProvider + +# History provider automatically loads/stores conversation history +history_provider = CosmosHistoryProvider( + source_id="enterprise_chat_agent", + endpoint=os.environ["AZURE_COSMOS_ENDPOINT"], + database_name="chat_db", + container_name="messages", + credential=DefaultAzureCredential(), + load_messages=True, # Auto-load history before each run + store_inputs=True, # Auto-store user messages + store_outputs=True, # Auto-store assistant responses +) + +# Agent uses history provider as context provider +agent = ChatAgent( + chat_client=client, + instructions="You are a helpful enterprise chat assistant...", + tools=[get_weather, calculate, search_knowledge_base, ...], + context_providers=[history_provider], # Auto-persist history! + name="EnterpriseAssistant", +) +``` + +### 2. Tool Updates + +All tools use the `@ai_function` decorator: + +```python +from agent_framework.ai import ai_function + +@ai_function +def get_weather(location: str) -> dict: + """Get current weather for a location.""" + ... + +@ai_function +def search_microsoft_docs(query: str) -> list[dict]: + """Search official Microsoft documentation.""" + ... +``` + +This decorator enables the agent to: +- Discover and call tools automatically +- Generate proper function call schemas +- Handle tool execution and response parsing + +### 3. 
Simplified Message Route (`routes/messages.py`) + +**Before (Manual storage):** +```python +# Store user message manually +await store.add_message(thread_id, user_message_id, "user", content) + +# Load history manually +message_history = await store.get_messages(thread_id) +chat_messages = convert_messages_to_chat_messages(message_history) + +# Run agent +response = await agent.run(chat_messages) + +# Store response manually +await store.add_message(thread_id, assistant_message_id, "assistant", response.content) +``` + +**After (With CosmosHistoryProvider):** +```python +# Get agent (configured with CosmosHistoryProvider) +agent = get_agent() + +# Run agent - history is loaded and stored automatically! +response = await agent.run(content, session_id=thread_id) +``` + +The `CosmosHistoryProvider` handles all message persistence automatically: +- Loads conversation history before each `agent.run()` +- Stores user input after each run +- Stores assistant response after each run +- Uses `session_id` as the Cosmos DB partition key + +## How It Works + +### Flow Diagram + +``` +User Request + ↓ +POST /api/threads/{thread_id}/messages + ↓ +1. Validate thread exists + ↓ +2. agent.run(content, session_id=thread_id) + ↓ + ┌─────────────────────────────────────────┐ + │ CosmosHistoryProvider (automatic): │ + │ • Load previous messages from Cosmos │ + │ • Add to agent context │ + └─────────────────────────────────────────┘ + ↓ +3. Agent analyzes context and decides tools + ↓ +4. Agent automatically calls tools as needed: + - get_weather("Seattle") + - calculate("85 * 0.15") + - search_microsoft_docs("Azure Functions") + ↓ + ┌─────────────────────────────────────────┐ + │ CosmosHistoryProvider (automatic): │ + │ • Store user message to Cosmos │ + │ • Store assistant response to Cosmos │ + └─────────────────────────────────────────┘ + ↓ +5. Return response to user +``` + +### Example Interactions + +**Weather Query:** +``` +User: "What's the weather in Tokyo?" 
+→ Agent calls: get_weather("Tokyo") +→ Response: "The weather in Tokyo is 72°F with partly cloudy conditions." +``` + +**Multi-tool Query:** +``` +User: "What's the weather in Paris and what's 18% tip on €75?" +→ Agent calls: get_weather("Paris") AND calculate("75 * 0.18") +→ Response: "The weather in Paris is 65°F with light rain. An 18% tip on €75 is €13.50." +``` + +**Microsoft Docs Query:** +``` +User: "How do I deploy Azure Functions with Python?" +→ Agent calls: search_microsoft_docs("Azure Functions Python deployment") +→ Response: "To deploy Azure Functions with Python, you can use..." +``` + +**No Tools Needed:** +``` +User: "Tell me a joke" +→ Agent responds directly (no tools called) +→ Response: "Why did the programmer quit? Because they didn't get arrays!" +``` + +## Environment Variables Required + +Make sure your `local.settings.json` includes: + +```json +{ + "Values": { + "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", + "AZURE_OPENAI_MODEL": "gpt-4o", + "AZURE_OPENAI_API_VERSION": "2024-10-21", + "AZURE_COSMOS_ENDPOINT": "https://your-cosmos.documents.azure.com:443/", + "AZURE_COSMOS_DATABASE_NAME": "chat_db", + "AZURE_COSMOS_CONTAINER_NAME": "messages", + "AZURE_COSMOS_THREADS_CONTAINER_NAME": "threads" + } +} +``` + +**Note:** Two containers are used: +- `AZURE_COSMOS_CONTAINER_NAME` - Messages (managed by `CosmosHistoryProvider`) +- `AZURE_COSMOS_THREADS_CONTAINER_NAME` - Thread metadata (managed by `CosmosConversationStore`) + +## Next Steps + +### Local Testing +```bash +# Install dependencies +pip install -r requirements.txt + +# Start the function app +func start + +# Test with demo.http or curl +curl -X POST http://localhost:7071/api/threads +curl -X POST http://localhost:7071/api/threads/{thread_id}/messages \ + -H "Content-Type: application/json" \ + -d '{"content": "What is the weather in Seattle?"}' +``` + +### Deploy to Azure +```bash +azd auth login +azd up +``` + +## Key Benefits of This Implementation + +1. 
**Intelligent Tool Selection**: The LLM decides which tools to use based on context +2. **Multi-tool Coordination**: Can call multiple tools in one response +3. **Automatic History Persistence**: `CosmosHistoryProvider` handles message storage automatically +4. **Simplified Code**: No manual message load/store - just `agent.run(content, session_id=...)` +5. **Production Ready**: Includes error handling, observability, and security +6. **Scalable**: Serverless Azure Functions with serverless Cosmos DB +7. **Observable**: OpenTelemetry spans for all operations + +## Architecture Pattern + +This implementation demonstrates the **Agent with Tools** pattern: +- Single AI agent (not a workflow) +- Dynamic tool selection by LLM +- Suitable for chat-based RAG applications +- Simple, maintainable, and efficient + +For complex multi-agent orchestration, consider using [Microsoft Agent Framework Workflows](https://learn.microsoft.com/agent-framework/user-guide/workflows/overview). diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md new file mode 100644 index 0000000000..35abf45d81 --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md @@ -0,0 +1,249 @@ +--- +status: in-progress +contact: "@vj-msft" +date: 2024-12-06 +updated: 2026-03-24 +deciders: TBD +consulted: TBD +informed: TBD +--- + +# Production Chat API with Azure Functions, Cosmos DB & Agent Framework + +## References + +- **GitHub Issue**: [#2436 - Python: [Sample Request] Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) +- **Microsoft Documentation**: + - [Create and run a durable agent (Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) + - [Agent Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) + - [Multi-agent Reference 
Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) + - [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) + +## What is the goal of this feature? + +Provide a **production-ready sample** demonstrating how to build a scalable Chat API using the Microsoft Agent Framework with: + +1. **Azure Functions** for serverless, scalable hosting +2. **Azure Cosmos DB** for durable conversation persistence +3. **Function Tools** showcasing runtime tool selection by the agent + +### Value Proposition + +- Developers can use this sample as a reference architecture for deploying Agent Framework in production +- Demonstrates enterprise patterns: state persistence, observability, and thread-based conversations +- Shows the power of **agent autonomy** - the agent decides which tools to invoke at runtime based on conversation context + +### Success Metrics + +1. Sample is referenced in at least 3 external blog posts/tutorials within 6 months +2. Sample serves as the canonical reference for "Agent Framework + Azure Functions + Cosmos DB" stack + + +## Architecture Overview + +```mermaid +flowchart TB + subgraph Clients["Client Applications"] + Web["Web"] + Mobile["Mobile"] + CLI["CLI / Postman"] + end + + subgraph AzureFunctions["Azure Functions (Flex Consumption)"] + subgraph Endpoints["HTTP Trigger Endpoints"] + POST1["POST /api/threads"] + POST2["POST /api/threads/{id}/messages"] + GET1["GET /api/threads/{id}"] + DELETE1["DELETE /api/threads/{id}"] + end + + subgraph Agent["ChatAgent"] + Weather["get_weather"] + Calc["calculate"] + KB["search_knowledge_base"] + Docs["search_microsoft_docs
(MCP)"] + Code["search_microsoft_code_samples
(MCP)"] + end + + Endpoints --> Agent + end + + subgraph Services["Azure Services"] + OpenAI["Azure OpenAI
(GPT-4o)"] + CosmosDB["Cosmos DB
(threads + messages)"] + AppInsights["Application Insights
(telemetry)"] + end + + subgraph MCP["MCP Server"] + MCPDocs["Microsoft Learn
Docs & Code Samples"] + end + + Clients --> AzureFunctions + Agent --> OpenAI + Agent --> CosmosDB + AzureFunctions --> AppInsights + Docs --> MCPDocs + Code --> MCPDocs +``` + + +## Key Design Decisions + +### 1. Runtime Tool Selection (Agent Autonomy) + +The agent is configured with multiple tools but **decides at runtime** which tool(s) to invoke based on user intent. Tools are registered once; the agent autonomously selects which to use for each request. + +**Implemented Tools**: +| Tool | Purpose | Status | +|------|---------|--------| +| `get_weather` | Weather information | ✅ Simulated | +| `calculate` | Math expressions | ✅ Safe AST eval | +| `search_knowledge_base` | FAQ/KB search | ✅ Simulated | +| `microsoft_docs_search` | Microsoft Learn search | ✅ MCP | +| `microsoft_code_sample_search` | Code sample search | ✅ MCP | + +### 2. Cosmos DB Persistence Strategy + +**Two-Container Approach**: + +| Container | Purpose | Managed By | +|-----------|---------|------------| +| `threads` | Thread metadata (user_id, title, timestamps) | `CosmosConversationStore` (custom) | +| `messages` | Conversation messages | `CosmosHistoryProvider` (framework) | + +**CosmosHistoryProvider** from `agent-framework-azure-cosmos` ([PR #4271](https://github.com/microsoft/agent-framework/pull/4271)) automatically: +- Loads conversation history before each agent run +- Stores user inputs and agent responses after each run +- Uses `session_id` (thread_id) as the partition key + +**Partition Strategy**: +- **Messages**: `/session_id` - all messages for a thread stored together +- **Threads**: `/id` - thread metadata isolated by thread_id +- `source_id` field allows multiple agents to share a container + +### 3. 
Azure Functions Hosting + +Using **HTTP Triggers** for a familiar REST API pattern: + +- Standard HTTP trigger endpoints (POST, GET, DELETE) +- Singleton pattern for agent and history provider (reused across invocations) +- Flex Consumption plan for serverless scaling +- Simple deployment via `azd up` + +### 4. Observability + +Using Agent Framework's `setup_observability()` with custom spans for: +- HTTP request lifecycle +- Cosmos DB operations +- Request validation + +Exporters: OTLP and Azure Monitor (Application Insights) + +## API Design + +### Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `POST` | `/api/threads` | Create a new conversation thread | +| `GET` | `/api/threads/{thread_id}` | Get thread metadata | +| `DELETE` | `/api/threads/{thread_id}` | Delete a thread and its messages | +| `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | +| `GET` | `/api/health` | Health check | + +### Request/Response Behavior + +**Create Thread**: Accepts optional `user_id`, `title`, and `metadata`. Returns created thread with generated `thread_id`. + +**Send Message**: Accepts `content` string. Agent automatically loads history, processes request (with tool calls as needed), and persists the conversation. Returns assistant response with any tool calls made. + +**Delete Thread**: Removes thread metadata and clears all messages from the history provider. + +See [demo.http](../demo.http) for complete request/response examples. 
+ +## Implementation Status + +### Phase 1: Core Chat API ✅ + +- [x] Azure Functions HTTP triggers +- [x] ChatAgent with Azure OpenAI +- [x] Local tools (weather, calculator, knowledge base) +- [x] `CosmosHistoryProvider` for automatic message persistence +- [x] `CosmosConversationStore` for thread metadata +- [x] `demo.http` file for testing +- [x] README with setup instructions +- [x] Infrastructure as Code (Bicep + azd) + +### Phase 2: Observability ✅ + +- [x] OpenTelemetry integration via Agent Framework +- [x] Custom spans for HTTP requests and Cosmos operations +- [x] Structured logging +- [x] Health check endpoint + +### Phase 3: MCP Integration ✅ + +- [x] `MCPStreamableHTTPTool` for Microsoft Learn MCP server +- [x] `microsoft_docs_search` tool via MCP +- [x] `microsoft_code_sample_search` tool via MCP +- [x] Per-request MCP connection (serverless-friendly) + +### Phase 4: Production Hardening (Future) + +- [ ] Managed Identity authentication (currently uses DefaultAzureCredential) +- [ ] Retry policies and circuit breakers +- [ ] Rate limiting +- [ ] Input sanitization + +### Phase 5: Caching (Future) + +- [ ] Redis session cache for high-frequency access +- [ ] Recent messages caching + +## Project Structure + +```text +python/samples/05-end-to-end/enterprise-chat-agent/ +├── function_app.py # Azure Functions entry point +├── requirements.txt # Dependencies +├── host.json # Functions host configuration +├── azure.yaml # azd deployment configuration +├── demo.http # API test file +├── services/ +│ ├── agent_service.py # ChatAgent + CosmosHistoryProvider +│ ├── cosmos_store.py # Thread metadata storage +│ └── observability.py # OpenTelemetry instrumentation +├── routes/ +│ ├── threads.py # Thread CRUD endpoints +│ ├── messages.py # Message endpoint +│ └── health.py # Health check +├── tools/ +│ ├── weather.py # Weather tool +│ ├── calculator.py # Calculator tool +│ └── knowledge_base.py # KB search tool +├── docs/ +│ ├── DESIGN.md # This document +│ └── 
AGENT_IMPLEMENTATION.md +└── infra/ + └── main.bicep # Azure infrastructure +``` + +## Security Considerations + +| Concern | Mitigation | +|---------|------------| +| **Authentication** | `DefaultAzureCredential` (supports Managed Identity, CLI, etc.) | +| **Thread Isolation** | Cosmos DB partition key on `thread_id` / `session_id` | +| **Secrets Management** | Environment variables (Key Vault recommended for production) | +| **Input Validation** | Request body validation in route handlers | + +## Testing + +- **Local Testing**: Use `demo.http` with VS Code REST Client or `func start` +- **Deployment**: `azd up` for full Azure deployment +- **Unit Tests**: Located in `tests/` directory + +## Open Questions + +1. **Streaming support**: Should a future phase include SSE streaming responses? +2. **Multi-tenant**: Should thread isolation support user-level partitioning? diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md new file mode 100644 index 0000000000..f4584519e1 --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md @@ -0,0 +1,164 @@ +# Microsoft Docs MCP Server Integration + +## Overview + +This document explains how to integrate the Microsoft Docs MCP server into the Enterprise Chat Agent, enabling real-time access to official Microsoft and Azure documentation. + +## What is MCP? + +**Model Context Protocol (MCP)** is a standard for connecting AI applications to external data sources and tools. 
The Microsoft Docs MCP server provides access to: +- Official Microsoft Learn documentation +- Azure service documentation +- Code samples and examples +- API references + +## Current Status + +The chat agent includes two MCP-ready tools: +- `search_microsoft_docs` - Search documentation content +- `search_microsoft_code_samples` - Find code examples + +**Status:** Tools are defined but MCP integration requires VS Code/Copilot Chat environment or custom MCP client implementation. + +## Integration Options + +### Option 1: Use in VS Code with GitHub Copilot (Recommended) + +The MCP server is already available in your VS Code environment. The tools can be used directly when the agent runs in a Copilot-enabled context. + +**No additional code needed** - the MCP functions are available via the Copilot extension. + +### Option 2: Direct HTTP API Integration (Azure Functions) + +For standalone Azure Functions deployment, replace MCP calls with direct REST API calls to Microsoft Learn search: + +```python +import httpx + +async def search_microsoft_docs(query: str, max_results: int = 5) -> list[dict]: + """Search Microsoft docs via REST API.""" + # Microsoft Learn has a public search endpoint + async with httpx.AsyncClient() as client: + response = await client.get( + "https://learn.microsoft.com/api/search", + params={ + "search": query, + "locale": "en-us", + "$top": max_results, + } + ) + results = response.json() + + return [ + { + "title": result["title"], + "content": result["description"], + "url": result["url"], + } + for result in results.get("results", []) + ] +``` + +### Option 3: Use Azure AI Search on Microsoft Learn Index + +For production deployments, use Azure AI Search (formerly Azure Cognitive Search) with a pre-built index of Microsoft documentation: + +```python +from azure.search.documents import SearchClient +from azure.identity import DefaultAzureCredential + +async def search_microsoft_docs(query: str, max_results: int = 5) -> list[dict]: + """Search using Azure 
AI Search.""" + credential = DefaultAzureCredential() + search_client = SearchClient( + endpoint=os.environ["AZURE_SEARCH_ENDPOINT"], + index_name="microsoft-docs-index", + credential=credential, + ) + + results = search_client.search( + search_text=query, + top=max_results, + select=["title", "content", "url"], + ) + + return [ + { + "title": doc["title"], + "content": doc["content"], + "url": doc["url"], + } + for doc in results + ] +``` + +## Example Usage + +Once integrated, users can ask: + +``` +User: "How do I configure partition keys in Azure Cosmos DB?" +→ Agent calls: search_microsoft_docs("Cosmos DB partition keys") +→ Returns: Official docs with best practices, examples, and guidance +``` + +``` +User: "Show me Python code for Azure OpenAI chat completion" +→ Agent calls: search_microsoft_code_samples("Azure OpenAI chat completion", language="python") +→ Returns: Official code examples from Microsoft Learn +``` + +## Implementation Steps + +### Quick Test (Local with VS Code) + +1. The MCP server is already available in your VS Code environment +2. Tools are defined and ready +3. Test with Copilot Chat to verify MCP integration + +### Production Deployment (Azure Functions) + +1. Choose integration method (Option 2 or 3 above) +2. Update `tools/microsoft_docs.py` with real implementation +3. Add required dependencies to `requirements.txt`: + ``` + httpx>=0.24.0 # For REST API option + # OR + azure-search-documents>=11.4.0 # For Azure Search option + ``` +4. Add environment variables: + ```json + { + "AZURE_SEARCH_ENDPOINT": "https://your-search.search.windows.net", + "MICROSOFT_LEARN_API_KEY": "optional-if-using-api" + } + ``` +5. 
Deploy with `azd up` + +## Benefits + +✅ **Authoritative Information**: Official Microsoft documentation +✅ **Always Current**: Latest product updates and features +✅ **Code Examples**: Real, tested code samples +✅ **Better Support**: Answer Azure questions with confidence +✅ **Reduced Hallucination**: Grounded in actual documentation + +## Example Queries the Agent Can Now Handle + +- "What are Azure Functions hosting options?" +- "How do I implement retry policies in Azure?" +- "Show me code for Azure Cosmos DB bulk operations" +- "What's the difference between Azure App Service and Container Apps?" +- "How do I configure CORS for Azure Functions?" +- "Best practices for Azure OpenAI rate limiting" + +## Next Steps + +1. **Test locally**: Run agent and ask Azure-related questions +2. **Choose production integration**: REST API or Azure Search +3. **Implement real search**: Replace placeholder with actual calls +4. **Deploy and monitor**: Track which docs are most helpful + +For questions about MCP, see: +- [Model Context Protocol Specification](https://modelcontextprotocol.io) +- [Microsoft MCP Servers](https://github.com/microsoft/mcp-servers) diff --git a/python/samples/demos/enterprise-chat-agent/docs/observability-design.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md similarity index 100% rename from python/samples/demos/enterprise-chat-agent/docs/observability-design.md rename to python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md diff --git a/python/samples/demos/enterprise-chat-agent/function_app.py b/python/samples/05-end-to-end/enterprise-chat-agent/function_app.py similarity index 100% rename from python/samples/demos/enterprise-chat-agent/function_app.py rename to python/samples/05-end-to-end/enterprise-chat-agent/function_app.py diff --git a/python/samples/demos/enterprise-chat-agent/host.json b/python/samples/05-end-to-end/enterprise-chat-agent/host.json similarity index 100% rename 
from python/samples/demos/enterprise-chat-agent/host.json rename to python/samples/05-end-to-end/enterprise-chat-agent/host.json diff --git a/python/samples/demos/enterprise-chat-agent/infra/abbreviations.json b/python/samples/05-end-to-end/enterprise-chat-agent/infra/abbreviations.json similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/abbreviations.json rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/abbreviations.json diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/core/host/function-app.bicep rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/monitor/monitoring.bicep similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/core/monitor/monitoring.bicep rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/core/monitor/monitoring.bicep diff --git a/python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/storage/storage-account.bicep similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/core/storage/storage-account.bicep rename to 
python/samples/05-end-to-end/enterprise-chat-agent/infra/core/storage/storage-account.bicep diff --git a/python/samples/demos/enterprise-chat-agent/infra/main.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/main.bicep rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep diff --git a/python/samples/demos/enterprise-chat-agent/infra/main.parameters.json b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.parameters.json similarity index 100% rename from python/samples/demos/enterprise-chat-agent/infra/main.parameters.json rename to python/samples/05-end-to-end/enterprise-chat-agent/infra/main.parameters.json diff --git a/python/samples/demos/enterprise-chat-agent/local.settings.json.example b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example similarity index 100% rename from python/samples/demos/enterprise-chat-agent/local.settings.json.example rename to python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/prompts/system_prompt.txt b/python/samples/05-end-to-end/enterprise-chat-agent/prompts/system_prompt.txt new file mode 100644 index 0000000000..2564ab0a37 --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/prompts/system_prompt.txt @@ -0,0 +1,28 @@ +You are a helpful enterprise chat assistant with access to multiple tools: + +**Microsoft Documentation (via MCP):** +- microsoft_docs_search: Search official Microsoft and Azure documentation +- microsoft_code_sample_search: Find code examples from Microsoft Learn + +**Internal Knowledge:** +- search_knowledge_base: Search internal company policies and FAQs + +**Utility Tools:** +- get_weather: Get current weather information for any location +- calculate: Evaluate mathematical expressions safely + +**When to use each tool:** +- Use 
microsoft_docs_search for questions about Azure, Microsoft products, cloud architecture +- Use microsoft_code_sample_search when users need code examples or implementation details +- Use search_knowledge_base for company-specific policies, procedures, and FAQs +- Use get_weather for weather-related questions +- Use calculate for mathematical computations + +**Best practices:** +1. Determine which tools are needed based on the question +2. Call appropriate tools to gather authoritative information +3. Provide clear responses with citations and sources +4. For Azure/Microsoft questions, check official docs first +5. Be concise but thorough + +Always cite your sources, especially when referencing documentation. diff --git a/python/samples/demos/enterprise-chat-agent/requirements.txt b/python/samples/05-end-to-end/enterprise-chat-agent/requirements.txt similarity index 54% rename from python/samples/demos/enterprise-chat-agent/requirements.txt rename to python/samples/05-end-to-end/enterprise-chat-agent/requirements.txt index 5c7bb3e0ba..b9cfdc2338 100644 --- a/python/samples/demos/enterprise-chat-agent/requirements.txt +++ b/python/samples/05-end-to-end/enterprise-chat-agent/requirements.txt @@ -1,16 +1,23 @@ # Azure Functions azure-functions>=1.21.0 -# Microsoft Agent Framework -agent-framework --pre +# Microsoft Agent Framework (rc5 - March 2026) +# See: https://learn.microsoft.com/agent-framework/support/upgrade/python-2026-significant-changes +agent-framework>=1.0.0rc5,<2.0.0 + +# Azure Cosmos DB History Provider +# Provides CosmosHistoryProvider for automatic conversation history persistence +agent-framework-azure-cosmos>=1.0.0b260311,<2.0.0 # Azure SDK azure-identity>=1.15.0 -azure-cosmos>=4.7.0 # Azure OpenAI (used directly until Agent Framework packages are published) openai>=1.0.0 +# MCP Client (for Microsoft Learn documentation tools) +mcp>=1.0.0 + # OpenTelemetry Core opentelemetry-api>=1.25.0 opentelemetry-sdk>=1.25.0 diff --git 
a/python/samples/demos/enterprise-chat-agent/routes/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py similarity index 100% rename from python/samples/demos/enterprise-chat-agent/routes/__init__.py rename to python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py diff --git a/python/samples/demos/enterprise-chat-agent/routes/health.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py similarity index 100% rename from python/samples/demos/enterprise-chat-agent/routes/health.py rename to python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py diff --git a/python/samples/demos/enterprise-chat-agent/routes/messages.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py similarity index 52% rename from python/samples/demos/enterprise-chat-agent/routes/messages.py rename to python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py index 48350772bc..3d00b89547 100644 --- a/python/samples/demos/enterprise-chat-agent/routes/messages.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py @@ -4,13 +4,18 @@ import json import logging -import uuid +from datetime import datetime, timezone import azure.functions as func -from services import http_request_span, cosmos_span +from services import ( + http_request_span, + cosmos_span, + get_agent, + get_history_provider, + get_mcp_tool, +) from routes.threads import get_store -from tools import get_weather, calculate, search_knowledge_base bp = func.Blueprint() @@ -20,8 +25,10 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: """ Send a message to the agent and get a response. - The agent will autonomously decide which tools to use based on - the message content. 
+ The agent uses: + - CosmosHistoryProvider for automatic conversation history persistence + - MCPStreamableHTTPTool for Microsoft Learn documentation search + - Local tools for weather, calculator, and knowledge base Request: POST /api/threads/{thread_id}/messages @@ -30,7 +37,6 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: Response: 200 OK { - "id": "msg_xxx", "thread_id": "thread_xxx", "role": "assistant", "content": "The weather in Seattle is...", @@ -77,82 +83,38 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: mimetype="application/json", ) - # Store user message in Cosmos DB - user_message_id = f"msg_{uuid.uuid4().hex[:12]}" - async with cosmos_span("upsert", "messages", thread_id): - await store.add_message( - thread_id=thread_id, - message_id=user_message_id, - role="user", - content=content, - metadata={"client": "http_api"}, + # Get agent (configured with CosmosHistoryProvider and local tools) + agent = get_agent() + + # Run agent with MCP tools for Microsoft Learn documentation + # The agent combines: + # - Local tools: get_weather, calculate, search_knowledge_base + # - MCP tools: microsoft_docs_search, microsoft_code_sample_search + async with get_mcp_tool() as mcp: + response = await agent.run( + content, + session_id=thread_id, + tools=mcp, # Add MCP tools for this run ) - # TODO: Replace with actual agent invocation - # agent = get_agent() - # response = await agent.run(content, thread_id=thread_id) - # Framework auto-creates: invoke_agent, chat, execute_tool spans - - # Placeholder response (demonstrates tool selection pattern) + # Extract response content and tool calls + response_content = response.text or "" tool_calls = [] - response_content = "" - - # Simple keyword-based tool selection demo - content_lower = content.lower() - if "weather" in content_lower: - location = "Seattle" # Default - if "in " in content_lower: - location = content_lower.split("in ")[-1].split()[0].title() - weather_result 
= get_weather(location) - tool_calls.append({ - "tool": "get_weather", - "arguments": {"location": location}, - "result": weather_result, - }) - response_content += ( - f"The weather in {location} is {weather_result['temp']}°F " - f"with {weather_result['condition']}. " - ) - - calc_keywords = ["calculate", "tip", "%", "percent"] - if any(word in content_lower for word in calc_keywords): - calc_result = calculate("85 * 0.15") - tool_calls.append({ - "tool": "calculate", - "arguments": {"expression": "85 * 0.15"}, - "result": calc_result, - }) - response_content += f"A 15% tip on $85 is ${calc_result:.2f}." - if not response_content: - response_content = ( - f"I received your message: '{content}'. " - "How can I help you further?" - ) - - # Store assistant response in Cosmos DB - assistant_message_id = f"msg_{uuid.uuid4().hex[:12]}" - - # Example: Add sources for RAG - sources = None - if "weather" in content_lower: - sources = [ - { - "title": "Weather Service API", - "url": "https://api.weather.example.com", - "snippet": "Real-time weather data", - } - ] - - async with cosmos_span("upsert", "messages", thread_id): - assistant_message = await store.add_message( + # Parse tool calls from response if any + if hasattr(response, "tool_calls") and response.tool_calls: + for tool_call in response.tool_calls: + tool_calls.append({ + "tool": getattr(tool_call, "name", str(tool_call)), + "arguments": getattr(tool_call, "arguments", {}), + }) + + # Update thread metadata with last message preview + async with cosmos_span("update", "threads", thread_id): + preview = response_content[:100] + "..." 
if len(response_content) > 100 else response_content + await store.update_thread( thread_id=thread_id, - message_id=assistant_message_id, - role="assistant", - content=response_content.strip(), - tool_calls=tool_calls if tool_calls else None, - sources=sources, - metadata={"model": "placeholder"}, + last_message_preview=preview, ) logging.info( @@ -160,9 +122,18 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: f"tools used: {[t['tool'] for t in tool_calls]}" ) + # Build response + result = { + "thread_id": thread_id, + "role": "assistant", + "content": response_content, + "tool_calls": tool_calls if tool_calls else None, + "timestamp": datetime.now(timezone.utc).isoformat(), + } + span.set_attribute("http.status_code", 200) return func.HttpResponse( - body=json.dumps(assistant_message), + body=json.dumps(result), mimetype="application/json", ) @@ -170,7 +141,7 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: @bp.route(route="threads/{thread_id}/messages", methods=["GET"]) async def get_messages(req: func.HttpRequest) -> func.HttpResponse: """ - Get conversation history for a thread. + Get conversation history for a thread from CosmosHistoryProvider. 
Request: GET /api/threads/{thread_id}/messages @@ -198,11 +169,21 @@ async def get_messages(req: func.HttpRequest) -> func.HttpResponse: mimetype="application/json", ) + # Get messages from CosmosHistoryProvider + history_provider = get_history_provider() async with cosmos_span("query", "messages", thread_id): - messages = await store.get_messages(thread_id) + messages = await history_provider.get_messages(session_id=thread_id) + + # Convert Message objects to serializable dicts + message_list = [] + for msg in messages: + message_list.append({ + "role": msg.role.value if hasattr(msg.role, "value") else str(msg.role), + "content": msg.content if hasattr(msg, "content") else str(msg), + }) span.set_attribute("http.status_code", 200) return func.HttpResponse( - body=json.dumps({"messages": messages}), + body=json.dumps({"messages": message_list}), mimetype="application/json", ) diff --git a/python/samples/demos/enterprise-chat-agent/routes/threads.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py similarity index 88% rename from python/samples/demos/enterprise-chat-agent/routes/threads.py rename to python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py index 4428a317e9..a09b998131 100644 --- a/python/samples/demos/enterprise-chat-agent/routes/threads.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py @@ -12,6 +12,7 @@ CosmosConversationStore, http_request_span, cosmos_span, + get_history_provider, ) bp = func.Blueprint() @@ -110,6 +111,9 @@ async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: """ Delete a thread and its messages. + Deletes both the thread metadata and all messages stored by + CosmosHistoryProvider for this thread's session. 
+ Request: DELETE /api/threads/{thread_id} @@ -122,6 +126,8 @@ async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: "DELETE", "/threads/{thread_id}", thread_id=thread_id ) as span: store = get_store() + + # Delete thread metadata async with cosmos_span("delete", "threads", thread_id): deleted = await store.delete_thread(thread_id) @@ -133,7 +139,12 @@ async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: mimetype="application/json", ) - logging.info(f"Deleted thread {thread_id}") + # Clear messages from CosmosHistoryProvider + history_provider = get_history_provider() + async with cosmos_span("delete", "messages", thread_id): + await history_provider.clear(session_id=thread_id) + + logging.info(f"Deleted thread {thread_id} and cleared messages") span.set_attribute("http.status_code", 204) return func.HttpResponse(status_code=204) diff --git a/python/samples/demos/enterprise-chat-agent/services/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py similarity index 62% rename from python/samples/demos/enterprise-chat-agent/services/__init__.py rename to python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py index 459ed32b26..9a76a904c9 100644 --- a/python/samples/demos/enterprise-chat-agent/services/__init__.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py @@ -4,8 +4,9 @@ Core modules for Enterprise Chat Agent. 
This package contains foundational components: -- cosmos_store: Azure Cosmos DB storage for threads and messages +- cosmos_store: Azure Cosmos DB storage for thread metadata - observability: OpenTelemetry instrumentation for tracing +- agent_service: Agent with CosmosHistoryProvider and MCP integration """ from services.cosmos_store import CosmosConversationStore @@ -16,6 +17,12 @@ validation_span, EnterpriseAgentAttr, ) +from services.agent_service import ( + get_agent, + get_history_provider, + get_mcp_tool, + close_providers, +) __all__ = [ "CosmosConversationStore", @@ -24,4 +31,8 @@ "cosmos_span", "validation_span", "EnterpriseAgentAttr", + "get_agent", + "get_history_provider", + "get_mcp_tool", + "close_providers", ] diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py new file mode 100644 index 0000000000..859d6022a7 --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py @@ -0,0 +1,189 @@ +# Copyright (c) Microsoft. All rights reserved. + +""" +Chat Agent Service + +Provides an Agent instance configured with CosmosHistoryProvider for +automatic conversation history persistence, local tools for weather, +calculation, and knowledge base search, plus MCP integration for +Microsoft Learn documentation search. 
+""" + +import logging +import os +from pathlib import Path +from typing import Optional + +from azure.identity import DefaultAzureCredential +from agent_framework import Agent, MCPStreamableHTTPTool +from agent_framework.azure import AzureOpenAIChatClient +from agent_framework_azure_cosmos import CosmosHistoryProvider + +from tools import ( + get_weather, + calculate, + search_knowledge_base, +) + + +_history_provider: Optional[CosmosHistoryProvider] = None +_agent: Optional[Agent] = None +_credential: Optional[DefaultAzureCredential] = None + +# Prompts directory +_PROMPTS_DIR = Path(__file__).parent.parent / "prompts" + + +def _load_prompt(name: str) -> str: + """Load a prompt from the prompts directory.""" + prompt_path = _PROMPTS_DIR / f"{name}.txt" + return prompt_path.read_text(encoding="utf-8") + +# Microsoft Learn MCP server URL +MICROSOFT_LEARN_MCP_URL = "https://learn.microsoft.com/api/mcp" + + +def get_history_provider() -> CosmosHistoryProvider: + """ + Get or create the singleton CosmosHistoryProvider instance. + + The provider automatically: + - Loads conversation history before each agent run + - Stores user inputs and agent responses + - Uses session_id as the Cosmos DB partition key + + Returns: + Configured CosmosHistoryProvider instance. 
+ """ + global _history_provider, _credential + + if _history_provider is None: + endpoint = os.environ.get("AZURE_COSMOS_ENDPOINT") + database_name = os.environ.get("AZURE_COSMOS_DATABASE_NAME", "chat_db") + container_name = os.environ.get("AZURE_COSMOS_CONTAINER_NAME", "messages") + + if not endpoint: + raise ValueError( + "AZURE_COSMOS_ENDPOINT environment variable is required" + ) + + if _credential is None: + _credential = DefaultAzureCredential() + + _history_provider = CosmosHistoryProvider( + source_id="enterprise_chat_agent", + endpoint=endpoint, + database_name=database_name, + container_name=container_name, + credential=_credential, + load_messages=True, # Load history before each run + store_inputs=True, # Store user messages + store_outputs=True, # Store assistant responses + ) + + logging.info( + f"Initialized CosmosHistoryProvider with database={database_name}, " + f"container={container_name}" + ) + + return _history_provider + + +def get_agent() -> Agent: + """ + Get or create the singleton Agent instance. + + The agent is configured with: + - Azure OpenAI chat client + - CosmosHistoryProvider for automatic conversation persistence + - Weather, calculator, and knowledge base tools + - System instructions for enterprise chat support + + Returns: + Configured Agent instance. 
+ """ + global _agent + + if _agent is None: + # Get Azure OpenAI configuration from environment + endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") + deployment_name = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME", "gpt-4o") + api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-21") + + if not endpoint: + raise ValueError( + "AZURE_OPENAI_ENDPOINT environment variable is required" + ) + + # Create Azure OpenAI chat client with credential + global _credential + if _credential is None: + _credential = DefaultAzureCredential() + + chat_client = AzureOpenAIChatClient( + endpoint=endpoint, + deployment_name=deployment_name, + api_version=api_version, + credential=_credential, + ) + + # Get the history provider + history_provider = get_history_provider() + + # Load system instructions from prompts folder + instructions = _load_prompt("system_prompt") + + # Create Agent with local tools and history provider + # MCP tools are added at runtime via run() method + _agent = Agent( + client=chat_client, + instructions=instructions, + tools=[ + get_weather, + calculate, + search_knowledge_base, + ], + context_providers=[history_provider], # Auto-persist history + name="EnterpriseAssistant", + ) + + logging.info( + f"Initialized Agent with deployment {deployment_name}, CosmosHistoryProvider, " + "and local tools: get_weather, calculate, search_knowledge_base" + ) + + return _agent + + +def get_mcp_tool() -> MCPStreamableHTTPTool: + """ + Create an MCPStreamableHTTPTool for Microsoft Learn documentation. + + This connects to the Microsoft Learn MCP server which provides: + - microsoft_docs_search: Search Microsoft documentation + - microsoft_code_sample_search: Search code samples + + The tool should be used as an async context manager: + async with get_mcp_tool() as mcp: + response = await agent.run(content, session_id=thread_id, tools=mcp) + + Returns: + Configured MCPStreamableHTTPTool instance. 
+ """ + return MCPStreamableHTTPTool( + name="Microsoft Learn", + url=MICROSOFT_LEARN_MCP_URL, + description="Search Microsoft and Azure documentation and code samples", + approval_mode="never_require", # Auto-approve tool calls for docs search + ) + + return _agent + + +async def close_providers() -> None: + """Close the history provider and release resources.""" + global _history_provider + if _history_provider is not None: + await _history_provider.close() + _history_provider = None + logging.info("Closed CosmosHistoryProvider") diff --git a/python/samples/demos/enterprise-chat-agent/services/cosmos_store.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py similarity index 55% rename from python/samples/demos/enterprise-chat-agent/services/cosmos_store.py rename to python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py index beed74c1df..227435926f 100644 --- a/python/samples/demos/enterprise-chat-agent/services/cosmos_store.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py @@ -1,13 +1,16 @@ # Copyright (c) Microsoft. All rights reserved. """ -Cosmos DB Storage for Threads and Messages +Cosmos DB Storage for Thread Metadata -This module provides persistent storage for conversation threads and messages -using Azure Cosmos DB with thread_id as the partition key. +This module provides persistent storage for conversation thread metadata +using Azure Cosmos DB. Message storage is handled separately by the +CosmosHistoryProvider from agent-framework-azure-cosmos package. Document Types: - Thread: {"type": "thread", "id": "thread_xxx", "thread_id": "thread_xxx", ...} -- Message: {"type": "message", "id": "msg_xxx", "thread_id": "thread_xxx", ...} + +Note: Conversation messages are managed by CosmosHistoryProvider which uses +session_id (thread_id) as the partition key for efficient message retrieval. 
""" import logging @@ -15,17 +18,17 @@ from datetime import datetime, timezone from typing import Any -from azure.cosmos import CosmosClient, PartitionKey +from azure.cosmos import CosmosClient from azure.cosmos.exceptions import CosmosResourceNotFoundError from azure.identity import DefaultAzureCredential class CosmosConversationStore: """ - Manages conversation threads and messages in Azure Cosmos DB. + Manages conversation thread metadata in Azure Cosmos DB. - Uses a single container with thread_id as partition key. - Documents are differentiated by 'type' field: 'thread' or 'message'. + Thread metadata includes: user_id, title, status, created_at, updated_at. + Message persistence is handled by CosmosHistoryProvider (context provider). """ def __init__( @@ -41,7 +44,7 @@ def __init__( Args: endpoint: Cosmos DB endpoint URL. Defaults to AZURE_COSMOS_ENDPOINT env var. database_name: Database name. Defaults to AZURE_COSMOS_DATABASE_NAME env var. - container_name: Container name. Defaults to AZURE_COSMOS_CONTAINER_NAME env var. + container_name: Container name for threads. Defaults to AZURE_COSMOS_THREADS_CONTAINER_NAME. credential: Azure credential. Defaults to DefaultAzureCredential. 
""" self.endpoint = endpoint or os.environ.get("AZURE_COSMOS_ENDPOINT") @@ -49,7 +52,7 @@ def __init__( "AZURE_COSMOS_DATABASE_NAME", "chat_db" ) self.container_name = container_name or os.environ.get( - "AZURE_COSMOS_CONTAINER_NAME", "messages" + "AZURE_COSMOS_THREADS_CONTAINER_NAME", "threads" ) if not self.endpoint: @@ -64,11 +67,19 @@ def __init__( @property def container(self): - """Lazy initialization of Cosmos DB container client.""" + """Lazy initialization of Cosmos DB container client with auto-create.""" if self._container is None: self._client = CosmosClient(self.endpoint, credential=self.credential) - database = self._client.get_database_client(self.database_name) - self._container = database.get_container_client(self.container_name) + # Create database if it doesn't exist + database = self._client.create_database_if_not_exists(id=self.database_name) + # Create container with thread_id as partition key + self._container = database.create_container_if_not_exists( + id=self.container_name, + partition_key={"paths": ["/thread_id"], "kind": "Hash"}, + ) + logging.info( + f"Initialized Cosmos container: {self.database_name}/{self.container_name}" + ) return self._container # ------------------------------------------------------------------------- @@ -134,7 +145,10 @@ async def get_thread(self, thread_id: str) -> dict | None: async def delete_thread(self, thread_id: str) -> bool: """ - Delete a thread and all its messages. + Delete a thread metadata document. + + Note: Messages are stored separately by CosmosHistoryProvider and + can be cleared using history_provider.clear(session_id=thread_id). Args: thread_id: Thread identifier. @@ -142,116 +156,13 @@ async def delete_thread(self, thread_id: str) -> bool: Returns: True if deleted, False if not found. 
""" - # First, get all items in the partition (thread + messages) - query = "SELECT c.id FROM c WHERE c.thread_id = @thread_id" - items = list( - self.container.query_items( - query=query, - parameters=[{"name": "@thread_id", "value": thread_id}], - partition_key=thread_id, - ) - ) - - if not items: + try: + self.container.delete_item(item=thread_id, partition_key=thread_id) + logging.info(f"Deleted thread {thread_id} from Cosmos DB") + return True + except CosmosResourceNotFoundError: return False - # Delete all items in the partition - for item in items: - self.container.delete_item(item=item["id"], partition_key=thread_id) - - logging.info(f"Deleted thread {thread_id} and {len(items)} items from Cosmos DB") - return True - - # ------------------------------------------------------------------------- - # Message Operations - # ------------------------------------------------------------------------- - - async def add_message( - self, - thread_id: str, - message_id: str, - role: str, - content: str, - tool_calls: list[dict] | None = None, - sources: list[dict] | None = None, - metadata: dict | None = None, - ) -> dict: - """ - Add a message to a thread and update thread metadata. - - Args: - thread_id: Thread identifier (partition key). - message_id: Unique message identifier. - role: Message role ('user', 'assistant', or 'system'). - content: Message content. - tool_calls: Optional list of tool calls made by the agent. - sources: Optional RAG sources (for assistant messages). - metadata: Optional custom metadata. - - Returns: - The created message document. 
- """ - message = { - "id": message_id, - "message_id": message_id, - "thread_id": thread_id, # Partition key - "type": "message", - "role": role, - "content": content, - "timestamp": datetime.now(timezone.utc).isoformat(), - "tool_calls": tool_calls, - "sources": sources, - "metadata": metadata or {}, - } - - self.container.create_item(body=message) - logging.info(f"Added {role} message {message_id} to thread {thread_id}") - - # Update thread metadata - thread = await self.get_thread(thread_id) - if thread: - # Truncate content for preview (first 100 chars) - preview = content[:100] + "..." if len(content) > 100 else content - await self.update_thread( - thread_id=thread_id, - message_count=thread.get("message_count", 0) + 1, - last_message_preview=preview, - ) - - return message - - async def get_messages( - self, - thread_id: str, - limit: int = 100, - ) -> list[dict]: - """ - Get all messages in a thread, ordered by timestamp. - - Args: - thread_id: Thread identifier. - limit: Maximum number of messages to return. - - Returns: - List of message documents. 
- """ - query = """ - SELECT * FROM c - WHERE c.thread_id = @thread_id AND c.type = 'message' - ORDER BY c.timestamp ASC - """ - - messages = list( - self.container.query_items( - query=query, - parameters=[{"name": "@thread_id", "value": thread_id}], - partition_key=thread_id, - max_item_count=limit, - ) - ) - - return messages - async def update_thread( self, thread_id: str, diff --git a/python/samples/demos/enterprise-chat-agent/services/observability.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py similarity index 93% rename from python/samples/demos/enterprise-chat-agent/services/observability.py rename to python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py index e6a6956b28..a5ce7f68a8 100644 --- a/python/samples/demos/enterprise-chat-agent/services/observability.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py @@ -8,7 +8,7 @@ - Cosmos DB operations - Request validation -Uses the framework's setup_observability() and get_tracer() APIs. +Uses the framework's configure_otel_providers() and get_tracer() APIs. 
""" import logging @@ -18,7 +18,7 @@ from opentelemetry.trace import Span, SpanKind, Status, StatusCode # Import framework's observability - use framework APIs, don't recreate them -from agent_framework.observability import setup_observability, get_tracer +from agent_framework.observability import configure_otel_providers, get_tracer logger = logging.getLogger(__name__) @@ -48,14 +48,15 @@ def init_observability() -> None: - OTLP and Azure Monitor exporters Environment variables used: - - ENABLE_OTEL: Enable OpenTelemetry (default: false) + - ENABLE_INSTRUMENTATION: Enable telemetry (default: false) - ENABLE_SENSITIVE_DATA: Log message contents (default: false) - - OTLP_ENDPOINT: OTLP collector endpoint + - ENABLE_CONSOLE_EXPORTERS: Enable console output (default: false) + - OTEL_EXPORTER_OTLP_ENDPOINT: OTLP collector endpoint - APPLICATIONINSIGHTS_CONNECTION_STRING: Azure Monitor connection - OTEL_SERVICE_NAME: Service name (default: agent_framework) """ try: - setup_observability() + configure_otel_providers() logger.info("Observability initialized successfully") except Exception as e: logger.warning(f"Failed to initialize observability: {e}") diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py new file mode 100644 index 0000000000..fb2615b8cc --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py @@ -0,0 +1,27 @@ +""" +Enterprise Chat Agent - Function Tools + +This module contains the local tools that the ChatAgent can invoke at runtime. +The agent autonomously decides which tools to use based on the user's message. 
+ +Local Tools: +- get_weather: Get weather information for a location +- calculate: Evaluate mathematical expressions +- search_knowledge_base: Search internal company knowledge base + +MCP Tools (via Microsoft Learn MCP Server): +- microsoft_docs_search: Search Microsoft documentation +- microsoft_code_sample_search: Search code samples + +MCP tools are connected at runtime via MCPStreamableHTTPTool in agent_service.py +""" + +from tools.weather import get_weather +from tools.calculator import calculate +from tools.knowledge_base import search_knowledge_base + +__all__ = [ + "get_weather", + "calculate", + "search_knowledge_base", +] diff --git a/python/samples/demos/enterprise-chat-agent/tools/calculator.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py similarity index 94% rename from python/samples/demos/enterprise-chat-agent/tools/calculator.py rename to python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py index 4d7227c1a1..53d6904b07 100644 --- a/python/samples/demos/enterprise-chat-agent/tools/calculator.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py @@ -8,8 +8,7 @@ import operator from typing import Union -# TODO: Uncomment when implementing with actual Agent Framework -# from microsoft.agents.core import ai_function +from agent_framework import tool # Safe operators for expression evaluation SAFE_OPERATORS = { @@ -53,7 +52,7 @@ def _safe_eval(node: ast.AST) -> Union[int, float]: raise ValueError(f"Unsupported AST node type: {type(node).__name__}") -# @ai_function +@tool def calculate(expression: str) -> float: """ Evaluate a mathematical expression safely. 
diff --git a/python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py similarity index 95% rename from python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py rename to python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py index 9610229aab..07f7ad0229 100644 --- a/python/samples/demos/enterprise-chat-agent/tools/knowledge_base.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py @@ -8,8 +8,7 @@ from typing import Optional -# TODO: Uncomment when implementing with actual Agent Framework -# from microsoft.agents.core import ai_function +from agent_framework import tool # Simulated knowledge base entries KNOWLEDGE_BASE = [ @@ -46,7 +45,7 @@ ] -# @ai_function +@tool def search_knowledge_base( query: str, category: Optional[str] = None, diff --git a/python/samples/demos/enterprise-chat-agent/tools/weather.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py similarity index 85% rename from python/samples/demos/enterprise-chat-agent/tools/weather.py rename to python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py index b53689eec8..cdf2aa609e 100644 --- a/python/samples/demos/enterprise-chat-agent/tools/weather.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py @@ -7,11 +7,10 @@ import random -# TODO: Uncomment when implementing with actual Agent Framework -# from microsoft.agents.core import ai_function +from agent_framework import tool -# @ai_function +@tool def get_weather(location: str) -> dict: """ Get current weather for a location. 
diff --git a/python/samples/demos/enterprise-chat-agent/docs/DESIGN.md b/python/samples/demos/enterprise-chat-agent/docs/DESIGN.md deleted file mode 100644 index 611cc8a611..0000000000 --- a/python/samples/demos/enterprise-chat-agent/docs/DESIGN.md +++ /dev/null @@ -1,478 +0,0 @@ ---- -status: proposed -contact: @vj-msft -date: 2024-12-06 -deciders: TBD -consulted: TBD -informed: TBD ---- - -# Production Chat API with Azure Functions, Cosmos DB & Agent Framework - -## References - -- **GitHub Issue**: [#2436 - Python: [Sample Request] Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) -- **Microsoft Documentation**: - - [Create and run a durable agent (Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) - - [Agent Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) - - [Multi-agent Reference Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) - - [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) - -## What is the goal of this feature? - -Provide a **production-ready sample** demonstrating how to build a scalable Chat API using the Microsoft Agent Framework with: - -1. **Azure Functions** for serverless, scalable hosting -2. **Azure Cosmos DB** for durable conversation persistence -3. **Function Tools** showcasing runtime tool selection by the agent - -### Value Proposition - -- Developers can use this sample as a reference architecture for deploying Agent Framework in production -- Demonstrates enterprise patterns: state persistence, observability, and thread-based conversations -- Shows the power of **agent autonomy** - the agent decides which tools to invoke at runtime based on conversation context - -### Success Metrics - -1. 
Sample is referenced in at least 3 external blog posts/tutorials within 6 months -2. Sample serves as the canonical reference for "Agent Framework + Azure Functions + Cosmos DB" stack - -## What is the problem being solved? - -### Current Pain Points - -1. **No production-ready Python sample exists** - Existing samples focus on getting started scenarios, not production deployment -2. **Gap in persistence guidance** - .NET has `CosmosNoSql` package, Python has no equivalent sample or implementation -3. **Tool selection patterns unclear** - Developers need to see how agents autonomously select tools at runtime - -### Why is this hard today? - -- Developers must piece together patterns from multiple sources -- No reference implementation for Cosmos DB persistence in Python -- Azure Functions + Agent Framework integration patterns are spread across docs - -## Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ Client Applications │ -│ (Web, Mobile, CLI, Postman, etc.) 
│ -└─────────────────────────────────────────────────────────────────────────┘ - │ - ▼ -┌─────────────────────────────────────────────────────────────────────────┐ -│ Azure Functions (Flex Consumption) │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ HTTP Trigger Endpoints │ │ -│ │ POST /api/chat/{thread_id} - Send message │ │ -│ │ GET /api/chat/{thread_id} - Get thread history │ │ -│ │ POST /api/threads - Create new thread │ │ -│ │ DELETE /api/threads/{id} - Delete thread │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ │ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ ChatAgent │ │ -│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ -│ │ │ WeatherTool │ │ SearchTool │ │ CalculatorTool │ │ │ -│ │ └─────────────┘ └─────────────┘ └─────────────┘ │ │ -│ │ │ │ -│ │ Agent autonomously selects tools based on user intent │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────────────────┘ - │ - ┌────────────────┼────────────────┐ - ▼ ▼ ▼ - ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ - │ Azure OpenAI │ │ Cosmos DB │ │ App │ - │ (GPT-4o) │ │ (NoSQL) │ │ Insights │ - │ │ │ │ │ │ - │ Chat model │ │ Threads & │ │ Telemetry │ - │ completions │ │ Messages │ │ & Tracing │ - └──────────────┘ └──────────────┘ └──────────────┘ -``` - -## Key Design Decisions - -### 1. Runtime Tool Selection (Agent Autonomy) - -The agent is configured with multiple tools but **decides at runtime** which tool(s) to invoke: - -```python -# Tools are registered, but agent decides when to use them -agent = ChatAgent( - chat_client=azure_openai_client, - instructions="You are a helpful assistant. 
Use available tools when needed.", - tools=[ - get_weather, # Weather information - search_web, # Web search - calculate, # Math operations - get_stock_price, # Stock quotes - ] -) - -# User asks: "What's the weather in Seattle and what's 15% tip on $85?" -# Agent autonomously invokes: get_weather("Seattle") AND calculate("85 * 0.15") -``` - -### 2. Cosmos DB Persistence Strategy - -**Data Model**: One document per message (optimized for append-heavy workloads) - -```json -{ - "id": "msg_abc123", - "thread_id": "thread_xyz789", - "role": "user", - "content": "What's the weather in Seattle?", - "timestamp": "2024-12-06T10:30:00Z", - "metadata": { - "tool_calls": null, - "model": null - } -} -``` - -**Partition Strategy**: - -- **Partition Key**: `/thread_id` (optimal for retrieving all messages in a conversation) -- All messages for a thread are stored together, enabling efficient queries - -### 3. Azure Functions Hosting - -Using **HTTP Triggers** for a familiar REST API pattern: - -- Standard HTTP trigger endpoints (POST, GET, DELETE) -- Explicit state management via Cosmos DB -- Flex Consumption plan for serverless scaling (0 to thousands of instances) -- Simple deployment model using Azure Functions Core Tools or `azd` - -### 4. 
Simple Thread-Based Architecture - -```python -# Thread isolation via partition key -async def get_thread_messages(thread_id: str): - query = "SELECT * FROM c WHERE c.thread_id = @thread_id ORDER BY c.timestamp" - return await container.query_items( - query=query, - parameters=[{"name": "@thread_id", "value": thread_id}], - partition_key=thread_id # Scoped to thread's partition - ) -``` - -## API Design - -### Endpoints - -| Method | Path | Description | -|--------|------|-------------| -| `POST` | `/api/threads` | Create a new conversation thread | -| `GET` | `/api/threads/{thread_id}` | Get thread metadata | -| `DELETE` | `/api/threads/{thread_id}` | Delete a thread and its messages | -| `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | -| `GET` | `/api/threads/{thread_id}/messages` | Get conversation history | - -### Request/Response Examples - -**Create Thread** - -```http -POST /api/threads - -{ - "metadata": { - "user_id": "user_123", - "session_type": "support" - } -} -``` - -**Send Message** - -```http -POST /api/threads/thread_xyz789/messages -Content-Type: application/json - -{ - "content": "What's the weather in Seattle and calculate 15% tip on $85?" -} -``` - -**Response** (with tool usage) - -```json -{ - "id": "msg_resp_456", - "thread_id": "thread_xyz789", - "role": "assistant", - "content": "The weather in Seattle is 52°F with light rain. 
A 15% tip on $85 is $12.75.", - "tool_calls": [ - { - "tool": "get_weather", - "arguments": {"location": "Seattle"}, - "result": {"temp": 52, "condition": "light rain"} - }, - { - "tool": "calculate", - "arguments": {"expression": "85 * 0.15"}, - "result": 12.75 - } - ], - "timestamp": "2024-12-06T10:30:05Z" -} -``` - -## E2E Code Samples - -### Basic Usage (Phase 1) - -```python -from azure.identity import DefaultAzureCredential -from microsoft.agents.ai.azure import AzureOpenAIChatClient -from microsoft.agents.core import ChatAgent - -# Initialize Azure OpenAI client -credential = DefaultAzureCredential() -chat_client = AzureOpenAIChatClient( - endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - model=os.environ["AZURE_OPENAI_MODEL"], - credential=credential, -) - -# Define tools - agent will decide when to use them -@ai_function -def get_weather(location: str) -> dict: - """Get current weather for a location.""" - # Implementation here - return {"temp": 52, "condition": "light rain", "location": location} - -@ai_function -def calculate(expression: str) -> float: - """Evaluate a mathematical expression.""" - # Safe evaluation implementation - return eval(expression) # Use safe_eval in production - -@ai_function -def search_knowledge_base(query: str) -> list[dict]: - """Search the knowledge base for relevant information.""" - # Could connect to Azure AI Search, Cosmos DB, etc. - return [{"title": "...", "content": "..."}] - -# Create agent with multiple tools -agent = ChatAgent( - chat_client=chat_client, - instructions="""You are a helpful assistant. - Use the available tools when they can help answer the user's question. 
- You can use multiple tools in a single response if needed.""", - tools=[get_weather, calculate, search_knowledge_base], -) - -# Agent autonomously decides which tools to use -response = await agent.run("What's the weather in NYC and what's 20% of 150?") -# Agent will call: get_weather("NYC") AND calculate("150 * 0.20") -``` - -### With Cosmos DB Persistence (Phase 2) - -```python -from microsoft.agents.stores.cosmosdb import CosmosDBChatMessageStore - -# Initialize Cosmos DB store -message_store = CosmosDBChatMessageStore( - endpoint=os.environ["COSMOS_ENDPOINT"], - database_name="chat_db", - container_name="messages", - credential=DefaultAzureCredential(), -) - -# Create agent with persistent storage -agent = ChatAgent( - chat_client=chat_client, - instructions="...", - tools=[get_weather, calculate, search_knowledge_base], - message_store=message_store, # Persistent storage -) - -# Messages are automatically persisted -thread_id = "thread_abc123" -response = await agent.run( - "What's the weather?", - thread_id=thread_id, -) -``` - -### Azure Functions Integration (Phase 3) - -```python -# function_app.py -import azure.functions as func -from microsoft.agents.ai.azure import AzureOpenAIChatClient -from microsoft.agents.core import ChatAgent -from microsoft.agents.stores.cosmosdb import CosmosDBChatMessageStore - -app = func.FunctionApp() - -# Singleton instances (reused across invocations) -chat_client = None -message_store = None -agent = None - -def get_agent(): - global chat_client, message_store, agent - if agent is None: - chat_client = AzureOpenAIChatClient(...) - message_store = CosmosDBChatMessageStore(...) 
- agent = ChatAgent( - chat_client=chat_client, - tools=[get_weather, calculate, search_knowledge_base], - message_store=message_store, - ) - return agent - -@app.route(route="threads/{thread_id}/messages", methods=["POST"]) -async def send_message(req: func.HttpRequest) -> func.HttpResponse: - thread_id = req.route_params.get("thread_id") - body = req.get_json() - - agent = get_agent() - response = await agent.run( - body["content"], - thread_id=thread_id, - ) - - return func.HttpResponse( - body=json.dumps(response.to_dict()), - mimetype="application/json", - ) -``` - -## Phased Implementation Plan - -### Phase 1: Core Chat API with Cosmos DB Persistence ✅ - -**Goal**: Demonstrate runtime tool selection with persistent storage - -- [x] Azure Functions HTTP triggers -- [x] Function tools (weather, calculator, knowledge base) -- [x] Cosmos DB thread and message persistence -- [x] `demo.http` file for testing -- [x] README with setup instructions -- [x] Infrastructure as Code (Bicep + azd) - -**Files**: - -```text -python/samples/demos/enterprise-chat-agent/ -├── README.md -├── requirements.txt -├── local.settings.json.example -├── host.json -├── function_app.py -├── cosmos_store.py # Cosmos DB conversation store -├── tools/ -│ ├── __init__.py -│ ├── weather.py -│ ├── calculator.py -│ └── knowledge_base.py -├── demo.http -├── azure.yaml # azd configuration -└── infra/ - ├── main.bicep - ├── abbreviations.json - └── core/ - ├── database/cosmos-nosql.bicep - ├── host/function-app.bicep - ├── monitor/monitoring.bicep - └── storage/storage-account.bicep -``` - -### Phase 2: Agent Framework Integration (PR #2) - -**Goal**: Integrate with Microsoft Agent Framework - -- [ ] Replace placeholder logic with `ChatAgent` -- [ ] Azure OpenAI integration via Agent Framework -- [ ] Conversation history passed to agent for context -- [ ] Tool execution via Agent Framework runtime - -### Phase 3: Production Hardening (PR #3) - -**Goal**: Enterprise-ready patterns - -- [ ] Managed 
Identity authentication -- [ ] OpenTelemetry tracing integration -- [ ] Structured logging -- [ ] Health check endpoint -- [ ] Retry policies and error handling - -### Phase 4: Observability Dashboard (PR #4) - -**Goal**: Operational visibility - -- [ ] Application Insights integration -- [ ] Custom metrics (tokens, latency, tool usage) -- [ ] Sample Kusto queries -- [ ] Azure Dashboard template (optional) - -### Phase 5: Redis Caching Extension (Future) - -**Goal**: High-frequency access optimization - -- [ ] Redis session cache -- [ ] Recent messages caching -- [ ] Rate limiting support - -## Security Considerations - -| Concern | Mitigation | -|---------|------------| -| **Authentication** | Azure AD / API Key via `X-API-Key` header | -| **Thread Isolation** | Cosmos DB partition key on `thread_id` | -| **Secrets Management** | Azure Key Vault for connection strings | -| **Network Security** | Private Endpoints for Cosmos DB & OpenAI | -| **Input Validation** | Pydantic models for request validation | - -## Testing Strategy - -1. **Unit Tests**: Tool functions, message store operations -2. **Integration Tests**: Cosmos DB emulator, Azure OpenAI mock -3. **E2E Tests**: Full API flow with `demo.http` -4. **Load Tests**: Azure Load Testing for scale validation - -## Open Questions - -1. **Package location**: Should `CosmosDBChatMessageStore` be a new package or part of existing `stores` package? -2. **Streaming support**: Should Phase 1 include SSE streaming responses? - -## Appendix: Tool Selection Examples - -### Example 1: Single Tool - -```text -User: "What's the weather in Tokyo?" -Agent Decision: → get_weather("Tokyo") -Response: "The weather in Tokyo is 68°F and sunny." -``` - -### Example 2: Multiple Tools - -```text -User: "What's the weather in Paris and what's 18% tip on €75?" -Agent Decision: → get_weather("Paris") + calculate("75 * 0.18") -Response: "Paris is 55°F with clouds. An 18% tip on €75 is €13.50." 
-``` - -### Example 3: No Tools Needed - -```text -User: "Tell me a joke" -Agent Decision: → No tools (direct response) -Response: "Why don't scientists trust atoms? Because they make up everything!" -``` - -### Example 4: Tool Selection Based on Context - -```text -User: "I need help with my order" -Agent Decision: → search_knowledge_base("order help support FAQ") -Response: "Based on our FAQ, here's how to check your order status..." -``` diff --git a/python/samples/demos/enterprise-chat-agent/tools/__init__.py b/python/samples/demos/enterprise-chat-agent/tools/__init__.py deleted file mode 100644 index 11b66644f0..0000000000 --- a/python/samples/demos/enterprise-chat-agent/tools/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -""" -Enterprise Chat Agent - Function Tools - -This module contains the tools that the ChatAgent can invoke at runtime. -The agent autonomously decides which tools to use based on the user's message. -""" - -from tools.weather import get_weather -from tools.calculator import calculate -from tools.knowledge_base import search_knowledge_base - -__all__ = [ - "get_weather", - "calculate", - "search_knowledge_base", -] From 8c7634d80c830fcabe1b33b2c2a5fc306f131359 Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:26:28 +0000 Subject: [PATCH 04/10] Refactor Enterprise Chat Agent design and observability - Updated DESIGN.md to reflect new architecture and goals for the Enterprise Chat Agent, including enhanced agent autonomy and integration with Microsoft Docs. - Removed MCP_INTEGRATION.md and observability-design.md as their content is now integrated into DESIGN.md. - Added new API endpoint to list conversation threads with optional filters for user ID and status. - Implemented Cosmos DB query logic for listing threads in cosmos_store.py. - Enhanced observability by initializing logging levels for httpx and httpcore to reduce verbosity. 
- Updated routes/messages.py to utilize AgentSession for better context management during agent runs. - Added debug endpoint to list session IDs for troubleshooting. - Improved logging throughout the message handling process for better traceability. --- .../enterprise-chat-agent/README.md | 129 ++++- .../enterprise-chat-agent/demo-ui.html | 443 ++++++++++++++++++ .../enterprise-chat-agent/demo.http | 15 + .../docs/AGENT_IMPLEMENTATION.md | 224 --------- .../enterprise-chat-agent/docs/DESIGN.md | 363 +++++++++----- .../docs/MCP_INTEGRATION.md | 164 ------- .../docs/observability-design.md | 243 ---------- .../enterprise-chat-agent/host.json | 6 + .../local.settings.json.example | 4 + .../enterprise-chat-agent/routes/messages.py | 19 +- .../enterprise-chat-agent/routes/threads.py | 99 ++++ .../services/cosmos_store.py | 55 +++ .../services/observability.py | 5 + 13 files changed, 1007 insertions(+), 762 deletions(-) create mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html delete mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md delete mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md delete mode 100644 python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/README.md b/python/samples/05-end-to-end/enterprise-chat-agent/README.md index 45770b5d4d..1ca7747520 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/README.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/README.md @@ -133,11 +133,33 @@ curl -X POST http://localhost:7071/api/threads/{thread_id}/messages \ | Method | Path | Description | |--------|------|-------------| | `POST` | `/api/threads` | Create a new conversation thread | +| `GET` | `/api/threads` | List all threads (with optional filters) | | `GET` | `/api/threads/{thread_id}` | Get thread metadata | | `DELETE` | 
`/api/threads/{thread_id}` | Delete a thread | | `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | | `GET` | `/api/threads/{thread_id}/messages` | Get conversation history | +### Query Parameters for List Threads + +| Parameter | Type | Description | +|-----------|------|-------------| +| `user_id` | string | Filter threads by user ID | +| `status` | string | Filter by status: `active`, `archived`, `deleted` | +| `limit` | int | Max threads to return (default 50, max 100) | +| `offset` | int | Skip N threads for pagination | + +**Examples:** +```bash +# List all threads +GET /api/threads + +# List threads for a specific user +GET /api/threads?user_id=user_1234 + +# List active threads with pagination +GET /api/threads?status=active&limit=20&offset=0 +``` + ## Tool Selection Demo The agent is configured with multiple tools and **decides at runtime** which to use: @@ -174,6 +196,76 @@ User: "Tell me a joke" | `get_weather` | Current weather data | Weather queries | | `calculate` | Safe math evaluation | Calculations, tips, conversions | +## Streaming Responses + +### Current Approach + +This sample uses **buffered responses** - the agent processes the entire message and returns the complete response at once. This works well with Azure Functions and is simpler to implement. 
+ +### Streaming Support in Agent Framework + +The Agent Framework supports streaming via `ResponseStream`: + +```python +from agent_framework import Agent, AgentSession + +# Enable streaming +response_stream = await agent.run( + prompt="Hello, world!", + session=session, + stream=True # Returns ResponseStream instead of Response +) + +# Iterate over chunks as they arrive +async for chunk in response_stream: + print(chunk.content, end="", flush=True) +``` + +### Why This Sample Doesn't Use Streaming + +**Azure Functions buffers HTTP responses by default** - with the standard HTTP trigger response model, even Server-Sent Events (SSE) or chunked transfer encoding output is collected in full before it is sent to the client. This means true streaming isn't achievable in this sample without opting in to the Azure Functions Python HTTP streams feature (the `azurefunctions-extensions-http-fastapi` extension) or adding infrastructure. + +### Streaming Alternatives + +If you need true streaming for a production chat experience, consider these options: + +| Option | Description | Pros | Cons | +|--------|-------------|------|------| +| **FastAPI/Starlette** | Deploy as a container with native async streaming | True SSE streaming, simple to implement | Need container hosting (App Service, ACA) | +| **Azure Container Apps** | Host a streaming-capable web framework | Native streaming, auto-scaling | More infrastructure to manage | +| **Azure Web PubSub** | Real-time messaging service | True real-time, scalable | Additional service cost, more complexity | +| **Azure SignalR** | Managed SignalR service | WebSocket support, .NET integration | Adds dependency | + +#### FastAPI Streaming Example + +```python +from fastapi import FastAPI +from fastapi.responses import StreamingResponse +from agent_framework import Agent, AgentSession + +app = FastAPI() + +@app.post("/api/threads/{thread_id}/messages/stream") +async def send_message_stream(thread_id: str, request: MessageRequest): + async def generate(): + session = AgentSession(session_id=thread_id) + response_stream = await agent.run( + prompt=request.content, + session=session, + stream=True + )
+ async for chunk in response_stream: + yield f"data: {json.dumps({'content': chunk.content})}\n\n" + yield "data: [DONE]\n\n" + + return StreamingResponse(generate(), media_type="text/event-stream") +``` + +### Recommendation + +- **For demos/prototypes**: Use buffered responses (this sample) with a typing indicator in the UI +- **For production chat UIs**: Consider FastAPI on Azure Container Apps or Web PubSub for true streaming + ## Project Structure ```text @@ -184,38 +276,35 @@ enterprise-chat-agent/ ├── requirements.txt # Python dependencies ├── local.settings.json.example ├── host.json # Azure Functions host config -├── function_app.py # HTTP trigger endpoints -├── cosmos_store.py # Cosmos DB conversation store +├── function_app.py # Azure Functions entry point +├── demo.http # API test requests +├── demo-ui.html # Browser-based demo UI +├── services/ +│ ├── agent_service.py # ChatAgent + CosmosHistoryProvider +│ ├── cosmos_store.py # Thread metadata storage +│ └── observability.py # OpenTelemetry instrumentation +├── routes/ +│ ├── threads.py # Thread CRUD endpoints +│ ├── messages.py # Message endpoint +│ └── health.py # Health check ├── tools/ -│ ├── __init__.py │ ├── weather.py # Weather tool │ ├── calculator.py # Calculator tool │ ├── knowledge_base.py # Knowledge base search tool │ └── microsoft_docs.py # Microsoft Docs MCP integration -├── infra/ # Infrastructure as Code (Bicep) -│ ├── main.bicep # Main deployment template -│ ├── main.parameters.json # Parameter file -│ ├── abbreviations.json # Resource naming abbreviations -│ └── core/ -│ ├── database/ -│ │ └── cosmos-nosql.bicep -│ ├── host/ -│ │ └── function-app.bicep -│ ├── monitor/ -│ │ └── monitoring.bicep -│ └── storage/ -│ └── storage-account.bicep -└── demo.http # Test requests +└── infra/ # Infrastructure as Code (Bicep) + ├── main.bicep # Main deployment template + └── core/ # Modular Bicep components ``` ## Design Documentation See [DESIGN.md](./DESIGN.md) for: -- Detailed architecture 
decisions +- Architecture diagrams and message processing flow - Cosmos DB data model and partition strategy -- Thread-based conversation isolation -- Phased implementation plan +- Observability span hierarchy (framework vs custom) +- Tool selection and MCP integration details - Security considerations ## Related Resources diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html b/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html new file mode 100644 index 0000000000..8b9db8e4fe --- /dev/null +++ b/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html @@ -0,0 +1,443 @@ + + + + + + Enterprise Chat Agent Demo + + + + + +
+
+
Select or create a chat
+
+
+

👋 Welcome!

+

Create a new chat to get started

+
+
+
+ + +
+
+ + + + diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/demo.http b/python/samples/05-end-to-end/enterprise-chat-agent/demo.http index 348331cf4b..7221f8d51b 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/demo.http +++ b/python/samples/05-end-to-end/enterprise-chat-agent/demo.http @@ -34,6 +34,21 @@ Content-Type: application/json ### Get thread details GET {{baseUrl}}/threads/{{threadId}} +### List all threads +GET {{baseUrl}}/threads + +### List threads for a specific user +GET {{baseUrl}}/threads?user_id=user_1234 + +### List active threads only +GET {{baseUrl}}/threads?status=active + +### List threads with pagination +GET {{baseUrl}}/threads?limit=10&offset=0 + +### List threads with all filters +GET {{baseUrl}}/threads?user_id=user_1234&status=active&limit=20 + ### ============================================================ ### Tool Selection Examples ### ============================================================ diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md deleted file mode 100644 index 2f62b12e8d..0000000000 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/AGENT_IMPLEMENTATION.md +++ /dev/null @@ -1,224 +0,0 @@ -# Agent Implementation Guide - -## What Was Implemented - -### 1. 
ChatAgent Service with CosmosHistoryProvider (`services/agent_service.py`) - -Created a new service module that: -- Initializes a singleton `ChatAgent` instance -- Configures `CosmosHistoryProvider` for automatic conversation persistence -- Configures Azure OpenAI client with Azure AD authentication -- Registers 5 tools: `get_weather`, `calculate`, `search_knowledge_base`, `search_microsoft_docs`, `search_microsoft_code_samples` -- Defines system instructions for the agent - -**Key Features:** -```python -from agent_framework_azure_cosmos import CosmosHistoryProvider - -# History provider automatically loads/stores conversation history -history_provider = CosmosHistoryProvider( - source_id="enterprise_chat_agent", - endpoint=os.environ["AZURE_COSMOS_ENDPOINT"], - database_name="chat_db", - container_name="messages", - credential=DefaultAzureCredential(), - load_messages=True, # Auto-load history before each run - store_inputs=True, # Auto-store user messages - store_outputs=True, # Auto-store assistant responses -) - -# Agent uses history provider as context provider -agent = ChatAgent( - chat_client=client, - instructions="You are a helpful enterprise chat assistant...", - tools=[get_weather, calculate, search_knowledge_base, ...], - context_providers=[history_provider], # Auto-persist history! - name="EnterpriseAssistant", -) -``` - -### 2. Tool Updates - -All tools use the `@ai_function` decorator: - -```python -from agent_framework.ai import ai_function - -@ai_function -def get_weather(location: str) -> dict: - """Get current weather for a location.""" - ... - -@ai_function -def search_microsoft_docs(query: str) -> list[dict]: - """Search official Microsoft documentation.""" - ... -``` - -This decorator enables the agent to: -- Discover and call tools automatically -- Generate proper function call schemas -- Handle tool execution and response parsing - -### 3. 
Simplified Message Route (`routes/messages.py`) - -**Before (Manual storage):** -```python -# Store user message manually -await store.add_message(thread_id, user_message_id, "user", content) - -# Load history manually -message_history = await store.get_messages(thread_id) -chat_messages = convert_messages_to_chat_messages(message_history) - -# Run agent -response = await agent.run(chat_messages) - -# Store response manually -await store.add_message(thread_id, assistant_message_id, "assistant", response.content) -``` - -**After (With CosmosHistoryProvider):** -```python -# Get agent (configured with CosmosHistoryProvider) -agent = get_agent() - -# Run agent - history is loaded and stored automatically! -response = await agent.run(content, session_id=thread_id) -``` - -The `CosmosHistoryProvider` handles all message persistence automatically: -- Loads conversation history before each `agent.run()` -- Stores user input after each run -- Stores assistant response after each run -- Uses `session_id` as the Cosmos DB partition key - -## How It Works - -### Flow Diagram - -``` -User Request - ↓ -POST /api/threads/{thread_id}/messages - ↓ -1. Validate thread exists - ↓ -2. agent.run(content, session_id=thread_id) - ↓ - ┌─────────────────────────────────────────┐ - │ CosmosHistoryProvider (automatic): │ - │ • Load previous messages from Cosmos │ - │ • Add to agent context │ - └─────────────────────────────────────────┘ - ↓ -3. Agent analyzes context and decides tools - ↓ -4. Agent automatically calls tools as needed: - - get_weather("Seattle") - - calculate("85 * 0.15") - - search_microsoft_docs("Azure Functions") - ↓ - ┌─────────────────────────────────────────┐ - │ CosmosHistoryProvider (automatic): │ - │ • Store user message to Cosmos │ - │ • Store assistant response to Cosmos │ - └─────────────────────────────────────────┘ - ↓ -5. Return response to user -``` - -### Example Interactions - -**Weather Query:** -``` -User: "What's the weather in Tokyo?" 
-→ Agent calls: get_weather("Tokyo") -→ Response: "The weather in Tokyo is 72°F with partly cloudy conditions." -``` - -**Multi-tool Query:** -``` -User: "What's the weather in Paris and what's 18% tip on €75?" -→ Agent calls: get_weather("Paris") AND calculate("75 * 0.18") -→ Response: "The weather in Paris is 65°F with light rain. An 18% tip on €75 is €13.50." -``` - -**Microsoft Docs Query:** -``` -User: "How do I deploy Azure Functions with Python?" -→ Agent calls: search_microsoft_docs("Azure Functions Python deployment") -→ Response: "To deploy Azure Functions with Python, you can use..." -``` - -**No Tools Needed:** -``` -User: "Tell me a joke" -→ Agent responds directly (no tools called) -→ Response: "Why did the programmer quit? Because they didn't get arrays!" -``` - -## Environment Variables Required - -Make sure your `local.settings.json` includes: - -```json -{ - "Values": { - "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", - "AZURE_OPENAI_MODEL": "gpt-4o", - "AZURE_OPENAI_API_VERSION": "2024-10-21", - "AZURE_COSMOS_ENDPOINT": "https://your-cosmos.documents.azure.com:443/", - "AZURE_COSMOS_DATABASE_NAME": "chat_db", - "AZURE_COSMOS_CONTAINER_NAME": "messages", - "AZURE_COSMOS_THREADS_CONTAINER_NAME": "threads" - } -} -``` - -**Note:** Two containers are used: -- `AZURE_COSMOS_CONTAINER_NAME` - Messages (managed by `CosmosHistoryProvider`) -- `AZURE_COSMOS_THREADS_CONTAINER_NAME` - Thread metadata (managed by `CosmosConversationStore`) - -## Next Steps - -### Local Testing -```bash -# Install dependencies -pip install -r requirements.txt - -# Start the function app -func start - -# Test with demo.http or curl -curl -X POST http://localhost:7071/api/threads -curl -X POST http://localhost:7071/api/threads/{thread_id}/messages \ - -H "Content-Type: application/json" \ - -d '{"content": "What is the weather in Seattle?"}' -``` - -### Deploy to Azure -```bash -azd auth login -azd up -``` - -## Key Benefits of This Implementation - -1. 
**Intelligent Tool Selection**: The LLM decides which tools to use based on context -2. **Multi-tool Coordination**: Can call multiple tools in one response -3. **Automatic History Persistence**: `CosmosHistoryProvider` handles message storage automatically -4. **Simplified Code**: No manual message load/store - just `agent.run(content, session_id=...)` -5. **Production Ready**: Includes error handling, observability, and security -6. **Scalable**: Serverless Azure Functions with serverless Cosmos DB -7. **Observable**: OpenTelemetry spans for all operations - -## Architecture Pattern - -This implementation demonstrates the **Agent with Tools** pattern: -- Single AI agent (not a workflow) -- Dynamic tool selection by LLM -- Suitable for chat-based RAG applications -- Simple, maintainable, and efficient - -For complex multi-agent orchestration, consider using [Microsoft Agent Framework Workflows](https://learn.microsoft.com/agent-framework/user-guide/workflows/overview). diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md index 35abf45d81..ab84768f26 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md @@ -2,44 +2,33 @@ status: in-progress contact: @vj-msft date: 2024-12-06 -updated: 2026-03-24 -deciders: TBD -consulted: TBD -informed: TBD +updated: 2026-03-25 --- -# Production Chat API with Azure Functions, Cosmos DB & Agent Framework +# Enterprise Chat Agent — Design Document -## References +## Overview -- **GitHub Issue**: [#2436 - Python: [Sample Request] Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) -- **Microsoft Documentation**: - - [Create and run a durable agent (Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) - - [Agent 
Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) - - [Multi-agent Reference Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) - - [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) +This document describes the architecture and design decisions for a **production-ready Chat API** built with Microsoft Agent Framework, Azure Functions, and Cosmos DB. -## What is the goal of this feature? +### Goals -Provide a **production-ready sample** demonstrating how to build a scalable Chat API using the Microsoft Agent Framework with: +1. Demonstrate enterprise patterns: state persistence, observability, and thread-based conversations +2. Showcase **agent autonomy** — the agent decides which tools to invoke at runtime based on conversation context +3. Provide a reference architecture for deploying Agent Framework in production +4. Enable one-command deployment with `azd up` -1. **Azure Functions** for serverless, scalable hosting -2. **Azure Cosmos DB** for durable conversation persistence -3. **Function Tools** showcasing runtime tool selection by the agent +### References -### Value Proposition - -- Developers can use this sample as a reference architecture for deploying Agent Framework in production -- Demonstrates enterprise patterns: state persistence, observability, and thread-based conversations -- Shows the power of **agent autonomy** - the agent decides which tools to invoke at runtime based on conversation context - -### Success Metrics - -1. Sample is referenced in at least 3 external blog posts/tutorials within 6 months -2. 
Sample serves as the canonical reference for "Agent Framework + Azure Functions + Cosmos DB" stack +- **GitHub Issue**: [#2436 - Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) +- [Create and run a durable agent (Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) +- [Agent Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) +- [Multi-agent Reference Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) +- [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) +--- -## Architecture Overview +## Architecture ```mermaid flowchart TB @@ -86,6 +75,54 @@ flowchart TB Code --> MCPDocs ``` +### Components + +| Component | Technology | Purpose | +|-----------|------------|---------| +| API Layer | Azure Functions (Flex Consumption) | Serverless HTTP endpoints | +| Agent | Microsoft Agent Framework ChatAgent | Conversation orchestration with tools | +| LLM | Azure OpenAI (GPT-4o) | Language model for responses | +| Message Storage | Cosmos DB + CosmosHistoryProvider | Automatic conversation persistence | +| Thread Metadata | Cosmos DB + CosmosConversationStore | Thread lifecycle management | +| External Knowledge | MCP (Microsoft Learn) | Official documentation access | +| Observability | OpenTelemetry + Application Insights | Tracing and monitoring | + +--- + +## Message Processing Flow + +```text +User Request + ↓ +POST /api/threads/{thread_id}/messages + ↓ +1. Validate thread exists (Cosmos DB lookup) + ↓ +2. 
agent.run(content, session=AgentSession(thread_id)) + ↓ + ┌─────────────────────────────────────────┐ + │ CosmosHistoryProvider (automatic): │ + │ • Load previous messages from Cosmos │ + │ • Add to agent context │ + └─────────────────────────────────────────┘ + ↓ +3. Agent analyzes context and decides which tools to use + ↓ +4. Agent automatically calls tools as needed: + • get_weather("Seattle") + • calculate("85 * 0.15") + • microsoft_docs_search("Azure Functions") + ↓ + ┌─────────────────────────────────────────┐ + │ CosmosHistoryProvider (automatic): │ + │ • Store user message to Cosmos │ + │ • Store assistant response to Cosmos │ + └─────────────────────────────────────────┘ + ↓ +5. Return response to user +``` + +--- ## Key Design Decisions @@ -93,51 +130,66 @@ flowchart TB The agent is configured with multiple tools but **decides at runtime** which tool(s) to invoke based on user intent. Tools are registered once; the agent autonomously selects which to use for each request. -**Implemented Tools**: -| Tool | Purpose | Status | +| Tool | Purpose | Source | |------|---------|--------| -| `get_weather` | Weather information | ✅ Simulated | -| `calculate` | Math expressions | ✅ Safe AST eval | -| `search_knowledge_base` | FAQ/KB search | ✅ Simulated | -| `microsoft_docs_search` | Microsoft Learn search | ✅ MCP | -| `microsoft_code_sample_search` | Code sample search | ✅ MCP | +| `get_weather` | Weather information | Local (simulated) | +| `calculate` | Math expressions | Local (safe AST eval) | +| `search_knowledge_base` | FAQ/KB search | Local (simulated) | +| `microsoft_docs_search` | Microsoft Learn search | MCP Server | +| `microsoft_code_sample_search` | Code sample search | MCP Server | + +**Example Interactions:** + +| User Query | Tool(s) Called | +|------------|----------------| +| "What's the weather in Tokyo?" | `get_weather("Tokyo")` | +| "What's the weather in Paris and what's 18% tip on €75?"
| `get_weather("Paris")` AND `calculate("75 * 0.18")` | +| "How do I configure partition keys in Azure Cosmos DB?" | `microsoft_docs_search("Cosmos DB partition keys")` | +| "Tell me a joke" | (No tools — direct response) | ### 2. Cosmos DB Persistence Strategy -**Two-Container Approach**: +**Two-Container Approach:** -| Container | Purpose | Managed By | -|-----------|---------|------------| -| `threads` | Thread metadata (user_id, title, timestamps) | `CosmosConversationStore` (custom) | -| `messages` | Conversation messages | `CosmosHistoryProvider` (framework) | +| Container | Purpose | Managed By | Partition Key | +|-----------|---------|------------|---------------| +| `threads` | Thread metadata (user_id, title, timestamps) | `CosmosConversationStore` (custom) | `/id` | +| `messages` | Conversation messages | `CosmosHistoryProvider` (framework) | `/session_id` | -**CosmosHistoryProvider** from `agent-framework-azure-cosmos` ([PR #4271](https://github.com/microsoft/agent-framework/pull/4271)) automatically: +**CosmosHistoryProvider** from `agent-framework-azure-cosmos` automatically: - Loads conversation history before each agent run - Stores user inputs and agent responses after each run -- Uses `session_id` (thread_id) as the partition key - -**Partition Strategy**: -- **Messages**: `/session_id` - all messages for a thread stored together -- **Threads**: `/id` - thread metadata isolated by thread_id -- `source_id` field allows multiple agents to share a container +- Uses `session_id` (which equals `thread_id`) as the partition key +- Supports a `source_id` field, allowing multiple agents to share a container ### 3. 
Azure Functions Hosting Using **HTTP Triggers** for a familiar REST API pattern: -- Standard HTTP trigger endpoints (POST, GET, DELETE) -- Singleton pattern for agent and history provider (reused across invocations) -- Flex Consumption plan for serverless scaling -- Simple deployment via `azd up` +| Aspect | Choice | Rationale | +|--------|--------|-----------| +| Trigger Type | HTTP Triggers | Standard REST API pattern | +| Hosting Plan | Flex Consumption | Serverless scaling, cost-effective | +| Agent Lifecycle | Singleton pattern | Reused across invocations | +| Deployment | `azd up` | One-command infrastructure + code | -### 4. Observability +### 4. MCP Integration for Microsoft Docs -Using Agent Framework's `setup_observability()` with custom spans for: -- HTTP request lifecycle -- Cosmos DB operations -- Request validation +**Model Context Protocol (MCP)** provides access to official Microsoft documentation: +- Official Microsoft Learn documentation +- Azure service documentation +- Code samples and examples +- API references -Exporters: OTLP and Azure Monitor (Application Insights) +The integration uses `MCPStreamableHTTPTool` with per-request connections (serverless-friendly pattern). 
+ +**Benefits:** +- ✅ Authoritative information from official sources +- ✅ Always current with latest product updates +- ✅ Reduces hallucination by grounding in actual documentation +- ✅ Real, tested code samples + +--- ## API Design @@ -146,69 +198,118 @@ Exporters: OTLP and Azure Monitor (Application Insights) | Method | Path | Description | |--------|------|-------------| | `POST` | `/api/threads` | Create a new conversation thread | +| `GET` | `/api/threads` | List all threads (with optional filters) | | `GET` | `/api/threads/{thread_id}` | Get thread metadata | | `DELETE` | `/api/threads/{thread_id}` | Delete a thread and its messages | | `POST` | `/api/threads/{thread_id}/messages` | Send a message and get response | +| `GET` | `/api/threads/{thread_id}/messages` | Get conversation history | | `GET` | `/api/health` | Health check | +### Query Parameters for List Threads + +| Parameter | Type | Description | +|-----------|------|-------------| +| `user_id` | string | Filter threads by user ID | +| `status` | string | Filter by status: `active`, `archived`, `deleted` | +| `limit` | int | Max threads to return (default 50, max 100) | +| `offset` | int | Skip N threads for pagination | + ### Request/Response Behavior -**Create Thread**: Accepts optional `user_id`, `title`, and `metadata`. Returns created thread with generated `thread_id`. +| Endpoint | Behavior | +|----------|----------| +| **Create Thread** | Accepts optional `user_id`, `title`, `metadata`. Returns created thread with generated `thread_id`. | +| **Send Message** | Accepts `content` string. Agent loads history, processes request (with tools as needed), persists conversation. Returns assistant response with tool calls made. | +| **Delete Thread** | Removes thread metadata and clears all messages from the history provider. | + +--- -**Send Message**: Accepts `content` string. Agent automatically loads history, processes request (with tool calls as needed), and persists the conversation. 
Returns assistant response with any tool calls made. +## Observability -**Delete Thread**: Removes thread metadata and clears all messages from the history provider. +### Design Principles -See [demo.http](../demo.http) for complete request/response examples. +1. **Don't duplicate framework instrumentation** — Use the Agent Framework's automatic spans for agent/LLM/tool tracing +2. **Fill the gaps** — Add manual spans only for layers the framework cannot see (HTTP, Cosmos DB, validation) +3. **Use framework APIs** — Leverage `setup_observability()`, `get_tracer()`, and `get_meter()` from `agent_framework` -## Implementation Status +### Framework Built-in Instrumentation (Automatic) -### Phase 1: Core Chat API ✅ +| Span Name Pattern | When Created | Key Attributes | +|---|---|---| +| `invoke_agent {agent_name}` | `agent.run()` | `gen_ai.agent.id`, `gen_ai.agent.name`, `gen_ai.conversation.id` | +| `chat {model_id}` | `chat_client.get_response()` | `gen_ai.request.model`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` | +| `execute_tool {function_name}` | Tool invocations | `gen_ai.tool.name`, `gen_ai.tool.call.id`, `gen_ai.tool.type` | -- [x] Azure Functions HTTP triggers -- [x] ChatAgent with Azure OpenAI -- [x] Local tools (weather, calculator, knowledge base) -- [x] `CosmosHistoryProvider` for automatic message persistence -- [x] `CosmosConversationStore` for thread metadata -- [x] `demo.http` file for testing -- [x] README with setup instructions -- [x] Infrastructure as Code (Bicep + azd) +### Custom Spans (Manual) -### Phase 2: Observability ✅ +| Layer | Span Name Pattern | Purpose | +|-------|-------------------|---------| +| HTTP Request | `http.request {method} {path}` | Track request lifecycle | +| Cosmos DB | `cosmos.{operation} {container}` | Track database operations | +| Redis | `redis.{operation} {key_pattern}` | Track caching operations | +| AI Search | `ai_search.{operation} {index}` | Track search operations | +| Validation | 
`request.validate {operation}` | Track authorization checks | -- [x] OpenTelemetry integration via Agent Framework -- [x] Custom spans for HTTP requests and Cosmos operations -- [x] Structured logging -- [x] Health check endpoint +### Span Hierarchy -### Phase 3: MCP Integration ✅ +```text +http.request POST /threads/{thread_id}/messages ← MANUAL (HTTP layer) +├── cosmos.read threads ← MANUAL (Cosmos layer) +├── request.validate verify_thread_ownership ← MANUAL (Validation) +├── invoke_agent ChatAgent ← FRAMEWORK (automatic) +│ ├── chat gpt-4o ← FRAMEWORK (automatic) +│ │ └── (internal LLM call spans) +│ └── execute_tool get_weather ← FRAMEWORK (automatic) +├── cosmos.upsert threads ← MANUAL (Cosmos layer) +└── http.response ← MANUAL (optional) +``` + +### Tool vs Non-Tool Service Calls + +| Scenario | Manual Span Needed? | Why | +|----------|---------------------|-----| +| Service called **as agent tool** | ❌ No | Framework creates `execute_tool` span automatically | +| Service called **outside agent** | ✅ Yes | Framework doesn't see calls outside `agent.run()` | +| Cosmos DB (thread storage) | ✅ Yes | Always called outside agent context | -- [x] `MCPStreamableHTTPTool` for Microsoft Learn MCP server -- [x] `microsoft_docs_search` tool via MCP -- [x] `microsoft_code_sample_search` tool via MCP -- [x] Per-request MCP connection (serverless-friendly) +### Automatic Metrics -### Phase 4: Production Hardening (Future) +| Metric Name | Description | +|---|---| +| `gen_ai.client.operation.duration` | Duration of LLM operations | +| `gen_ai.client.token.usage` | Token usage (input/output) | +| `agent_framework.function.invocation.duration` | Tool function execution duration | + +### Viewing Traces + +| Environment | Backend | +|-------------|---------| +| Local Development | Jaeger, Aspire Dashboard, or AI Toolkit Extension | +| Azure Production | Application Insights → Transaction Search or Application Map | + +--- -- [ ] Managed Identity authentication (currently uses 
DefaultAzureCredential) -- [ ] Retry policies and circuit breakers -- [ ] Rate limiting -- [ ] Input sanitization +## Security Considerations -### Phase 5: Caching (Future) +| Concern | Mitigation | +|---------|------------| +| **Authentication** | `DefaultAzureCredential` (supports Managed Identity, CLI, etc.) | +| **Thread Isolation** | Cosmos DB partition key on `thread_id` / `session_id` | +| **Secrets Management** | Environment variables (Key Vault recommended for production) | +| **Input Validation** | Request body validation in route handlers | -- [ ] Redis session cache for high-frequency access -- [ ] Recent messages caching +--- ## Project Structure ```text -python/samples/demos/enterprise-chat-agent/ +enterprise-chat-agent/ ├── function_app.py # Azure Functions entry point ├── requirements.txt # Dependencies ├── host.json # Functions host configuration ├── azure.yaml # azd deployment configuration ├── demo.http # API test file +├── demo-ui.html # Browser-based demo UI ├── services/ │ ├── agent_service.py # ChatAgent + CosmosHistoryProvider │ ├── cosmos_store.py # Thread metadata storage @@ -220,30 +321,76 @@ python/samples/demos/enterprise-chat-agent/ ├── tools/ │ ├── weather.py # Weather tool │ ├── calculator.py # Calculator tool -│ └── knowledge_base.py # KB search tool -├── docs/ -│ ├── DESIGN.md # This document -│ └── AGENT_IMPLEMENTATION.md +│ ├── knowledge_base.py # KB search tool +│ └── microsoft_docs.py # Microsoft Docs MCP integration +├── docs/ # Additional documentation └── infra/ - └── main.bicep # Azure infrastructure + └── main.bicep # Azure infrastructure (Bicep) ``` -## Security Considerations +--- -| Concern | Mitigation | -|---------|------------| -| **Authentication** | `DefaultAzureCredential` (supports Managed Identity, CLI, etc.) 
| -| **Thread Isolation** | Cosmos DB partition key on `thread_id` / `session_id` | -| **Secrets Management** | Environment variables (Key Vault recommended for production) | -| **Input Validation** | Request body validation in route handlers | +## Implementation Status + +### ✅ Phase 1: Core Chat API + +- Azure Functions HTTP triggers +- ChatAgent with Azure OpenAI +- Local tools (weather, calculator, knowledge base) +- `CosmosHistoryProvider` for automatic message persistence +- `CosmosConversationStore` for thread metadata +- README with setup instructions +- Infrastructure as Code (Bicep + azd) + +### ✅ Phase 2: Observability + +- OpenTelemetry integration via Agent Framework +- Custom spans for HTTP requests and Cosmos operations +- Structured logging +- Health check endpoint + +### ✅ Phase 3: MCP Integration + +- `MCPStreamableHTTPTool` for Microsoft Learn MCP server +- `microsoft_docs_search` tool via MCP +- `microsoft_code_sample_search` tool via MCP +- Per-request MCP connection (serverless-friendly) + +### 🔄 Phase 4: Production Hardening (Future) -## Testing +- Managed Identity authentication +- Retry policies and circuit breakers +- Rate limiting +- Input sanitization -- **Local Testing**: Use `demo.http` with VS Code REST Client or `func start` -- **Deployment**: `azd up` for full Azure deployment -- **Unit Tests**: Located in `tests/` directory +### 🔄 Phase 5: Caching (Future) + +- Redis session cache for high-frequency access +- Recent messages caching + +--- + +## Configuration + +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint URL | +| `AZURE_OPENAI_MODEL` | Model deployment name (e.g., `gpt-4o`) | +| `AZURE_OPENAI_API_VERSION` | API version (e.g., `2024-10-21`) | +| `AZURE_COSMOS_ENDPOINT` | Cosmos DB endpoint | +| `AZURE_COSMOS_DATABASE_NAME` | Database name (e.g., `chat_db`) | +| `AZURE_COSMOS_CONTAINER_NAME` | Messages container name | +| 
`AZURE_COSMOS_THREADS_CONTAINER_NAME` | Threads container name | +| `ENABLE_OTEL` | Enable OpenTelemetry (`true`/`false`) | +| `OTLP_ENDPOINT` | OTLP collector endpoint | +| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Azure Monitor connection | +| `OTEL_SERVICE_NAME` | Service name for traces | + +--- ## Open Questions -1. **Streaming support**: Should a future phase include SSE streaming responses? -2. **Multi-tenant**: Should thread isolation support user-level partitioning? +1. **Multi-tenant**: Should thread isolation support user-level partitioning? +2. **Caching Strategy**: What's the optimal TTL for conversation context caching? diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md deleted file mode 100644 index f4584519e1..0000000000 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/MCP_INTEGRATION.md +++ /dev/null @@ -1,164 +0,0 @@ -# Microsoft Docs MCP Server Integration - -## Overview - -This document explains how to integrate the Microsoft Docs MCP server into the Enterprise Chat Agent, enabling real-time access to official Microsoft and Azure documentation. - -## What is MCP? - -**Model Context Protocol (MCP)** is a standard for connecting AI applications to external data sources and tools. The Microsoft Docs MCP server provides access to: -- Official Microsoft Learn documentation -- Azure service documentation -- Code samples and examples -- API references - -## Current Status - -The chat agent includes two MCP-ready tools: -- `search_microsoft_docs` - Search documentation content -- `search_microsoft_code_samples` - Find code examples - -**Status:** Tools are defined but MCP integration requires VS Code/Copilot Chat environment or custom MCP client implementation. - -## Integration Options - -### Option 1: Use in VS Code with GitHub Copilot (Recommended) - -The MCP server is already available in your VS Code environment. 
The tools can be used directly when the agent runs in a Copilot-enabled context. - -**No additional code needed** - the MCP functions are available via the Copilot extension. - -### Option 2: Direct HTTP API Integration (Azure Functions) - -For standalone Azure Functions deployment, replace MCP calls with direct REST API calls to Microsoft Learn search: - -```python -import httpx - -async def search_microsoft_docs(query: str, max_results: int = 5) -> list[dict]: - """Search Microsoft docs via REST API.""" - # Microsoft Learn has a public search endpoint - async with httpx.AsyncClient() as client: - response = await client.get( - "https://learn.microsoft.com/api/search", - params={ - "search": query, - "locale": "en-us", - "$top": max_results, - } - ) - results = response.json() - - return [ - { - "title": result["title"], - "content": result["description"], - "url": result["url"], - } - for result in results.get("results", []) - ] -``` - -### Option 3: Use Azure Cognitive Search on Microsoft Learn Index - -For production deployments, use Azure Cognitive Search with a pre-built index of Microsoft documentation: - -```python -from azure.search.documents import SearchClient -from azure.identity import DefaultAzureCredential - -async def search_microsoft_docs(query: str, max_results: int = 5) -> list[dict]: - """Search using Azure Cognitive Search.""" - credential = DefaultAzureCredential() - search_client = SearchClient( - endpoint=os.environ["AZURE_SEARCH_ENDPOINT"], - index_name="microsoft-docs-index", - credential=credential, - ) - - results = search_client.search( - search_text=query, - top=max_results, - select=["title", "content", "url"], - ) - - return [ - { - "title": doc["title"], - "content": doc["content"], - "url": doc["url"], - } - for doc in results - ] -``` - -## Example Usage - -Once integrated, users can ask: - -``` -User: "How do I configure partition keys in Azure Cosmos DB?" 
-→ Agent calls: search_microsoft_docs("Cosmos DB partition keys") -→ Returns: Official docs with best practices, examples, and guidance -``` - -``` -User: "Show me Python code for Azure OpenAI chat completion" -→ Agent calls: search_microsoft_code_samples("Azure OpenAI chat completion", language="python") -→ Returns: Official code examples from Microsoft Learn -``` - -## Implementation Steps - -### Quick Test (Local with VS Code) - -1. The MCP server is already available in your VS Code environment -2. Tools are defined and ready -3. Test with Copilot Chat to verify MCP integration - -### Production Deployment (Azure Functions) - -1. Choose integration method (Option 2 or 3 above) -2. Update `tools/microsoft_docs.py` with real implementation -3. Add required dependencies to `requirements.txt`: - ``` - httpx>=0.24.0 # For REST API option - # OR - azure-search-documents>=11.4.0 # For Azure Search option - ``` -4. Add environment variables: - ```json - { - "AZURE_SEARCH_ENDPOINT": "https://your-search.search.windows.net", - "MICROSOFT_LEARN_API_KEY": "optional-if-using-api" - } - ``` -5. Deploy with `azd up` - -## Benefits - -✅ **Authoritative Information**: Official Microsoft documentation -✅ **Always Current**: Latest product updates and features -✅ **Code Examples**: Real, tested code samples -✅ **Better Support**: Answer Azure questions with confidence -✅ **Reduced Hallucination**: Grounded in actual documentation - -## Example Queries the Agent Can Now Handle - -- "What are Azure Functions hosting options?" -- "How do I implement retry policies in Azure?" -- "Show me code for Azure Cosmos DB bulk operations" -- "What's the difference between Azure App Service and Container Apps?" -- "How do I configure CORS for Azure Functions?" -- "Best practices for Azure OpenAI rate limiting" - -## Next Steps - -1. **Test locally**: Run agent and ask Azure-related questions -2. **Choose production integration**: REST API or Azure Search -3. 
**Implement real search**: Replace placeholder with actual calls -4. **Deploy and monitor**: Track which docs are most helpful - -For questions about MCP, see: -- [Model Context Protocol Specification](https://modelcontextprotocol.io) -- [Microsoft MCP Servers](https://github.com/microsoft/mcp-servers) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md deleted file mode 100644 index b10f934062..0000000000 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/observability-design.md +++ /dev/null @@ -1,243 +0,0 @@ -# Observability Design — Enterprise Chat Agent - -This document describes the OpenTelemetry observability design for the Enterprise Chat Agent, aligned with the Microsoft Agent Framework's built-in instrumentation. - -## Design Principles - -1. **Don't duplicate framework instrumentation** — Use the Agent Framework's automatic spans for agent/LLM/tool tracing -2. **Fill the gaps** — Add manual spans only for layers the framework cannot see (HTTP, Cosmos DB, validation) -3. 
**Use framework APIs** — Leverage `setup_observability()`, `get_tracer()`, and `get_meter()` from `agent_framework` - ---- - -## Framework's Built-in Instrumentation (Automatic) - -The Microsoft Agent Framework automatically creates these spans when you call agent/chat client methods: - -| Span Name Pattern | When Created | Key Attributes | -|---|---|---| -| `invoke_agent {agent_name}` | `agent.run()` / `agent.run_stream()` | `gen_ai.agent.id`, `gen_ai.agent.name`, `gen_ai.conversation.id` | -| `chat {model_id}` | `chat_client.get_response()` / `get_streaming_response()` | `gen_ai.request.model`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens` | -| `execute_tool {function_name}` | Tool function invocations via `AIFunction` | `gen_ai.tool.name`, `gen_ai.tool.call.id`, `gen_ai.tool.type` | - -### Framework-Provided Functions - -```python -from agent_framework import setup_observability, get_tracer, get_meter -``` - -- **`setup_observability()`** — Configures TracerProvider, MeterProvider, LoggerProvider with OTLP/Azure Monitor exporters -- **`get_tracer()`** — Returns the configured tracer for custom spans -- **`get_meter()`** — Returns the configured meter for custom metrics - -### Automatic Metrics - -| Metric Name | Description | -|---|---| -| `gen_ai.client.operation.duration` | Duration of LLM operations | -| `gen_ai.client.token.usage` | Token usage (input/output) | -| `agent_framework.function.invocation.duration` | Tool function execution duration | - ---- - -## Tool vs Non-Tool Service Calls - -Whether you need manual spans depends on **how** a service is invoked: - -| Scenario | Manual Span Needed? 
| Why | -|----------|---------------------|-----| -| AI Search **as agent tool** | ❌ No | Framework creates `execute_tool` span automatically | -| Redis **as agent tool** | ❌ No | Framework creates `execute_tool` span automatically | -| AI Search **outside agent** (pre/post processing) | ✅ Yes | Framework doesn't see calls outside `agent.run()` | -| Redis **outside agent** (caching layer) | ✅ Yes | Framework doesn't see calls outside `agent.run()` | -| Cosmos DB (thread storage) | ✅ Yes | Always called outside agent context | - -### Example: Tool vs Direct Call - -```python -# AS A TOOL - Framework handles instrumentation automatically -@ai_function -async def search_knowledge_base(query: str) -> str: - return await ai_search_client.search(query) # No manual span needed - -response = await agent.run(message, tools=[search_knowledge_base]) -# Framework creates: invoke_agent → execute_tool search_knowledge_base - -# OUTSIDE AGENT - Manual span required -async with redis_span("get", "session_cache"): - cached_context = await redis.get(f"context:{thread_id}") # Before agent call - -async with ai_search_span("index", "conversation_logs"): - await ai_search.index_document(log) # After agent call for analytics -``` - ---- - -## Enterprise Chat Agent Custom Spans (Manual) - -The framework doesn't know about HTTP requests, Cosmos DB operations, or services called outside the agent. 
We add spans for these layers: - -| Layer | Span Name Pattern | Purpose | -|---|---|---| -| HTTP Request | `http.request {method} {path}` | Track request lifecycle | -| Cosmos DB | `cosmos.{operation} {container}` | Track database operations | -| Redis | `redis.{operation} {key_pattern}` | Track caching operations | -| AI Search | `ai_search.{operation} {index}` | Track search operations | -| Validation | `request.validate {operation}` | Track authorization checks | - ---- - -## Span Hierarchy - -```text -http.request POST /threads/{thread_id}/messages ← MANUAL (HTTP layer) -├── cosmos.read threads ← MANUAL (Cosmos layer) -├── request.validate verify_thread_ownership ← MANUAL (Validation) -├── invoke_agent ChatAgent ← FRAMEWORK (automatic) -│ ├── chat gpt-4o ← FRAMEWORK (automatic) -│ │ └── (internal LLM call spans) -│ └── execute_tool get_weather ← FRAMEWORK (automatic) -├── cosmos.upsert threads ← MANUAL (Cosmos layer) -└── http.response ← MANUAL (optional) -``` - ---- - -## Implementation - -The observability module (`observability.py`) provides async context managers: - -- **`init_observability()`** — Wraps `setup_observability()`, call once at startup -- **`http_request_span(method, path, thread_id, user_id)`** — Top-level HTTP span -- **`cosmos_span(operation, container, partition_key)`** — Cosmos DB operation span -- **`redis_span(operation, key_pattern)`** — Redis caching span -- **`ai_search_span(operation, index)`** — AI Search span -- **`validation_span(operation)`** — Request validation span - -### Usage Pattern - -```python -from observability import init_observability, http_request_span, cosmos_span - -init_observability() # Once at startup - -@app.route(route="threads/{thread_id}/messages", methods=["POST"]) -async def send_message(req: func.HttpRequest) -> func.HttpResponse: - async with http_request_span("POST", "/threads/{thread_id}/messages", thread_id, user_id): - async with cosmos_span("read", "threads", thread_id): - thread = await 
cosmos_store.get_thread(thread_id) - - # Agent invocation - NO manual span needed (framework handles it) - response = await agent.run(message, thread=agent_thread) - - async with cosmos_span("upsert", "threads", thread_id): - await cosmos_store.save_thread_state(thread_id, thread) -``` - ---- - -## Dependencies - -Add to `requirements.txt`: - -```txt -# OpenTelemetry Core -opentelemetry-api>=1.25.0 -opentelemetry-sdk>=1.25.0 - -# Exporters -opentelemetry-exporter-otlp>=1.25.0 -azure-monitor-opentelemetry-exporter>=1.0.0b41 - -# Semantic Conventions -opentelemetry-semantic-conventions-ai>=0.5.0 -``` - ---- - -## Environment Variables - -Configure these in `local.settings.json` or Azure Function App settings: - -| Variable | Description | Example | -|---|---|---| -| `ENABLE_OTEL` | Enable OpenTelemetry | `true` | -| `ENABLE_SENSITIVE_DATA` | Log message contents (dev only!) | `false` | -| `OTLP_ENDPOINT` | OTLP collector endpoint | `http://localhost:4317` | -| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Azure Monitor connection | `InstrumentationKey=...` | -| `OTEL_SERVICE_NAME` | Service name for traces | `enterprise-chat-agent` | - -### Example `local.settings.json` - -```json -{ - "IsEncrypted": false, - "Values": { - "AzureWebJobsStorage": "UseDevelopmentStorage=true", - "FUNCTIONS_WORKER_RUNTIME": "python", - "COSMOS_ENDPOINT": "https://your-cosmos.documents.azure.com:443/", - "COSMOS_DATABASE": "chat-database", - "AZURE_OPENAI_ENDPOINT": "https://your-openai.openai.azure.com/", - "AZURE_OPENAI_DEPLOYMENT": "gpt-4o", - "ENABLE_OTEL": "true", - "ENABLE_SENSITIVE_DATA": "false", - "OTLP_ENDPOINT": "http://localhost:4317", - "OTEL_SERVICE_NAME": "enterprise-chat-agent" - } -} -``` - -### Azure Functions `host.json` Configuration - -Enable OpenTelemetry mode for Azure Functions: - -```json -{ - "version": "2.0", - "logging": { - "applicationInsights": { - "samplingSettings": { - "isEnabled": true, - "excludedTypes": "Request" - } - } - }, - "telemetryMode": 
"OpenTelemetry" -} -``` - ---- - -## Viewing Traces - -### Local Development - -Use one of these OTLP-compatible backends: - -1. **Jaeger**: `docker run -p 16686:16686 -p 4317:4317 jaegertracing/all-in-one` -2. **Aspire Dashboard**: Part of .NET Aspire, provides a nice UI -3. **AI Toolkit Extension**: Set `VS_CODE_EXTENSION_PORT` for VS Code integration - -### Azure Production - -Traces are sent to Azure Monitor Application Insights. View in: - -1. **Azure Portal** → Application Insights → Transaction Search -2. **Azure Portal** → Application Insights → Application Map (for distributed tracing) - ---- - -## Summary - -| Layer | Span | Instrumented By | -|-------|------|-----------------| -| HTTP Request | `http.request {method} {path}` | Enterprise Agent (manual) | -| Cosmos DB | `cosmos.{operation} {container}` | Enterprise Agent (manual) | -| Validation | `request.validate {operation}` | Enterprise Agent (manual) | -| Redis (outside agent) | `redis.{operation} {key}` | Enterprise Agent (manual) | -| AI Search (outside agent) | `ai_search.{operation} {index}` | Enterprise Agent (manual) | -| Agent Invocation | `invoke_agent {name}` | Agent Framework (automatic) | -| LLM Calls | `chat {model}` | Agent Framework (automatic) | -| Tool Execution | `execute_tool {function}` | Agent Framework (automatic) | - -**Key Insight**: If a service (Redis, AI Search, etc.) is invoked **as a tool** through `agent.run()`, the framework instruments it automatically. Only add manual spans for services called **outside** the agent context. 
diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/host.json b/python/samples/05-end-to-end/enterprise-chat-agent/host.json index 06d01bdaa9..cd6a04cc05 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/host.json +++ b/python/samples/05-end-to-end/enterprise-chat-agent/host.json @@ -6,8 +6,14 @@ "isEnabled": true, "excludedTypes": "Request" } + }, + "logLevel": { + "default": "Information", + "azure.cosmos": "Warning", + "azure.core.pipeline.policies.http_logging_policy": "Warning" } }, + "telemetryMode": "OpenTelemetry", "extensionBundle": { "id": "Microsoft.Azure.Functions.ExtensionBundle", "version": "[4.*, 5.0.0)" diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example index 8056b3897f..31f80ae04c 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example +++ b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example @@ -13,5 +13,9 @@ "ENABLE_SENSITIVE_DATA": "false", "OTLP_ENDPOINT": "http://localhost:4317", "OTEL_SERVICE_NAME": "enterprise-chat-agent" + }, + "Host": { + "CORS": "*", + "CORSCredentials": false } } diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py index 3d00b89547..2fb592d567 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone import azure.functions as func +from agent_framework import AgentSession from services import ( http_request_span, @@ -86,6 +87,10 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: # Get agent (configured with CosmosHistoryProvider and local tools) agent = get_agent() + # Create session with thread_id so CosmosHistoryProvider uses it + session 
= AgentSession(session_id=thread_id) + logging.info(f"Running agent with session_id={session.session_id}") + # Run agent with MCP tools for Microsoft Learn documentation # The agent combines: # - Local tools: get_weather, calculate, search_knowledge_base @@ -93,7 +98,7 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: async with get_mcp_tool() as mcp: response = await agent.run( content, - session_id=thread_id, + session=session, # Pass session object, not session_id tools=mcp, # Add MCP tools for this run ) @@ -174,14 +179,22 @@ async def get_messages(req: func.HttpRequest) -> func.HttpResponse: async with cosmos_span("query", "messages", thread_id): messages = await history_provider.get_messages(session_id=thread_id) + logging.info(f"Retrieved {len(messages)} messages for thread {thread_id}") + # Convert Message objects to serializable dicts + # Message has .role (str) and .text (property that concatenates all TextContent) message_list = [] for msg in messages: + role = msg.role.value if hasattr(msg.role, "value") else str(msg.role) + # Use the .text property which concatenates all text contents + content = msg.text if hasattr(msg, "text") else "" + logging.info(f"Message: role={role}, content={content[:100] if content else 'empty'}...") message_list.append({ - "role": msg.role.value if hasattr(msg.role, "value") else str(msg.role), - "content": msg.content if hasattr(msg, "content") else str(msg), + "role": role, + "content": content, }) + logging.info(f"Returning {len(message_list)} serialized messages") span.set_attribute("http.status_code", 200) return func.HttpResponse( body=json.dumps({"messages": message_list}), diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py index a09b998131..011ca4a593 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py +++ 
b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py @@ -70,6 +70,74 @@ async def create_thread(req: func.HttpRequest) -> func.HttpResponse: ) +@bp.route(route="threads", methods=["GET"]) +async def list_threads(req: func.HttpRequest) -> func.HttpResponse: + """ + List all conversation threads. + + Query Parameters: + user_id: Filter by user ID (optional) + status: Filter by status - 'active', 'archived', 'deleted' (optional) + limit: Maximum number of threads to return (default 50, max 100) + offset: Number of threads to skip for pagination (default 0) + + Request: + GET /api/threads + GET /api/threads?user_id=user_1234 + GET /api/threads?status=active&limit=20 + + Response: + 200 OK + { + "threads": [...], + "count": 10, + "limit": 50, + "offset": 0 + } + """ + user_id = req.params.get("user_id") + status = req.params.get("status") + + try: + limit = min(int(req.params.get("limit", 50)), 100) + except ValueError: + limit = 50 + + try: + offset = max(int(req.params.get("offset", 0)), 0) + except ValueError: + offset = 0 + + async with http_request_span( + "GET", "/threads", user_id=user_id + ) as span: + store = get_store() + async with cosmos_span("query", "threads", "list"): + threads = await store.list_threads( + user_id=user_id, + status=status, + limit=limit, + offset=offset, + ) + + result = { + "threads": threads, + "count": len(threads), + "limit": limit, + "offset": offset, + } + + logging.info( + f"Listed {len(threads)} threads (user_id={user_id}, status={status})" + ) + + span.set_attribute("http.status_code", 200) + return func.HttpResponse( + body=json.dumps(result), + mimetype="application/json", + ) + + @bp.route(route="threads/{thread_id}", methods=["GET"]) async def get_thread(req: func.HttpRequest) -> func.HttpResponse: """ @@ -148,3 +216,34 @@ async def delete_thread(req: func.HttpRequest) -> func.HttpResponse: span.set_attribute("http.status_code", 204) return func.HttpResponse(status_code=204) + + 
+@bp.route(route="debug/sessions", methods=["GET"]) +async def debug_list_sessions(req: func.HttpRequest) -> func.HttpResponse: + """ + Debug endpoint to list all session_ids that have messages in CosmosHistoryProvider. + This helps diagnose mismatches between thread_ids and session_ids. + + GET /api/debug/sessions + """ + history_provider = get_history_provider() + + try: + sessions = await history_provider.list_sessions() + + return func.HttpResponse( + body=json.dumps({ + "sessions": sessions, + "count": len(sessions), + "source_id": history_provider.source_id, + "note": "These are session_ids from the messages container. They should match thread_ids for messages to load correctly." + }), + mimetype="application/json", + ) + except Exception as e: + logging.error(f"Failed to list sessions: {e}") + return func.HttpResponse( + body=json.dumps({"error": str(e)}), + status_code=500, + mimetype="application/json", + ) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py index 227435926f..8ff5d7dffa 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py @@ -216,3 +216,58 @@ async def thread_exists(self, thread_id: str) -> bool: """ thread = await self.get_thread(thread_id) return thread is not None + + async def list_threads( + self, + user_id: str | None = None, + status: str | None = None, + limit: int = 50, + offset: int = 0, + ) -> list[dict]: + """ + List all threads with optional filters. + + Args: + user_id: Filter by user ID (optional). + status: Filter by status - 'active', 'archived', 'deleted' (optional). + limit: Maximum number of threads to return (default 50). + offset: Number of threads to skip for pagination. + + Returns: + List of thread documents sorted by updated_at descending. 
+ """ + # Build query with optional filters + conditions = ["c.type = 'thread'"] + parameters = [] + + if user_id: + conditions.append("c.user_id = @user_id") + parameters.append({"name": "@user_id", "value": user_id}) + + if status: + conditions.append("c.status = @status") + parameters.append({"name": "@status", "value": status}) + + query = f""" + SELECT * FROM c + WHERE {' AND '.join(conditions)} + ORDER BY c.updated_at DESC + OFFSET @offset LIMIT @limit + """ + parameters.extend([ + {"name": "@offset", "value": offset}, + {"name": "@limit", "value": limit}, + ]) + + items = list( + self.container.query_items( + query=query, + parameters=parameters, + enable_cross_partition_query=True, + ) + ) + + logging.info( + f"Listed {len(items)} threads (user_id={user_id}, status={status})" + ) + return items diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py index a5ce7f68a8..078cf079d3 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py @@ -56,6 +56,11 @@ def init_observability() -> None: - OTEL_SERVICE_NAME: Service name (default: agent_framework) """ try: + # Reduce verbose logging from httpx/httpcore (HTTP transport details) + # These generate 100+ traces per request with DEBUG logging + logging.getLogger("httpx").setLevel(logging.WARNING) + logging.getLogger("httpcore").setLevel(logging.WARNING) + configure_otel_providers() logger.info("Observability initialized successfully") except Exception as e: From 546a105bfc5b5c782ea68fd74e1da37d919b3f1e Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Wed, 25 Mar 2026 15:49:17 +0000 Subject: [PATCH 05/10] feat(python): Add enterprise chat agent sample with Azure Functions and Cosmos DB formatting fix --- .../enterprise-chat-agent/function_app.py 
| 3 +- .../enterprise-chat-agent/routes/__init__.py | 4 +- .../enterprise-chat-agent/routes/health.py | 14 ++++--- .../enterprise-chat-agent/routes/messages.py | 38 ++++++++++------- .../enterprise-chat-agent/routes/threads.py | 25 +++++------ .../services/__init__.py | 18 ++++---- .../services/agent_service.py | 21 ++++------ .../services/cosmos_store.py | 13 +++--- .../services/observability.py | 11 ++--- .../enterprise-chat-agent/tools/__init__.py | 2 +- .../enterprise-chat-agent/tools/calculator.py | 2 +- .../tools/knowledge_base.py | 41 +++++++++++++------ 12 files changed, 106 insertions(+), 86 deletions(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/function_app.py b/python/samples/05-end-to-end/enterprise-chat-agent/function_app.py index 026182835d..b6e7708cdb 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/function_app.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/function_app.py @@ -15,9 +15,8 @@ """ import azure.functions as func - +from routes import health_bp, messages_bp, threads_bp from services import init_observability -from routes import threads_bp, messages_bp, health_bp # Initialize observability once at startup init_observability() diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py index 1503963b14..514c94ea07 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py @@ -9,8 +9,8 @@ - health: Health check endpoint """ -from routes.threads import bp as threads_bp -from routes.messages import bp as messages_bp from routes.health import bp as health_bp +from routes.messages import bp as messages_bp +from routes.threads import bp as threads_bp __all__ = ["threads_bp", "messages_bp", "health_bp"] diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py 
b/python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py index 623b3ab9f2..a11976d76c 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/health.py @@ -28,16 +28,18 @@ async def health_check(req: func.HttpRequest) -> func.HttpResponse: try: store = get_store() # Simple connectivity check - initializes connection if needed - store.container + _ = store.container cosmos_connected = True except Exception as e: logging.warning(f"Cosmos DB connectivity check failed: {e}") return func.HttpResponse( - body=json.dumps({ - "status": "healthy", - "version": "1.0.0", - "cosmos_connected": cosmos_connected, - }), + body=json.dumps( + { + "status": "healthy", + "version": "1.0.0", + "cosmos_connected": cosmos_connected, + } + ), mimetype="application/json", ) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py index 2fb592d567..c5efe7b790 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py @@ -8,14 +8,14 @@ import azure.functions as func from agent_framework import AgentSession - from services import ( - http_request_span, cosmos_span, get_agent, get_history_provider, get_mcp_tool, + http_request_span, ) + from routes.threads import get_store bp = func.Blueprint() @@ -70,9 +70,7 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: if not content: span.set_attribute("http.status_code", 400) return func.HttpResponse( - body=json.dumps( - {"error": "Missing 'content' in request body"} - ), + body=json.dumps({"error": "Missing 'content' in request body"}), status_code=400, mimetype="application/json", ) @@ -109,14 +107,20 @@ async def send_message(req: func.HttpRequest) -> func.HttpResponse: # Parse tool calls from response if any if hasattr(response, 
"tool_calls") and response.tool_calls: for tool_call in response.tool_calls: - tool_calls.append({ - "tool": getattr(tool_call, "name", str(tool_call)), - "arguments": getattr(tool_call, "arguments", {}), - }) + tool_calls.append( + { + "tool": getattr(tool_call, "name", str(tool_call)), + "arguments": getattr(tool_call, "arguments", {}), + } + ) # Update thread metadata with last message preview async with cosmos_span("update", "threads", thread_id): - preview = response_content[:100] + "..." if len(response_content) > 100 else response_content + preview = ( + response_content[:100] + "..." + if len(response_content) > 100 + else response_content + ) await store.update_thread( thread_id=thread_id, last_message_preview=preview, @@ -188,11 +192,15 @@ async def get_messages(req: func.HttpRequest) -> func.HttpResponse: role = msg.role.value if hasattr(msg.role, "value") else str(msg.role) # Use the .text property which concatenates all text contents content = msg.text if hasattr(msg, "text") else "" - logging.info(f"Message: role={role}, content={content[:100] if content else 'empty'}...") - message_list.append({ - "role": role, - "content": content, - }) + logging.info( + f"Message: role={role}, content={content[:100] if content else 'empty'}..." 
+ ) + message_list.append( + { + "role": role, + "content": content, + } + ) logging.info(f"Returning {len(message_list)} serialized messages") span.set_attribute("http.status_code", 200) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py index 011ca4a593..c764eb96b3 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py @@ -7,12 +7,11 @@ import uuid import azure.functions as func - from services import ( CosmosConversationStore, - http_request_span, cosmos_span, get_history_provider, + http_request_span, ) bp = func.Blueprint() @@ -56,9 +55,7 @@ async def create_thread(req: func.HttpRequest) -> func.HttpResponse: async with http_request_span("POST", "/threads", user_id=user_id) as span: store = get_store() async with cosmos_span("create", "threads", thread_id): - thread = await store.create_thread( - thread_id, user_id, title, metadata - ) + thread = await store.create_thread(thread_id, user_id, title, metadata) logging.info(f"Created thread {thread_id}") @@ -108,9 +105,7 @@ async def list_threads(req: func.HttpRequest) -> func.HttpResponse: except ValueError: offset = 0 - async with http_request_span( - "GET", "/threads", user_id=user_id - ) as span: + async with http_request_span("GET", "/threads", user_id=user_id) as span: store = get_store() async with cosmos_span("query", "threads", "list"): threads = await store.list_threads( @@ -232,12 +227,14 @@ async def debug_list_sessions(req: func.HttpRequest) -> func.HttpResponse: sessions = await history_provider.list_sessions() return func.HttpResponse( - body=json.dumps({ - "sessions": sessions, - "count": len(sessions), - "source_id": history_provider.source_id, - "note": "These are session_ids from the messages container. They should match thread_ids for messages to load correctly." 
- }), + body=json.dumps( + { + "sessions": sessions, + "count": len(sessions), + "source_id": history_provider.source_id, + "note": "Session IDs from messages container. Should match thread_ids.", + } + ), mimetype="application/json", ) except Exception as e: diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py index 9a76a904c9..039c38c1a1 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/__init__.py @@ -9,19 +9,19 @@ - agent_service: ChatAgent with CosmosHistoryProvider and MCP integration """ -from services.cosmos_store import CosmosConversationStore -from services.observability import ( - init_observability, - http_request_span, - cosmos_span, - validation_span, - EnterpriseAgentAttr, -) from services.agent_service import ( + close_providers, get_agent, get_history_provider, get_mcp_tool, - close_providers, +) +from services.cosmos_store import CosmosConversationStore +from services.observability import ( + EnterpriseAgentAttr, + cosmos_span, + http_request_span, + init_observability, + validation_span, ) __all__ = [ diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py index 859d6022a7..0dc982d51d 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py @@ -14,18 +14,16 @@ from pathlib import Path from typing import Optional -from azure.identity import DefaultAzureCredential from agent_framework import Agent, MCPStreamableHTTPTool from agent_framework.azure import AzureOpenAIChatClient from agent_framework_azure_cosmos import CosmosHistoryProvider - +from azure.identity import DefaultAzureCredential from tools import ( - 
get_weather, calculate, + get_weather, search_knowledge_base, ) - _history_provider: Optional[CosmosHistoryProvider] = None _agent: Optional[Agent] = None _credential: Optional[DefaultAzureCredential] = None @@ -39,6 +37,7 @@ def _load_prompt(name: str) -> str: prompt_path = _PROMPTS_DIR / f"{name}.txt" return prompt_path.read_text(encoding="utf-8") + # Microsoft Learn MCP server URL MICROSOFT_LEARN_MCP_URL = "https://learn.microsoft.com/api/mcp" @@ -63,9 +62,7 @@ def get_history_provider() -> CosmosHistoryProvider: container_name = os.environ.get("AZURE_COSMOS_CONTAINER_NAME", "messages") if not endpoint: - raise ValueError( - "AZURE_COSMOS_ENDPOINT environment variable is required" - ) + raise ValueError("AZURE_COSMOS_ENDPOINT environment variable is required") if _credential is None: _credential = DefaultAzureCredential() @@ -76,9 +73,9 @@ def get_history_provider() -> CosmosHistoryProvider: database_name=database_name, container_name=container_name, credential=_credential, - load_messages=True, # Load history before each run - store_inputs=True, # Store user messages - store_outputs=True, # Store assistant responses + load_messages=True, # Load history before each run + store_inputs=True, # Store user messages + store_outputs=True, # Store assistant responses ) logging.info( @@ -111,9 +108,7 @@ def get_agent() -> Agent: api_version = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-21") if not endpoint: - raise ValueError( - "AZURE_OPENAI_ENDPOINT environment variable is required" - ) + raise ValueError("AZURE_OPENAI_ENDPOINT environment variable is required") # Create Azure OpenAI chat client with credential global _credential diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py index 8ff5d7dffa..47f89b625f 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py +++ 
b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py @@ -135,11 +135,10 @@ async def get_thread(self, thread_id: str) -> dict | None: Thread document or None if not found. """ try: - thread = self.container.read_item( + return self.container.read_item( item=thread_id, partition_key=thread_id, ) - return thread except CosmosResourceNotFoundError: return None @@ -254,10 +253,12 @@ async def list_threads( ORDER BY c.updated_at DESC OFFSET @offset LIMIT @limit """ - parameters.extend([ - {"name": "@offset", "value": offset}, - {"name": "@limit", "value": limit}, - ]) + parameters.extend( + [ + {"name": "@offset", "value": offset}, + {"name": "@limit", "value": limit}, + ] + ) items = list( self.container.query_items( diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py index 078cf079d3..07e4ccfa2d 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py @@ -15,10 +15,9 @@ from contextlib import asynccontextmanager from typing import AsyncIterator, Optional -from opentelemetry.trace import Span, SpanKind, Status, StatusCode - # Import framework's observability - use framework APIs, don't recreate them from agent_framework.observability import configure_otel_providers, get_tracer +from opentelemetry.trace import Span, SpanKind, Status, StatusCode logger = logging.getLogger(__name__) @@ -108,9 +107,11 @@ async def http_request_span( try: yield span # Check if status_code was set; determine success based on it - status_code = span.attributes.get("http.status_code") if hasattr( - span, 'attributes' - ) else None + status_code = ( + span.attributes.get("http.status_code") + if hasattr(span, "attributes") + else None + ) if status_code and status_code >= 400: span.set_status(Status(StatusCode.ERROR)) else: diff --git 
a/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py index fb2615b8cc..b5ad75dd80 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/__init__.py @@ -16,9 +16,9 @@ MCP tools are connected at runtime via MCPStreamableHTTPTool in agent_service.py """ -from tools.weather import get_weather from tools.calculator import calculate from tools.knowledge_base import search_knowledge_base +from tools.weather import get_weather __all__ = [ "get_weather", diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py index 53d6904b07..7e3fed2c98 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py @@ -77,4 +77,4 @@ def calculate(expression: str) -> float: return float(result) except (SyntaxError, ValueError) as e: - raise ValueError(f"Invalid expression '{expression}': {e}") + raise ValueError(f"Invalid expression '{expression}': {e}") from e diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py index 07f7ad0229..f7d0d124b6 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py @@ -15,31 +15,46 @@ { "id": "kb_001", "title": "Order Status FAQ", - "content": "To check your order status, log into your account and visit the 'My Orders' section. You can also track your package using the tracking number sent to your email.", + "content": ( + "To check your order status, log into your account and visit the " + "'My Orders' section. 
Track your package using the tracking number sent to your email." + ), "category": "orders", }, { "id": "kb_002", "title": "Return Policy", - "content": "Items can be returned within 30 days of purchase. Items must be unused and in original packaging. Refunds are processed within 5-7 business days.", + "content": ( + "Items can be returned within 30 days of purchase. " + "Items must be unused and in original packaging. Refunds processed in 5-7 business days." + ), "category": "returns", }, { "id": "kb_003", "title": "Shipping Information", - "content": "Standard shipping takes 5-7 business days. Express shipping (2-3 days) is available for an additional fee. Free shipping on orders over $50.", + "content": ( + "Standard shipping takes 5-7 business days. " + "Express shipping (2-3 days) available for an additional fee. Free shipping on orders over $50." + ), "category": "shipping", }, { "id": "kb_004", "title": "Payment Methods", - "content": "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay. All transactions are securely processed.", + "content": ( + "We accept Visa, Mastercard, American Express, PayPal, and Apple Pay. " + "All transactions are securely processed." + ), "category": "payments", }, { "id": "kb_005", "title": "Account Management", - "content": "To update your account information, go to Settings > Profile. You can change your email, password, and notification preferences there.", + "content": ( + "To update your account information, go to Settings > Profile. " + "You can change your email, password, and notification preferences there." 
+ ), "category": "account", }, ] @@ -76,13 +91,15 @@ def search_knowledge_base( or query_lower in entry["content"].lower() or any(word in entry["content"].lower() for word in query_lower.split()) ): - results.append({ - "id": entry["id"], - "title": entry["title"], - "content": entry["content"], - "category": entry["category"], - "relevance_score": 0.85, # Simulated score - }) + results.append( + { + "id": entry["id"], + "title": entry["title"], + "content": entry["content"], + "category": entry["category"], + "relevance_score": 0.85, # Simulated score + } + ) # Sort by relevance (simulated) and limit results results.sort(key=lambda x: x["relevance_score"], reverse=True) From 06da24d5ad627811544abbf114b5951d37a1c31f Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:31:59 +0000 Subject: [PATCH 06/10] Copilot review comments fixed --- .../enterprise-chat-agent/README.md | 71 ++++++------ .../enterprise-chat-agent/demo-ui.html | 4 +- .../enterprise-chat-agent/docs/DESIGN.md | 107 ++++++------------ .../infra/core/database/cosmos-nosql.bicep | 44 +------ .../infra/core/host/function-app.bicep | 8 +- .../enterprise-chat-agent/infra/main.bicep | 2 - .../local.settings.json.example | 9 +- .../enterprise-chat-agent/routes/__init__.py | 3 +- .../enterprise-chat-agent/routes/messages.py | 14 ++- .../enterprise-chat-agent/routes/threads.py | 22 ++++ .../services/agent_service.py | 9 +- .../services/cosmos_store.py | 91 +++++++++------ .../enterprise-chat-agent/tools/calculator.py | 38 ++++++- 13 files changed, 214 insertions(+), 208 deletions(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/README.md b/python/samples/05-end-to-end/enterprise-chat-agent/README.md index 1ca7747520..a36df0f098 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/README.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/README.md @@ -8,7 +8,7 @@ This sample showcases: - **Azure Functions HTTP 
Triggers** - Serverless REST API endpoints - **Runtime Tool Selection** - Agent autonomously decides which tools to invoke based on user intent -- **Cosmos DB Persistence** - Durable thread and message storage with thread_id partition key +- **Cosmos DB Persistence** - Two containers: `threads` (partition key `/thread_id`) and `messages` (`/session_id`) - **Production Patterns** - Error handling, observability, and security best practices - **One-command deployment** - `azd up` deploys all infrastructure @@ -41,7 +41,7 @@ Client → Azure Functions (HTTP Triggers) → ChatAgent → Azure OpenAI Deploy the complete infrastructure with a single command: ```bash -cd python/samples/demos/enterprise-chat-agent +cd python/samples/05-end-to-end/enterprise-chat-agent # Login to Azure azd auth login @@ -86,7 +86,7 @@ azd down ### Option 2: Run Locally ```bash -cd python/samples/demos/enterprise-chat-agent +cd python/samples/05-end-to-end/enterprise-chat-agent pip install -r requirements.txt ``` @@ -97,13 +97,18 @@ Copy `local.settings.json.example` to `local.settings.json` and update: "IsEncrypted": false, "Values": { "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "AzureWebJobsFeatureFlags": "EnableWorkerIndexing", "FUNCTIONS_WORKER_RUNTIME": "python", "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", - "AZURE_OPENAI_MODEL": "gpt-4o", + "AZURE_OPENAI_DEPLOYMENT_NAME": "gpt-4o", "AZURE_OPENAI_API_VERSION": "2024-10-21", "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", "AZURE_COSMOS_DATABASE_NAME": "chat_db", - "AZURE_COSMOS_CONTAINER_NAME": "messages" + "AZURE_COSMOS_CONTAINER_NAME": "messages", + "APPLICATIONINSIGHTS_CONNECTION_STRING": "", + "ENABLE_INSTRUMENTATION": "true", + "ENABLE_SENSITIVE_DATA": "false", + "ENABLE_DEBUG_ENDPOINTS": "false" } } ``` @@ -116,7 +121,11 @@ func start ### Test the API -Use the included `demo.http` file or: +After running `func start`, you can test the API in two ways: + +#### Option A: 
API-Only Testing (demo.http) + +Use the included `demo.http` file with VS Code's REST Client extension or any HTTP client: ```bash # Create a thread @@ -128,6 +137,12 @@ curl -X POST http://localhost:7071/api/threads/{thread_id}/messages \ -d '{"content": "What is the weather in Seattle and what is 15% tip on $85?"}' ``` +#### Option B: Interactive UI Testing (demo-ui.html) + +For a quick visual way to interact with the API, open `demo-ui.html` in your browser. This provides a simple chat interface to test thread creation, messaging, and agent responses. + +> ⚠️ **Development Only**: The `demo-ui.html` file is intended for local development and testing purposes only. It is not designed for production use. + ## API Endpoints | Method | Path | Description | @@ -172,11 +187,11 @@ User: "What's the weather in Paris and what's 18% tip on €75?" → Agent calls: get_weather("Paris") AND calculate("75 * 0.18") User: "How do I configure partition keys in Azure Cosmos DB?" -→ Agent calls: search_microsoft_docs("Cosmos DB partition keys") +→ Agent calls: microsoft_docs_search("Cosmos DB partition keys") via MCP → Returns: Official Microsoft documentation with best practices User: "Show me Python code for Azure OpenAI chat completion" -→ Agent calls: search_microsoft_code_samples("Azure OpenAI chat", language="python") +→ Agent calls: microsoft_code_sample_search("Azure OpenAI chat") via MCP → Returns: Official code examples from Microsoft Learn User: "What's your return policy?" 
@@ -188,13 +203,17 @@ User: "Tell me a joke" ### Available Tools -| Tool | Description | Example Use | -|------|-------------|-------------| -| `search_microsoft_docs` | Search official Microsoft/Azure docs | Azure services, cloud architecture | -| `search_microsoft_code_samples` | Find code examples from Microsoft Learn | SDK usage, implementation samples | -| `search_knowledge_base` | Internal company knowledge | Policies, FAQs, procedures | -| `get_weather` | Current weather data | Weather queries | -| `calculate` | Safe math evaluation | Calculations, tips, conversions | +| Tool | Description | Source | +|------|-------------|--------| +| `microsoft_docs_search` | Search official Microsoft/Azure docs | MCP (remote) | +| `microsoft_code_sample_search` | Find code examples from Microsoft Learn | MCP (remote) | +| `search_knowledge_base` | Internal company knowledge | Local | +| `get_weather` | Current weather data | Local | +| `calculate` | Safe math evaluation (exponent ≤ 100) | Local | + +> **Note:** MCP tools (`microsoft_docs_search`, `microsoft_code_sample_search`) are provided by the +> Microsoft Learn MCP server at `https://learn.microsoft.com/api/mcp` and discovered at runtime +> via `MCPStreamableHTTPTool`. No local tool file is needed. 
## Streaming Responses @@ -288,10 +307,9 @@ enterprise-chat-agent/ │ ├── messages.py # Message endpoint │ └── health.py # Health check ├── tools/ -│ ├── weather.py # Weather tool -│ ├── calculator.py # Calculator tool -│ ├── knowledge_base.py # Knowledge base search tool -│ └── microsoft_docs.py # Microsoft Docs MCP integration +│ ├── weather.py # Weather tool (local) +│ ├── calculator.py # Calculator tool (local) +│ └── knowledge_base.py # Knowledge base search tool (local) └── infra/ # Infrastructure as Code (Bicep) ├── main.bicep # Main deployment template └── core/ # Modular Bicep components @@ -299,7 +317,7 @@ enterprise-chat-agent/ ## Design Documentation -See [DESIGN.md](./DESIGN.md) for: +See [DESIGN.md](./docs/DESIGN.md) for: - Architecture diagrams and message processing flow - Cosmos DB data model and partition strategy @@ -313,16 +331,3 @@ See [DESIGN.md](./DESIGN.md) for: - [Microsoft Agent Framework Documentation](https://learn.microsoft.com/agent-framework/) - [Azure Functions Python Developer Guide](https://learn.microsoft.com/azure/azure-functions/functions-reference-python) -## Implementation Status - -### ✅ Completed -- ✅ Create tools (weather, calculator, knowledge_base) -- ✅ Create an agent (ChatAgent with Azure OpenAI) -- ✅ Use tools with agents (@ai_function decorators + agent configuration) -- ✅ Cosmos DB persistence -- ✅ OpenTelemetry observability - -### 🔄 Pending -- ⏳ Test agent locally with `func start` -- ⏳ Check the logs in Application Insights -- ⏳ Deploy to Azure with `azd up` diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html b/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html index 8b9db8e4fe..05fbaf2d7e 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html +++ b/python/samples/05-end-to-end/enterprise-chat-agent/demo-ui.html @@ -279,8 +279,8 @@

👋 Welcome!

} list.innerHTML = threads.map(t => `
-
${t.title || 'New Chat'}
-
${t.last_message_preview || 'No messages'}
+
${escapeHtml(t.title || 'New Chat')}
+
${escapeHtml(t.last_message_preview || 'No messages')}
${new Date(t.created_at).toLocaleDateString()}
`).join(''); diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md index ab84768f26..9612cbf4df 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md @@ -50,8 +50,8 @@ flowchart TB Weather["get_weather"] Calc["calculate"] KB["search_knowledge_base"] - Docs["search_microsoft_docs
(MCP)"] - Code["search_microsoft_code_samples
(MCP)"] + Docs["microsoft_docs_search
(MCP)"] + Code["microsoft_code_sample_search
(MCP)"] end Endpoints --> Agent @@ -111,7 +111,7 @@ POST /api/threads/{thread_id}/messages 4. Agent automatically calls tools as needed: • get_weather("Seattle") • calculate("85 * 0.15") - • search_microsoft_docs("Azure Functions") + • microsoft_docs_search("Azure Functions") via MCP ↓ ┌─────────────────────────────────────────┐ │ CosmosHistoryProvider (automatic): │ @@ -144,7 +144,7 @@ The agent is configured with multiple tools but **decides at runtime** which too |------------|----------------| | "What's the weather in Tokyo?" | `get_weather("Tokyo")` | | "What's the weather in Paris and what's 18% tip on €75?" | `get_weather("Paris")` AND `calculate("75 * 0.18")` | -| "How do I configure partition keys in Azure Cosmos DB?" | `search_microsoft_docs("Cosmos DB partition keys")` | +| "How do I configure partition keys in Azure Cosmos DB?" | `microsoft_docs_search("Cosmos DB partition keys")` via MCP | | "Tell me a joke" | (No tools — direct response) | ### 2. Cosmos DB Persistence Strategy @@ -153,9 +153,12 @@ The agent is configured with multiple tools but **decides at runtime** which too | Container | Purpose | Managed By | Partition Key | |-----------|---------|------------|---------------| -| `threads` | Thread metadata (user_id, title, timestamps) | `CosmosConversationStore` (custom) | `/id` | +| `threads` | Thread metadata (user_id, title, timestamps) | `CosmosConversationStore` (custom) | `/thread_id` | | `messages` | Conversation messages | `CosmosHistoryProvider` (framework) | `/session_id` | +> **Note:** Both containers are auto-created by the Python code at runtime with the correct partition keys. +> The Bicep infrastructure only provisions the Cosmos DB account and database — not the containers. 
+ **CosmosHistoryProvider** from `agent-framework-azure-cosmos` automatically: - Loads conversation history before each agent run - Stores user inputs and agent responses after each run @@ -183,6 +186,27 @@ Using **HTTP Triggers** for a familiar REST API pattern: The integration uses `MCPStreamableHTTPTool` with per-request connections (serverless-friendly pattern). +**Implementation** (in `services/agent_service.py`): +```python +MICROSOFT_LEARN_MCP_URL = "https://learn.microsoft.com/api/mcp" + +def get_mcp_tool() -> MCPStreamableHTTPTool: + return MCPStreamableHTTPTool( + name="Microsoft Learn", + url=MICROSOFT_LEARN_MCP_URL, + approval_mode="never_require", + ) + +# Usage in messages.py: +async with get_mcp_tool() as mcp: + response = await agent.run(content, session=session, tools=mcp) +``` + +**Key Points:** +- No local tool file needed — tools are discovered from the remote MCP server +- Tools (`microsoft_docs_search`, `microsoft_code_sample_search`) are injected at runtime +- Async context manager ensures proper connection lifecycle + **Benefits:** - ✅ Authoritative information from official sources - ✅ Always current with latest product updates @@ -319,78 +343,11 @@ enterprise-chat-agent/ │ ├── messages.py # Message endpoint │ └── health.py # Health check ├── tools/ -│ ├── weather.py # Weather tool -│ ├── calculator.py # Calculator tool -│ ├── knowledge_base.py # KB search tool -│ └── microsoft_docs.py # Microsoft Docs MCP integration +│ ├── weather.py # Weather tool (local) +│ ├── calculator.py # Calculator tool (local) +│ └── knowledge_base.py # KB search tool (local) ├── docs/ # Additional documentation └── infra/ └── main.bicep # Azure infrastructure (Bicep) ``` ---- - -## Implementation Status - -### ✅ Phase 1: Core Chat API - -- Azure Functions HTTP triggers -- ChatAgent with Azure OpenAI -- Local tools (weather, calculator, knowledge base) -- `CosmosHistoryProvider` for automatic message persistence -- `CosmosConversationStore` for thread metadata -- 
README with setup instructions -- Infrastructure as Code (Bicep + azd) - -### ✅ Phase 2: Observability - -- OpenTelemetry integration via Agent Framework -- Custom spans for HTTP requests and Cosmos operations -- Structured logging -- Health check endpoint - -### ✅ Phase 3: MCP Integration - -- `MCPStreamableHTTPTool` for Microsoft Learn MCP server -- `microsoft_docs_search` tool via MCP -- `microsoft_code_sample_search` tool via MCP -- Per-request MCP connection (serverless-friendly) - -### 🔄 Phase 4: Production Hardening (Future) - -- Managed Identity authentication -- Retry policies and circuit breakers -- Rate limiting -- Input sanitization - -### 🔄 Phase 5: Caching (Future) - -- Redis session cache for high-frequency access -- Recent messages caching - ---- - -## Configuration - -### Environment Variables - -| Variable | Description | -|----------|-------------| -| `AZURE_OPENAI_ENDPOINT` | Azure OpenAI endpoint URL | -| `AZURE_OPENAI_MODEL` | Model deployment name (e.g., `gpt-4o`) | -| `AZURE_OPENAI_API_VERSION` | API version (e.g., `2024-10-21`) | -| `AZURE_COSMOS_ENDPOINT` | Cosmos DB endpoint | -| `AZURE_COSMOS_DATABASE_NAME` | Database name (e.g., `chat_db`) | -| `AZURE_COSMOS_CONTAINER_NAME` | Messages container name | -| `AZURE_COSMOS_THREADS_CONTAINER_NAME` | Threads container name | -| `ENABLE_OTEL` | Enable OpenTelemetry (`true`/`false`) | -| `OTLP_ENDPOINT` | OTLP collector endpoint | -| `APPLICATIONINSIGHTS_CONNECTION_STRING` | Azure Monitor connection | -| `OTEL_SERVICE_NAME` | Service name for traces | - ---- - -## Open Questions - -1. **Multi-tenant**: Should thread isolation support user-level partitioning? -2. **Caching Strategy**: What's the optimal TTL for conversation context caching? 
diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep index 3e7759101d..25ed8e1684 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep +++ b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/database/cosmos-nosql.bicep @@ -1,5 +1,6 @@ // Copyright (c) Microsoft. All rights reserved. -// Azure Cosmos DB NoSQL account, database, and container +// Azure Cosmos DB NoSQL account and database +// Note: Containers are auto-created by CosmosHistoryProvider and CosmosConversationStore @description('Name of the Cosmos DB account') param accountName string @@ -13,12 +14,6 @@ param tags object = {} @description('Name of the database') param databaseName string -@description('Name of the container') -param containerName string - -@description('Partition key path for the container') -param partitionKeyPath string = '/thread_id' - @description('Enable free tier (only one per subscription)') param enableFreeTier bool = false @@ -73,40 +68,6 @@ resource database 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases@2023-11-15 } } -// ============================================================================ -// Container -// ============================================================================ - -resource container 'Microsoft.DocumentDB/databaseAccounts/sqlDatabases/containers@2023-11-15' = { - parent: database - name: containerName - properties: { - resource: { - id: containerName - partitionKey: { - paths: [partitionKeyPath] - kind: 'Hash' - } - indexingPolicy: { - automatic: true - indexingMode: 'consistent' - includedPaths: [ - { - path: '/*' - } - ] - excludedPaths: [ - { - path: '/"_etag"/?' 
- } - ] - } - // Default TTL: -1 means items don't expire unless specified - defaultTtl: -1 - } - } -} - // ============================================================================ // Outputs // ============================================================================ @@ -115,4 +76,3 @@ output accountId string = cosmosAccount.id output accountName string = cosmosAccount.name output endpoint string = cosmosAccount.properties.documentEndpoint output databaseName string = database.name -output containerName string = container.name diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep index 87bd5abf4c..0dca769b09 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep +++ b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep @@ -119,19 +119,19 @@ resource functionApp 'Microsoft.Web/sites@2023-12-01' = { value: azureOpenAiEndpoint } { - name: 'AZURE_OPENAI_MODEL' + name: 'AZURE_OPENAI_DEPLOYMENT_NAME' value: azureOpenAiModel } { - name: 'COSMOS_ENDPOINT' + name: 'AZURE_COSMOS_ENDPOINT' value: cosmosAccount.properties.documentEndpoint } { - name: 'COSMOS_DATABASE_NAME' + name: 'AZURE_COSMOS_DATABASE_NAME' value: cosmosDatabaseName } { - name: 'COSMOS_CONTAINER_NAME' + name: 'AZURE_COSMOS_CONTAINER_NAME' value: cosmosContainerName } ] diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep index 46884b0988..5af32c7c25 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep +++ b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep @@ -96,8 +96,6 @@ module cosmos './core/database/cosmos-nosql.bicep' = { location: location tags: tags databaseName: cosmosDatabaseName - containerName: cosmosContainerName - partitionKeyPath: 
'/thread_id' } } diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example index 31f80ae04c..2fd4b838df 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example +++ b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example @@ -2,17 +2,18 @@ "IsEncrypted": false, "Values": { "AzureWebJobsStorage": "UseDevelopmentStorage=true", + "AzureWebJobsFeatureFlags": "EnableWorkerIndexing", "FUNCTIONS_WORKER_RUNTIME": "python", "AZURE_OPENAI_ENDPOINT": "https://your-resource.openai.azure.com/", - "AZURE_OPENAI_MODEL": "gpt-4o", + "AZURE_OPENAI_DEPLOYMENT_NAME": "gpt-4o", "AZURE_OPENAI_API_VERSION": "2024-10-21", "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", "AZURE_COSMOS_DATABASE_NAME": "chat_db", "AZURE_COSMOS_CONTAINER_NAME": "messages", - "ENABLE_OTEL": "true", + "APPLICATIONINSIGHTS_CONNECTION_STRING": "", + "ENABLE_INSTRUMENTATION": "true", "ENABLE_SENSITIVE_DATA": "false", - "OTLP_ENDPOINT": "http://localhost:4317", - "OTEL_SERVICE_NAME": "enterprise-chat-agent" + "ENABLE_DEBUG_ENDPOINTS": "false" }, "Host": { "CORS": "*", diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py index 514c94ea07..19ce83a5c8 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/__init__.py @@ -12,5 +12,6 @@ from routes.health import bp as health_bp from routes.messages import bp as messages_bp from routes.threads import bp as threads_bp +from routes.threads import close_store -__all__ = ["threads_bp", "messages_bp", "health_bp"] +__all__ = ["threads_bp", "messages_bp", "health_bp", "close_store"] diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py 
b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py index c5efe7b790..0033eb88be 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/messages.py @@ -4,6 +4,7 @@ import json import logging +import os from datetime import datetime, timezone import azure.functions as func @@ -18,6 +19,9 @@ from routes.threads import get_store +# Only log message content when explicitly enabled (PII protection) +ENABLE_SENSITIVE_DATA = os.environ.get("ENABLE_SENSITIVE_DATA", "").lower() == "true" + bp = func.Blueprint() @@ -188,13 +192,15 @@ async def get_messages(req: func.HttpRequest) -> func.HttpResponse: # Convert Message objects to serializable dicts # Message has .role (str) and .text (property that concatenates all TextContent) message_list = [] - for msg in messages: + for idx, msg in enumerate(messages): role = msg.role.value if hasattr(msg.role, "value") else str(msg.role) # Use the .text property which concatenates all text contents content = msg.text if hasattr(msg, "text") else "" - logging.info( - f"Message: role={role}, content={content[:100] if content else 'empty'}..." - ) + if ENABLE_SENSITIVE_DATA: + logging.info( + f"Message {idx}: role={role}, " + f"content={content[:100] if content else 'empty'}..." 
+ ) message_list.append( { "role": role, diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py index c764eb96b3..35d19de891 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/routes/threads.py @@ -4,6 +4,7 @@ import json import logging +import os import uuid import azure.functions as func @@ -14,6 +15,9 @@ http_request_span, ) +# Debug endpoints are disabled by default for security +DEBUG_ENDPOINTS_ENABLED = os.environ.get("ENABLE_DEBUG_ENDPOINTS", "").lower() == "true" + bp = func.Blueprint() # Cosmos DB store (lazy singleton) @@ -29,6 +33,15 @@ def get_store() -> CosmosConversationStore: return _store +async def close_store() -> None: + """Close the Cosmos DB conversation store and release resources.""" + global _store + if _store is not None: + await _store.close() + _store = None + logging.info("Closed Cosmos DB conversation store") + + @bp.route(route="threads", methods=["POST"]) async def create_thread(req: func.HttpRequest) -> func.HttpResponse: """ @@ -219,8 +232,17 @@ async def debug_list_sessions(req: func.HttpRequest) -> func.HttpResponse: Debug endpoint to list all session_ids that have messages in CosmosHistoryProvider. This helps diagnose mismatches between thread_ids and session_ids. + SECURITY: Disabled by default. Set ENABLE_DEBUG_ENDPOINTS=true to enable. 
+ GET /api/debug/sessions """ + if not DEBUG_ENDPOINTS_ENABLED: + return func.HttpResponse( + body=json.dumps({"error": "Debug endpoints are disabled"}), + status_code=404, + mimetype="application/json", + ) + history_provider = get_history_provider() try: diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py index 0dc982d51d..ed6d93fece 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py @@ -172,13 +172,16 @@ def get_mcp_tool() -> MCPStreamableHTTPTool: approval_mode="never_require", # Auto-approve tool calls for docs search ) - return _agent - async def close_providers() -> None: - """Close the history provider and release resources.""" + """Close the history provider and conversation store, and release resources.""" global _history_provider if _history_provider is not None: await _history_provider.close() _history_provider = None logging.info("Closed CosmosHistoryProvider") + + # Close the conversation store (imported here to avoid circular imports) + from routes.threads import close_store + + await close_store() diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py index 47f89b625f..1f108fa0b0 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py @@ -3,7 +3,7 @@ Cosmos DB Storage for Thread Metadata This module provides persistent storage for conversation thread metadata -using Azure Cosmos DB. Message storage is handled separately by the +using Azure Cosmos DB (async SDK). Message storage is handled separately by the CosmosHistoryProvider from agent-framework-azure-cosmos package. 
Document Types: @@ -18,14 +18,15 @@ from datetime import datetime, timezone from typing import Any -from azure.cosmos import CosmosClient +from azure.cosmos import PartitionKey +from azure.cosmos.aio import CosmosClient from azure.cosmos.exceptions import CosmosResourceNotFoundError -from azure.identity import DefaultAzureCredential +from azure.identity.aio import DefaultAzureCredential class CosmosConversationStore: """ - Manages conversation thread metadata in Azure Cosmos DB. + Manages conversation thread metadata in Azure Cosmos DB (async). Thread metadata includes: user_id, title, status, created_at, updated_at. Message persistence is handled by CosmosHistoryProvider (context provider). @@ -61,26 +62,36 @@ def __init__( "Set AZURE_COSMOS_ENDPOINT environment variable." ) - self.credential = credential or DefaultAzureCredential() + self._credential = credential self._client: CosmosClient | None = None self._container = None + self._initialized = False - @property - def container(self): - """Lazy initialization of Cosmos DB container client with auto-create.""" - if self._container is None: - self._client = CosmosClient(self.endpoint, credential=self.credential) - # Create database if it doesn't exist - database = self._client.create_database_if_not_exists(id=self.database_name) - # Create container with thread_id as partition key - self._container = database.create_container_if_not_exists( - id=self.container_name, - partition_key={"paths": ["/thread_id"], "kind": "Hash"}, - ) - logging.info( - f"Initialized Cosmos container: {self.database_name}/{self.container_name}" - ) - return self._container + async def _ensure_initialized(self): + """Lazy async initialization of Cosmos DB container client with auto-create.""" + if self._initialized: + return + + if self._credential is None: + self._credential = DefaultAzureCredential() + + self._client = CosmosClient(self.endpoint, credential=self._credential) + + # Create database if it doesn't exist + database = await 
self._client.create_database_if_not_exists( + id=self.database_name + ) + + # Create container with thread_id as partition key + self._container = await database.create_container_if_not_exists( + id=self.container_name, + partition_key=PartitionKey(path="/thread_id"), + ) + + self._initialized = True + logging.info( + f"Initialized async Cosmos container: {self.database_name}/{self.container_name}" + ) # ------------------------------------------------------------------------- # Thread Operations @@ -105,6 +116,8 @@ async def create_thread( Returns: The created thread document. """ + await self._ensure_initialized() + now = datetime.now(timezone.utc).isoformat() thread = { "id": thread_id, @@ -120,7 +133,7 @@ async def create_thread( "metadata": metadata or {}, } - self.container.create_item(body=thread) + await self._container.create_item(body=thread) logging.info(f"Created thread {thread_id} for user {user_id} in Cosmos DB") return thread @@ -134,8 +147,10 @@ async def get_thread(self, thread_id: str) -> dict | None: Returns: Thread document or None if not found. """ + await self._ensure_initialized() + try: - return self.container.read_item( + return await self._container.read_item( item=thread_id, partition_key=thread_id, ) @@ -155,8 +170,10 @@ async def delete_thread(self, thread_id: str) -> bool: Returns: True if deleted, False if not found. 
""" + await self._ensure_initialized() + try: - self.container.delete_item(item=thread_id, partition_key=thread_id) + await self._container.delete_item(item=thread_id, partition_key=thread_id) logging.info(f"Deleted thread {thread_id} from Cosmos DB") return True except CosmosResourceNotFoundError: @@ -199,7 +216,7 @@ async def update_thread( thread["updated_at"] = datetime.now(timezone.utc).isoformat() - updated = self.container.replace_item(item=thread_id, body=thread) + updated = await self._container.replace_item(item=thread_id, body=thread) logging.info(f"Updated thread {thread_id}") return updated @@ -235,6 +252,8 @@ async def list_threads( Returns: List of thread documents sorted by updated_at descending. """ + await self._ensure_initialized() + # Build query with optional filters conditions = ["c.type = 'thread'"] parameters = [] @@ -260,15 +279,23 @@ async def list_threads( ] ) - items = list( - self.container.query_items( - query=query, - parameters=parameters, - enable_cross_partition_query=True, - ) - ) + items = [] + async for item in self._container.query_items( + query=query, + parameters=parameters, + ): + items.append(item) logging.info( f"Listed {len(items)} threads (user_id={user_id}, status={status})" ) return items + + async def close(self) -> None: + """Close the Cosmos DB client and release resources.""" + if self._client is not None: + await self._client.close() + self._client = None + self._container = None + self._initialized = False + logging.info("Closed async Cosmos DB client") diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py index 7e3fed2c98..d7dcd8e74e 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py @@ -1,7 +1,7 @@ """ Calculator Tool -Provides safe mathematical expression evaluation. 
+Provides safe mathematical expression evaluation with DoS protections. """ import ast @@ -10,6 +10,11 @@ from agent_framework import tool +# Safety limits to prevent DoS attacks +MAX_EXPRESSION_LENGTH = 200 # Maximum characters in expression +MAX_EXPONENT = 100 # Maximum allowed exponent value +MAX_RESULT_MAGNITUDE = 1e308 # Maximum result magnitude (near float max) + # Safe operators for expression evaluation SAFE_OPERATORS = { ast.Add: operator.add, @@ -35,9 +40,22 @@ def _safe_eval(node: ast.AST) -> Union[int, float]: left = _safe_eval(node.left) right = _safe_eval(node.right) op_type = type(node.op) - if op_type in SAFE_OPERATORS: - return SAFE_OPERATORS[op_type](left, right) - raise ValueError(f"Unsupported operator: {op_type.__name__}") + if op_type not in SAFE_OPERATORS: + raise ValueError(f"Unsupported operator: {op_type.__name__}") + + # Enforce exponent limit to prevent DoS (e.g., 2 ** 1000000000) + if op_type is ast.Pow and abs(right) > MAX_EXPONENT: + raise ValueError( + f"Exponent {right} exceeds maximum allowed ({MAX_EXPONENT})" + ) + + result = SAFE_OPERATORS[op_type](left, right) + + # Check result magnitude + if abs(result) > MAX_RESULT_MAGNITUDE: + raise ValueError("Result exceeds maximum allowed magnitude") + + return result if isinstance(node, ast.UnaryOp): operand = _safe_eval(node.operand) @@ -57,17 +75,25 @@ def calculate(expression: str) -> float: """ Evaluate a mathematical expression safely. - Supports: +, -, *, /, ** (power), parentheses + Supports: +, -, *, /, ** (power with exponent <= 100), parentheses Args: expression: A mathematical expression string (e.g., "85 * 0.15") + Maximum length: 200 characters. Returns: The result of the calculation. Raises: - ValueError: If the expression contains unsupported operations. + ValueError: If the expression contains unsupported operations, + exceeds length limits, or has exponents > 100. 
""" + # Length limit to prevent parsing DoS + if len(expression) > MAX_EXPRESSION_LENGTH: + raise ValueError( + f"Expression exceeds maximum length ({MAX_EXPRESSION_LENGTH} chars)" + ) + try: # Parse the expression into an AST tree = ast.parse(expression, mode="eval") From 2851f44323a5f4ec200de2cff01ef28b62654fc3 Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:44:18 +0000 Subject: [PATCH 07/10] Review comment fixed --- .../05-end-to-end/enterprise-chat-agent/README.md | 1 + .../infra/core/host/function-app.bicep | 9 ++++++++- .../05-end-to-end/enterprise-chat-agent/infra/main.bicep | 5 +++++ .../enterprise-chat-agent/local.settings.json.example | 1 + 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/README.md b/python/samples/05-end-to-end/enterprise-chat-agent/README.md index a36df0f098..9ebc8bb249 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/README.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/README.md @@ -105,6 +105,7 @@ Copy `local.settings.json.example` to `local.settings.json` and update: "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", "AZURE_COSMOS_DATABASE_NAME": "chat_db", "AZURE_COSMOS_CONTAINER_NAME": "messages", + "AZURE_COSMOS_THREADS_CONTAINER_NAME": "threads", "APPLICATIONINSIGHTS_CONNECTION_STRING": "", "ENABLE_INSTRUMENTATION": "true", "ENABLE_SENSITIVE_DATA": "false", diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep index 0dca769b09..5823bc3420 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep +++ b/python/samples/05-end-to-end/enterprise-chat-agent/infra/core/host/function-app.bicep @@ -22,9 +22,12 @@ param cosmosAccountName string @description('Cosmos DB database 
name') param cosmosDatabaseName string -@description('Cosmos DB container name') +@description('Cosmos DB container name for messages') param cosmosContainerName string +@description('Cosmos DB container name for threads') +param cosmosThreadsContainerName string = 'threads' + @description('Azure OpenAI endpoint URL') param azureOpenAiEndpoint string = '' @@ -134,6 +137,10 @@ resource functionApp 'Microsoft.Web/sites@2023-12-01' = { name: 'AZURE_COSMOS_CONTAINER_NAME' value: cosmosContainerName } + { + name: 'AZURE_COSMOS_THREADS_CONTAINER_NAME' + value: cosmosThreadsContainerName + } ] cors: { allowedOrigins: [ diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep index 5af32c7c25..73449968da 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep +++ b/python/samples/05-end-to-end/enterprise-chat-agent/infra/main.bicep @@ -37,6 +37,9 @@ param cosmosDatabaseName string = 'chat_db' @description('Cosmos DB container name for messages') param cosmosContainerName string = 'messages' +@description('Cosmos DB container name for threads') +param cosmosThreadsContainerName string = 'threads' + // ============================================================================ // Variables // ============================================================================ @@ -115,6 +118,7 @@ module functionApp './core/host/function-app.bicep' = { cosmosAccountName: cosmos.outputs.accountName cosmosDatabaseName: cosmosDatabaseName cosmosContainerName: cosmosContainerName + cosmosThreadsContainerName: cosmosThreadsContainerName azureOpenAiEndpoint: azureOpenAiEndpoint azureOpenAiModel: azureOpenAiModel } @@ -134,5 +138,6 @@ output AZURE_FUNCTION_APP_URL string = functionApp.outputs.url output AZURE_COSMOS_ENDPOINT string = cosmos.outputs.endpoint output AZURE_COSMOS_DATABASE_NAME string = cosmosDatabaseName output AZURE_COSMOS_CONTAINER_NAME string = 
cosmosContainerName +output AZURE_COSMOS_THREADS_CONTAINER_NAME string = cosmosThreadsContainerName output APPLICATIONINSIGHTS_CONNECTION_STRING string = monitoring.outputs.applicationInsightsConnectionString diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example index 2fd4b838df..d96c7d89a2 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example +++ b/python/samples/05-end-to-end/enterprise-chat-agent/local.settings.json.example @@ -10,6 +10,7 @@ "AZURE_COSMOS_ENDPOINT": "https://your-cosmos-account.documents.azure.com:443/", "AZURE_COSMOS_DATABASE_NAME": "chat_db", "AZURE_COSMOS_CONTAINER_NAME": "messages", + "AZURE_COSMOS_THREADS_CONTAINER_NAME": "threads", "APPLICATIONINSIGHTS_CONNECTION_STRING": "", "ENABLE_INSTRUMENTATION": "true", "ENABLE_SENSITIVE_DATA": "false", From c17d24a1b4f0069c009d5526d0e470930390f439 Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Thu, 26 Mar 2026 09:57:23 +0000 Subject: [PATCH 08/10] Update design document references for clarity and relevance --- .../05-end-to-end/enterprise-chat-agent/docs/DESIGN.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md index 9612cbf4df..161b1e96d8 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md +++ b/python/samples/05-end-to-end/enterprise-chat-agent/docs/DESIGN.md @@ -21,10 +21,8 @@ This document describes the architecture and design decisions for a **production ### References - **GitHub Issue**: [#2436 - Production Chat API with Azure Functions, Cosmos DB & Agent Framework](https://github.com/microsoft/agent-framework/issues/2436) -- [Create and run a durable agent 
(Python)](https://learn.microsoft.com/en-us/agent-framework/tutorials/agents/create-and-run-durable-agent) -- [Agent Framework Tools](https://learn.microsoft.com/en-us/agent-framework/concepts/tools) -- [Multi-agent Reference Architecture](https://learn.microsoft.com/en-us/azure/architecture/ai-ml/architecture/build-multi-agent-framework-solution) -- [Well-Architected AI Agents](https://learn.microsoft.com/en-us/azure/well-architected/service-guides/ai-agent-architecture) +- [Agent Framework Tools (Python)](https://learn.microsoft.com/agent-framework/agents/tools/function-tools) +- [Azure Functions Python Developer Guide](https://learn.microsoft.com/azure/azure-functions/functions-reference-python) --- From e57d2a22939697a4f9640454187e889047b0730e Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Thu, 26 Mar 2026 10:11:01 +0000 Subject: [PATCH 09/10] Refactor query construction in CosmosConversationStore and add security comments in weather tool --- .../enterprise-chat-agent/services/cosmos_store.py | 12 ++++++------ .../enterprise-chat-agent/tools/weather.py | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py index 1f108fa0b0..ba49280be2 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/cosmos_store.py @@ -266,12 +266,12 @@ async def list_threads( conditions.append("c.status = @status") parameters.append({"name": "@status", "value": status}) - query = f""" - SELECT * FROM c - WHERE {' AND '.join(conditions)} - ORDER BY c.updated_at DESC - OFFSET @offset LIMIT @limit - """ + # Build WHERE clause from fixed condition set (not user input) + where_clause = " AND ".join(conditions) + query = ( + f"SELECT * FROM c WHERE {where_clause} " # nosec B608 
+ "ORDER BY c.updated_at DESC OFFSET @offset LIMIT @limit" + ) parameters.extend( [ {"name": "@offset", "value": offset}, diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py index cdf2aa609e..81257ba859 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/weather.py @@ -24,10 +24,11 @@ def get_weather(location: str) -> dict: # Simulated weather data (replace with actual API call in production) conditions = ["sunny", "cloudy", "light rain", "partly cloudy", "overcast"] + # Random is used for mock demo data, not for security purposes return { "location": location, - "temp": random.randint(32, 85), - "condition": random.choice(conditions), - "humidity": random.randint(30, 90), + "temp": random.randint(32, 85), # nosec B311 + "condition": random.choice(conditions), # nosec B311 + "humidity": random.randint(30, 90), # nosec B311 "unit": "fahrenheit", } From 6d621b657bda1742045bbb17868410feeb0a7310 Mon Sep 17 00:00:00 2001 From: Vijay Anand M <12212247+vj-msft@users.noreply.github.com> Date: Thu, 26 Mar 2026 10:18:29 +0000 Subject: [PATCH 10/10] Refactor type hints to use union syntax for optional types in multiple files --- .../enterprise-chat-agent/services/agent_service.py | 7 +++---- .../enterprise-chat-agent/services/observability.py | 8 ++++---- .../enterprise-chat-agent/tools/calculator.py | 3 +-- .../enterprise-chat-agent/tools/knowledge_base.py | 4 +--- 4 files changed, 9 insertions(+), 13 deletions(-) diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py index ed6d93fece..0e2d2b70ec 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/agent_service.py @@ -12,7 
+12,6 @@ import logging import os from pathlib import Path -from typing import Optional from agent_framework import Agent, MCPStreamableHTTPTool from agent_framework.azure import AzureOpenAIChatClient @@ -24,9 +23,9 @@ search_knowledge_base, ) -_history_provider: Optional[CosmosHistoryProvider] = None -_agent: Optional[Agent] = None -_credential: Optional[DefaultAzureCredential] = None +_history_provider: CosmosHistoryProvider | None = None +_agent: Agent | None = None +_credential: DefaultAzureCredential | None = None # Prompts directory _PROMPTS_DIR = Path(__file__).parent.parent / "prompts" diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py index 07e4ccfa2d..bec7baf37c 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/services/observability.py @@ -12,8 +12,8 @@ """ import logging +from collections.abc import AsyncIterator from contextlib import asynccontextmanager -from typing import AsyncIterator, Optional # Import framework's observability - use framework APIs, don't recreate them from agent_framework.observability import configure_otel_providers, get_tracer @@ -70,8 +70,8 @@ def init_observability() -> None: async def http_request_span( method: str, path: str, - thread_id: Optional[str] = None, - user_id: Optional[str] = None, + thread_id: str | None = None, + user_id: str | None = None, ) -> AsyncIterator[Span]: """Create a top-level HTTP request span. @@ -126,7 +126,7 @@ async def http_request_span( async def cosmos_span( operation: str, container: str, - partition_key: Optional[str] = None, + partition_key: str | None = None, ) -> AsyncIterator[None]: """Create a Cosmos DB operation span. 
diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py index d7dcd8e74e..79d9a8a44f 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/calculator.py @@ -6,7 +6,6 @@ import ast import operator -from typing import Union from agent_framework import tool @@ -27,7 +26,7 @@ } -def _safe_eval(node: ast.AST) -> Union[int, float]: +def _safe_eval(node: ast.AST) -> int | float: """ Safely evaluate an AST node containing only numeric operations. """ diff --git a/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py index f7d0d124b6..8b0fb26bae 100644 --- a/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py +++ b/python/samples/05-end-to-end/enterprise-chat-agent/tools/knowledge_base.py @@ -6,8 +6,6 @@ Cosmos DB, or another search service. """ -from typing import Optional - from agent_framework import tool # Simulated knowledge base entries @@ -63,7 +61,7 @@ @tool def search_knowledge_base( query: str, - category: Optional[str] = None, + category: str | None = None, max_results: int = 3, ) -> list[dict]: """