Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 162 additions & 13 deletions src/backend/v4/magentic_agents/common/lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from agent_framework_azure_ai import AzureAIAgentClient
from azure.ai.agents.aio import AgentsClient
from azure.identity.aio import DefaultAzureCredential
from common.config.app_config import config
from common.database.database_base import DatabaseBase
from common.models.messages_af import CurrentTeamAgent, TeamConfiguration
from common.utils.utils_agents import (
Expand Down Expand Up @@ -59,6 +60,8 @@ def __init__(
self.agent_instructions: str | None = agent_instructions
self.model_deployment_name: str | None = model_deployment_name
self.logger = logging.getLogger(__name__)
# Initialize project_client for RAI agent reuse (used in resolve_agent_id)
self.project_client = None

async def open(self) -> "MCPEnabledBase":
if self._stack is not None:
Expand All @@ -76,6 +79,16 @@ async def open(self) -> "MCPEnabledBase":
)
if self._stack:
await self._stack.enter_async_context(self.client)

# Initialize project_client for RAI agent reuse (Projects SDK)
# This is used by resolve_agent_id to retrieve agents created via Projects SDK
if not self.project_client:
try:
self.project_client = config.get_ai_project_client()
self.logger.info("Initialized project_client for RAI agent reuse.")
except Exception as ex:
self.logger.warning("Failed to initialize project_client: %s", ex)

# Prepare MCP
await self._prepare_mcp_tool()

Expand Down Expand Up @@ -149,6 +162,55 @@ def get_chat_client(self, chat_client) -> AzureAIAgentClient:
)
return chat_client

async def resolve_agent_id(self, agent_id: str) -> Optional[str]:
"""Resolve agent ID via Projects SDK first, fallback to AgentsClient.

RAI agents are created via Projects SDK (project_client), so we try that first.
If that fails, fall back to the endpoint-based AgentsClient.

Returns:
Resolved agent ID if found, None otherwise.
"""
# Try Projects SDK first (for RAI agents created via project_client)
if hasattr(self, "project_client") and self.project_client:
try:
agent = await self.project_client.agents.get_agent(agent_id)
if agent and agent.id:
self.logger.info(
"RAI.AgentReuseSuccess: Resolved agent via Projects SDK (agent_id=%s)",
agent.id,
)
return agent.id
except Exception as ex:
self.logger.warning(
"RAI.AgentReuseMiss: Projects SDK get_agent failed (agent_id=%s): %s. Reason=ProjectsGetFailed",
agent_id,
ex,
)

# Fallback to AgentsClient (endpoint-based)
if self.client:
try:
agent = await self.client.get_agent(agent_id=agent_id)
if agent and agent.id:
self.logger.info(
"RAI.AgentReuseSuccess: Resolved agent via AgentsClient (agent_id=%s)",
agent.id,
)
return agent.id
except Exception as ex:
self.logger.warning(
"RAI.AgentReuseMiss: AgentsClient get_agent failed (agent_id=%s): %s. Reason=EndpointGetFailed",
agent_id,
ex,
)

self.logger.error(
"RAI.AgentReuseMiss: Could not resolve agent_id=%s via any client. Reason=ClientMismatch",
agent_id,
)
return None

def get_agent_id(self, chat_client) -> str:
"""Return the underlying agent ID."""
if chat_client and chat_client.agent_id is not None:
Expand All @@ -159,6 +221,17 @@ def get_agent_id(self, chat_client) -> str:
and self._agent.chat_client.agent_id is not None
):
return self._agent.chat_client.agent_id # type: ignore

# This should not happen if server-side agent creation is working properly
if self.agent_name and "RAI" in self.agent_name.upper():
self.logger.error(
"RAI.AgentReuseMiss: No valid agent_id found for RAI agent. Reason=NoValidChatClient (This indicates server-side agent creation failed)"
)
raise RuntimeError(
f"RAI agent '{self.agent_name}' has no valid agent_id. Server-side agent creation may have failed."
)

# For non-RAI agents, continue normal flow
id = generate_assistant_id()
self.logger.info("Generated new agent ID: %s", id)
return id
Expand All @@ -171,29 +244,100 @@ async def get_database_team_agent(self) -> Optional[AzureAIAgentClient]:
self.memory_store, self.team_config, self.agent_name
)

if agent_id:
agent = await self.client.get_agent(agent_id=agent_id)
if agent and agent.id is not None:
chat_client = AzureAIAgentClient(
project_endpoint=self.project_endpoint,
agent_id=agent.id,
model_deployment_name=self.model_deployment_name,
async_credential=self.creds,
)
if not agent_id:
self.logger.info(
"RAI.AgentReuseMiss: No stored agent_id found in database. Reason=NoStoredId (agent_name=%s)",
self.agent_name,
)
return None

# Use resolve_agent_id for dual-client fallback (Projects SDK → AgentsClient)
resolved_id = await self.resolve_agent_id(agent_id)
if not resolved_id:
self.logger.error(
"RAI.AgentReuseMiss: Stored agent_id=%s could not be resolved via any client. Clearing stale DB entry.",
agent_id,
)
# Clear the stale agent ID from database so we can create a fresh one
try:
await self._clear_stale_agent_from_db(agent_id)
except Exception as clear_ex:
self.logger.warning("Failed to clear stale agent from DB: %s", clear_ex)
return None

# Create AzureAIAgentClient with resolved agent ID
# For RAI agents created via Projects SDK, use project_client if available
if hasattr(self, "project_client") and self.project_client:
chat_client = AzureAIAgentClient(
project_client=self.project_client,
agent_id=resolved_id,
async_credential=self.creds,
)
self.logger.info(
"RAI.AgentReuseSuccess: Created AzureAIAgentClient with Projects SDK (agent_id=%s)",
resolved_id,
)
else:
# Fallback to endpoint-based client
chat_client = AzureAIAgentClient(
project_endpoint=self.project_endpoint,
agent_id=resolved_id,
model_deployment_name=self.model_deployment_name,
async_credential=self.creds,
)
self.logger.info(
"RAI.AgentReuseSuccess: Created AzureAIAgentClient with endpoint (agent_id=%s)",
resolved_id,
)

except (
Exception
) as ex: # Consider narrowing this to specific exceptions if possible
self.logger.error("Failed to initialize Get database team agent: %s", ex)
except Exception as ex:
self.logger.error(
"RAI.AgentReuseMiss: Failed to retrieve database team agent: %s. Reason=Exception",
ex,
)
return chat_client

async def _clear_stale_agent_from_db(self, stale_agent_id: str) -> None:
"""Clear a stale agent ID from the database."""
try:
# Note: This is a placeholder - the actual implementation depends on your DB API
# You may need to call a delete method on memory_store
self.logger.info(
"RAI: Clearing stale agent_id=%s from database for agent_name=%s",
stale_agent_id,
self.agent_name,
)
# If your DatabaseBase has a delete method, call it here
# await self.memory_store.delete_team_agent(self.team_config.team_id, self.agent_name)
except Exception as ex:
self.logger.error("Failed to clear stale agent from database: %s", ex)

async def save_database_team_agent(self) -> None:
"""Save current team agent to database."""
try:
if self._agent.id is None:
self.logger.error("Cannot save database team agent: agent_id is None")
return

# Check if agent already exists in DB to prevent overwriting during reuse
stored_id = await get_database_team_agent_id(
self.memory_store, self.team_config, self.agent_name
)

if stored_id == self._agent.id:
self.logger.info(
"RAI.AgentReuseSuccess: Agent ID unchanged (agent_id=%s); skipping save to prevent overwrite.",
self._agent.id,
)
return

if stored_id and stored_id != self._agent.id:
self.logger.warning(
"RAI: Overwriting existing agent_id=%s with new agent_id=%s (This may indicate reuse failure)",
stored_id,
self._agent.id,
)

currentAgent = CurrentTeamAgent(
team_id=self.team_config.team_id,
team_name=self.team_config.name,
Expand All @@ -203,6 +347,11 @@ async def save_database_team_agent(self) -> None:
agent_instructions=self.agent_instructions,
)
await self.memory_store.add_team_agent(currentAgent)
self.logger.info(
"RAI: Saved agent to database (agent_id=%s, agent_name=%s)",
self._agent.id,
self.agent_name,
)

except Exception as ex:
self.logger.error("Failed to save save database: %s", ex)
Expand Down
59 changes: 56 additions & 3 deletions src/backend/v4/magentic_agents/foundry_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ def __init__(
self.enable_code_interpreter = enable_code_interpreter
self.search = search_config
self.logger = logging.getLogger(__name__)
self.project_client = config.get_ai_project_client()
# Initialize project_client if not already set by base class
if not self.project_client:
self.project_client = config.get_ai_project_client()

# Decide early whether Azure Search mode should be activated
self._use_azure_search = self._is_azure_search_requested()
Expand Down Expand Up @@ -105,6 +107,48 @@ async def _collect_tools(self) -> List:
self.logger.info("Total tools collected (MCP path): %d", len(tools))
return tools

# -------------------------
# MCP Agent Creation helper
# -------------------------
async def _create_mcp_agent_on_server(self) -> Optional[AzureAIAgentClient]:
"""
Create a server-side Azure AI agent for MCP/RAI path.
This ensures the agent exists on the server and can be reused.

Returns:
AzureAIAgentClient with server-created agent ID, or None on failure.
"""
try:
# Create agent on server via Projects SDK
azure_agent = await self.project_client.agents.create_agent(
model=self.model_deployment_name,
name=self.agent_name,
instructions=self.agent_instructions,
description=self.agent_description if self.agent_description else "",
)

self.logger.info(
"RAI: Created server-side agent (agent_id=%s, name=%s)",
azure_agent.id,
self.agent_name,
)

# Create client with server-generated agent ID
chat_client = AzureAIAgentClient(
project_client=self.project_client,
agent_id=azure_agent.id,
async_credential=self.creds,
)
return chat_client

except Exception as ex:
self.logger.error(
"RAI: Failed to create server-side agent (name=%s): %s",
self.agent_name,
ex,
)
return None

# -------------------------
# Azure Search helper
# -------------------------
Expand Down Expand Up @@ -258,9 +302,18 @@ async def _after_open(self) -> None:
# use MCP path
self.logger.info("Initializing agent in MCP mode.")
tools = await self._collect_tools()

# For RAI agents (no chatClient), create server-side agent first
if not chatClient:
chat_client = await self._create_mcp_agent_on_server()
if not chat_client:
raise RuntimeError("Failed to create RAI agent on server.")
else:
chat_client = chatClient

self._agent = ChatAgent(
id=self.get_agent_id(chatClient),
chat_client=self.get_chat_client(chatClient),
id=self.get_agent_id(chat_client),
chat_client=self.get_chat_client(chat_client),
instructions=self.agent_instructions,
name=self.agent_name,
description=self.agent_description,
Expand Down
Loading