diff --git a/app_main.py b/app_main.py
index b5e3713..fb12ad6 100644
--- a/app_main.py
+++ b/app_main.py
@@ -6,8 +6,12 @@
 from src.scouter.agent.mcp import app as mcp_app
 from src.scouter.config.llm import get_client_config
+from src.scouter.config.logging import setup_logging
 from src.scouter.ingestion.api import router as ingestion_router
 
+# Set up logging
+setup_logging()
+
 logger = logging.getLogger(__name__)
 
 config = get_client_config()
 
diff --git a/src/scouter/config/logging.py b/src/scouter/config/logging.py
new file mode 100644
index 0000000..9c3d973
--- /dev/null
+++ b/src/scouter/config/logging.py
@@ -0,0 +1,42 @@
+"""Logging configuration for the Scouter project."""
+
+import logging
+import sys
+
+
+def setup_logging(level: str = "INFO") -> None:
+    """Configure logging for the application.
+
+    Args:
+        level: The logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+    """
+    # Create the project logger
+    logger = logging.getLogger("scouter")
+    logger.setLevel(getattr(logging, level.upper()))
+
+    # Remove any existing handlers
+    for handler in logger.handlers[:]:
+        logger.removeHandler(handler)
+
+    # Create console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(getattr(logging, level.upper()))
+
+    # Create formatter
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    console_handler.setFormatter(formatter)
+
+    # Add handler to logger
+    logger.addHandler(console_handler)
+
+    # Set up root logger to avoid noisy third-party output
+    root_logger = logging.getLogger()
+    root_logger.setLevel(
+        logging.WARNING
+    )  # Only show warnings and above from other libraries
+
+    # Don't propagate to the root logger (avoids duplicate records)
+    logger.propagate = False
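Reviewer note: a minimal sketch of what this configuration yields once `setup_logging()` runs at startup (the `scouter.example` and `some.library` logger names are illustrative only):

```python
import logging

from src.scouter.config.logging import setup_logging

setup_logging(level="DEBUG")

# Loggers under the "scouter" namespace inherit DEBUG from the parent
# "scouter" logger and propagate to its stdout handler.
logging.getLogger("scouter.example").debug("visible on stdout")

# Third-party loggers fall through to the root logger, which stays at
# WARNING, so this INFO record is filtered out.
logging.getLogger("some.library").info("suppressed")
```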
diff --git a/src/scouter/llmcore/__init__.py b/src/scouter/llmcore/__init__.py
index d190412..e3765e9 100644
--- a/src/scouter/llmcore/__init__.py
+++ b/src/scouter/llmcore/__init__.py
@@ -1,6 +1,15 @@
 from .agent import AgentRun, run_agent
 from .client import ChatCompletionOptions, LLMConfig, call_llm, create_llm_client
-from .exceptions import AgentError, LLMError, ToolExecutionError
+from .exceptions import (
+    AgentError,
+    InvalidRunStateError,
+    InvalidToolDefinitionError,
+    LLMError,
+    MaxRetriesExceededError,
+    ToolExecutionError,
+)
+from .messages import create_instruction
+from .prompt import resolve_prompt
 from .tools import (
     Tool,
     create_tool,
@@ -20,6 +29,7 @@
     ChatCompletionToolMessageParam,
     ChatCompletionToolParam,
     ChatCompletionUserMessageParam,
+    Prompt,
 )
 from .utils import retry_loop
 
@@ -36,16 +46,22 @@
     "ChatCompletionToolMessageParam",
     "ChatCompletionToolParam",
     "ChatCompletionUserMessageParam",
+    "InvalidRunStateError",
+    "InvalidToolDefinitionError",
     "LLMConfig",
     "LLMError",
+    "MaxRetriesExceededError",
+    "Prompt",
     "Tool",
     "ToolExecutionError",
     "call_llm",
+    "create_instruction",
     "create_llm_client",
     "create_tool",
     "execute_tool",
     "lookup_tool",
     "register_tool",
+    "resolve_prompt",
     "retry_loop",
     "run_agent",
     "run_tool",
diff --git a/src/scouter/llmcore/agent.py b/src/scouter/llmcore/agent.py
index 246a14e..ac154d8 100644
--- a/src/scouter/llmcore/agent.py
+++ b/src/scouter/llmcore/agent.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import logging
 from dataclasses import dataclass, field
 from time import time
 from typing import TYPE_CHECKING, cast
@@ -20,8 +21,11 @@
 )
 
 from .client import ChatCompletionOptions, call_llm
+from .exceptions import InvalidRunStateError
 from .tools import run_tool
 
+logger = logging.getLogger(__name__)
+
 
 @dataclass
 class InputStep:
@@ -89,7 +93,8 @@ def total_usage(
     def last_output(self) -> str:
         if not self.steps:
             msg = "No steps in run"
-            raise ValueError(msg)
+            logger.error("Attempted to get last output from empty run")
+            raise InvalidRunStateError(msg)
         last_step = self.steps[-1]
         if isinstance(last_step, LLMStep):
             content = last_step.message.get("content")
@@ -129,6 +134,9 @@ def run_agent(
     tools: Iterable[ChatCompletionToolUnionParam] | None = None,
     options: ChatCompletionOptions | None = None,
 ):
+    logger.info(
+        "Starting agent run with model=%s, initial_steps=%d", model, len(run.steps)
+    )
     while run.continue_condition(run):
         completion: ChatCompletion = call_llm(
             model, run.conversation_history, tools, options
@@ -138,18 +146,26 @@
 
         # Handle tool calls
         if msg.tool_calls:
+            logger.debug("Processing %d tool calls", len(msg.tool_calls))
             for tc in msg.tool_calls:
                 tc = cast("ChatCompletionMessageToolCall", tc)
                 args = json.loads(tc.function.arguments)
+                logger.debug(
+                    "Executing tool '%s' with args: %s", tc.function.name, args
+                )
                 start = time()
                 try:
                     output = run_tool(tc.function.name, args)
                     success = True
                     error = None
+                    logger.debug("Tool '%s' executed successfully", tc.function.name)
                 except Exception as e:  # noqa: BLE001
                     output = ""
                     success = False
                     error = str(e)
+                    logger.warning(
+                        "Tool '%s' execution failed: %s", tc.function.name, str(e)
+                    )
                 end = time()
                 run.add_step(
                     ToolStep(
@@ -162,3 +178,4 @@
                 error,
             )
         )
+    logger.info("Agent run completed with %d total steps", len(run.steps))
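Reviewer note: a sketch of the new error surface, assuming `AgentRun` can be constructed empty and `last_output` is a plain method (the real constructor may require arguments):

```python
from src.scouter.llmcore import AgentRun, InvalidRunStateError

run = AgentRun()  # assumed: an empty run with no steps yet

try:
    run.last_output()
except InvalidRunStateError as e:
    # Previously a bare ValueError; now a typed LLMError subclass,
    # logged through the module's namespaced logger.
    print(f"empty run: {e}")
```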
""" + tools_count = sum(1 for _ in tools) if tools else 0 + logger.debug( + "Calling LLM with model=%s, message_count=%d, tools_count=%d", + model, + len(messages), + tools_count, + ) def _call(): kwargs = options or {} @@ -83,4 +100,6 @@ def _call(): model=model, messages=messages, tools=tools or [], **kwargs ) - return retry_loop(_call) + result = retry_loop(_call) + logger.debug("LLM call completed successfully") + return result diff --git a/src/scouter/llmcore/exceptions.py b/src/scouter/llmcore/exceptions.py index 417742b..98bf984 100644 --- a/src/scouter/llmcore/exceptions.py +++ b/src/scouter/llmcore/exceptions.py @@ -8,3 +8,15 @@ class ToolExecutionError(LLMError): class AgentError(LLMError): """Raised when agent operations fail.""" + + +class MaxRetriesExceededError(LLMError): + """Raised when maximum retry attempts are exceeded.""" + + +class InvalidRunStateError(LLMError): + """Raised when an agent run is in an invalid state.""" + + +class InvalidToolDefinitionError(LLMError): + """Raised when a tool is defined incorrectly.""" diff --git a/src/scouter/llmcore/messages.py b/src/scouter/llmcore/messages.py new file mode 100644 index 0000000..034d8fd --- /dev/null +++ b/src/scouter/llmcore/messages.py @@ -0,0 +1,29 @@ +import logging + +from .agent import InputStep, Step +from .types import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam + +logger = logging.getLogger(__name__) + + +def create_instruction( + steps: list[Step], system: str | None = None, prompt: str | None = None +) -> None: + """Add system and user messages to the steps list as InputStep instances.""" + logger.debug( + "Creating instruction with system=%s, prompt=%s", bool(system), bool(prompt) + ) + if system: + steps.append( + InputStep( + message=ChatCompletionSystemMessageParam(role="system", content=system) + ) + ) + logger.debug("Added system message to steps") + if prompt: + steps.append( + InputStep( + message=ChatCompletionUserMessageParam(role="user", content=prompt) + ) + ) + logger.debug("Added user message to steps") diff --git a/src/scouter/llmcore/prompt.py b/src/scouter/llmcore/prompt.py new file mode 100644 index 0000000..9904867 --- /dev/null +++ b/src/scouter/llmcore/prompt.py @@ -0,0 +1,16 @@ +import logging + +from .types import Prompt + +logger = logging.getLogger(__name__) + + +def resolve_prompt(prompt: Prompt, *args, **kwargs) -> str: + """Resolve a Prompt to a string, executing callables with optional args if necessary.""" + if isinstance(prompt, str): + logger.debug("Resolved string prompt directly") + return prompt + logger.debug("Executing callable prompt") + result = prompt(*args, **kwargs) + logger.debug("Callable prompt executed successfully") + return result diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py index f592fed..b76fff9 100644 --- a/src/scouter/llmcore/tools.py +++ b/src/scouter/llmcore/tools.py @@ -3,12 +3,15 @@ import asyncio import inspect import json +import logging from collections.abc import Callable # noqa: TC003 from typing import TYPE_CHECKING, Any, get_origin from pydantic import BaseModel, Field -from .exceptions import ToolExecutionError +from .exceptions import InvalidToolDefinitionError, ToolExecutionError + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from .types import ChatCompletionToolParam @@ -28,11 +31,13 @@ class Tool(BaseModel): input_type: type[BaseModel] | None = None def model_post_init(self, /, __context) -> None: + logger.debug("Initializing tool '%s'", self.name) # 1. 
diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py
index f592fed..b76fff9 100644
--- a/src/scouter/llmcore/tools.py
+++ b/src/scouter/llmcore/tools.py
@@ -3,12 +3,15 @@
 import asyncio
 import inspect
 import json
+import logging
 from collections.abc import Callable  # noqa: TC003
 from typing import TYPE_CHECKING, Any, get_origin
 
 from pydantic import BaseModel, Field
 
-from .exceptions import ToolExecutionError
+from .exceptions import InvalidToolDefinitionError, ToolExecutionError
+
+logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
     from .types import ChatCompletionToolParam
@@ -28,11 +31,13 @@ class Tool(BaseModel):
     input_type: type[BaseModel] | None = None
 
     def model_post_init(self, /, __context) -> None:
+        logger.debug("Initializing tool '%s'", self.name)
         # 1. Extract input model from handler signature
         sig = inspect.signature(self.handler)
         if not sig.parameters:
             msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)."
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         param = next(iter(sig.parameters.values()))
         input_model = param.annotation
@@ -40,7 +45,8 @@
         origin = get_origin(input_model) or input_model
         if not (isinstance(origin, type) and issubclass(origin, BaseModel)):
             msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}"
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         self.input_type = origin  # SAVE THIS for execute_tool
 
@@ -53,7 +59,8 @@
             pass  # Allow BaseModel
         else:
             msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str"
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         # 3. Auto-fill everything
         self.parameters_schema = origin.model_json_schema()
@@ -74,6 +81,7 @@
             f"The tool will **always return JSON matching this exact schema**:\n"
             f"```json\n{pretty_output}\n```"
         )
+        logger.debug("Tool '%s' initialized successfully", self.name)
 
     def openai_tool_spec(self) -> ChatCompletionToolParam:
         return {
@@ -120,8 +128,11 @@ def run_tool(name: str, raw_args: dict[str, Any]) -> str:
     """
    Looks up a tool by name and executes it.
     """
+    logger.debug("Running tool '%s' with args: %s", name, raw_args)
     tool_instance = lookup_tool(name)
-    return execute_tool(tool_instance, raw_args)
+    result = execute_tool(tool_instance, raw_args)
+    logger.debug("Tool '%s' completed successfully", name)
+    return result
 
 
 def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str:
@@ -132,6 +143,7 @@
     3. Gets OutputModel or str.
     4. Returns OutputModel.model_dump_json() or the str.
     """
+    logger.debug("Executing tool '%s'", tool_instance.name)
     try:
         # 1. Instantiate the specific input model
         input_model_cls = tool_instance.input_type
@@ -153,10 +165,14 @@
 
         # 4. Serialize Output
         if isinstance(result_model, str):
+            logger.debug("Tool '%s' returned string result", tool_instance.name)
             return result_model
-        return result_model.model_dump_json()
+        result = result_model.model_dump_json()
+        logger.debug("Tool '%s' returned JSON result", tool_instance.name)
+        return result
 
     except Exception as e:
+        logger.exception("Tool '%s' execution failed", tool_instance.name)
         msg = f"Error executing tool '{tool_instance.name}': {e!s}"
         raise ToolExecutionError(msg) from e
 
@@ -171,8 +187,10 @@ def register_tool(tool_instance: Tool) -> None:
     """
     if not tool_instance.name:
         msg = "Cannot register tool without a name."
+        logger.error("Tool registration failed: %s", msg)
         raise ToolExecutionError(msg)
     TOOL_REGISTRY[tool_instance.name] = tool_instance
+    logger.info("Tool '%s' registered successfully", tool_instance.name)
 
 
 def lookup_tool(name: str) -> Tool:
+ logger.error("Tool registration failed: %s", msg) raise ToolExecutionError(msg) TOOL_REGISTRY[tool_instance.name] = tool_instance + logger.info("Tool '%s' registered successfully", tool_instance.name) def lookup_tool(name: str) -> Tool: diff --git a/src/scouter/llmcore/types.py b/src/scouter/llmcore/types.py index 6b22afb..899b47e 100644 --- a/src/scouter/llmcore/types.py +++ b/src/scouter/llmcore/types.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + # Re-export OpenAI types from openai.types.chat import ( ChatCompletion, @@ -13,6 +15,9 @@ ChatCompletionMessageToolCall, ) +# Custom types +Prompt = str | Callable[[], str] + __all__ = [ "ChatCompletion", "ChatCompletionAssistantMessageParam", @@ -23,4 +28,5 @@ "ChatCompletionToolMessageParam", "ChatCompletionToolParam", "ChatCompletionUserMessageParam", + "Prompt", ] diff --git a/src/scouter/llmcore/utils.py b/src/scouter/llmcore/utils.py index 8d53808..1699b9b 100644 --- a/src/scouter/llmcore/utils.py +++ b/src/scouter/llmcore/utils.py @@ -1,24 +1,41 @@ +import logging import random import time from openai import APIError, RateLimitError +from .exceptions import MaxRetriesExceededError + +logger = logging.getLogger(__name__) + ERROR_MAX_RETRY = "max retries exceeded" def retry_loop(func, max_retries=5, base_delay=1): + logger.debug( + "Starting retry loop with max_retries=%d, base_delay=%d", + max_retries, + base_delay, + ) last_exception: BaseException | None = None for attempt in range(max_retries): try: - return func() + result = func() except (RateLimitError, APIError) as e: # noqa: PERF203 last_exception = e + logger.warning("Attempt %d failed: %s", attempt + 1, str(e)) if attempt == max_retries - 1: break sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 + logger.debug("Sleeping for %.2f seconds before retry", sleep_time) time.sleep(sleep_time) + else: + if attempt > 0: + logger.info("Operation succeeded on attempt %d", attempt + 1) + return result # If we reach here, all retries failed - raise last_exception or RuntimeError(ERROR_MAX_RETRY) + logger.error("All %d retry attempts failed", max_retries) + raise last_exception or MaxRetriesExceededError(ERROR_MAX_RETRY)