diff --git a/app_main.py b/app_main.py
index b5e3713..fb12ad6 100644
--- a/app_main.py
+++ b/app_main.py
@@ -6,8 +6,12 @@
 from src.scouter.agent.mcp import app as mcp_app
 from src.scouter.config.llm import get_client_config
+from src.scouter.config.logging import setup_logging
 from src.scouter.ingestion.api import router as ingestion_router
 
+# Set up logging
+setup_logging()
+
 logger = logging.getLogger(__name__)
 
 config = get_client_config()
 
diff --git a/src/scouter/config/logging.py b/src/scouter/config/logging.py
new file mode 100644
index 0000000..9c3d973
--- /dev/null
+++ b/src/scouter/config/logging.py
@@ -0,0 +1,42 @@
+"""Logging configuration for the Scouter project."""
+
+import logging
+import sys
+
+
+def setup_logging(level: str = "INFO") -> None:
+    """Configure logging for the application.
+
+    Args:
+        level: The logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
+    """
+    # Create the project logger
+    logger = logging.getLogger("scouter")
+    logger.setLevel(getattr(logging, level.upper()))
+
+    # Remove any existing handlers
+    for handler in logger.handlers[:]:
+        logger.removeHandler(handler)
+
+    # Create console handler
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(getattr(logging, level.upper()))
+
+    # Create formatter
+    formatter = logging.Formatter(
+        "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        datefmt="%Y-%m-%d %H:%M:%S",
+    )
+    console_handler.setFormatter(formatter)
+
+    # Add handler to logger
+    logger.addHandler(console_handler)
+
+    # Set up root logger to avoid noisy third-party output
+    root_logger = logging.getLogger()
+    root_logger.setLevel(
+        logging.WARNING
+    )  # Only show warnings and above from other libraries
+
+    # Don't propagate to the root logger (avoids duplicate records)
+    logger.propagate = False
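Reviewer note: a minimal sketch of what this configuration yields once `setup_logging()` runs at startup (the `scouter.example` and `some.library` logger names are illustrative only):

```python
import logging

from src.scouter.config.logging import setup_logging

setup_logging(level="DEBUG")

# Loggers under the "scouter" namespace inherit DEBUG from the parent
# "scouter" logger and propagate to its stdout handler.
logging.getLogger("scouter.example").debug("visible on stdout")

# Third-party loggers fall through to the root logger, which stays at
# WARNING, so this INFO record is filtered out.
logging.getLogger("some.library").info("suppressed")
```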
diff --git a/src/scouter/llmcore/__init__.py b/src/scouter/llmcore/__init__.py
index d190412..e3765e9 100644
--- a/src/scouter/llmcore/__init__.py
+++ b/src/scouter/llmcore/__init__.py
@@ -1,6 +1,15 @@
 from .agent import AgentRun, run_agent
 from .client import ChatCompletionOptions, LLMConfig, call_llm, create_llm_client
-from .exceptions import AgentError, LLMError, ToolExecutionError
+from .exceptions import (
+    AgentError,
+    InvalidRunStateError,
+    InvalidToolDefinitionError,
+    LLMError,
+    MaxRetriesExceededError,
+    ToolExecutionError,
+)
+from .messages import create_instruction
+from .prompt import resolve_prompt
 from .tools import (
     Tool,
     create_tool,
@@ -20,6 +29,7 @@
     ChatCompletionToolMessageParam,
     ChatCompletionToolParam,
     ChatCompletionUserMessageParam,
+    Prompt,
 )
 from .utils import retry_loop
 
@@ -36,16 +46,22 @@
     "ChatCompletionToolMessageParam",
     "ChatCompletionToolParam",
     "ChatCompletionUserMessageParam",
+    "InvalidRunStateError",
+    "InvalidToolDefinitionError",
     "LLMConfig",
     "LLMError",
+    "MaxRetriesExceededError",
+    "Prompt",
     "Tool",
     "ToolExecutionError",
     "call_llm",
+    "create_instruction",
     "create_llm_client",
     "create_tool",
     "execute_tool",
     "lookup_tool",
     "register_tool",
+    "resolve_prompt",
     "retry_loop",
     "run_agent",
     "run_tool",
diff --git a/src/scouter/llmcore/agent.py b/src/scouter/llmcore/agent.py
index 246a14e..ac154d8 100644
--- a/src/scouter/llmcore/agent.py
+++ b/src/scouter/llmcore/agent.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import json
+import logging
 from dataclasses import dataclass, field
 from time import time
 from typing import TYPE_CHECKING, cast
@@ -20,8 +21,11 @@
 )
 
 from .client import ChatCompletionOptions, call_llm
+from .exceptions import InvalidRunStateError
 from .tools import run_tool
 
+logger = logging.getLogger(__name__)
+
 
 @dataclass
 class InputStep:
@@ -89,7 +93,8 @@ def total_usage(
     def last_output(self) -> str:
         if not self.steps:
             msg = "No steps in run"
-            raise ValueError(msg)
+            logger.error("Attempted to get last output from empty run")
+            raise InvalidRunStateError(msg)
         last_step = self.steps[-1]
         if isinstance(last_step, LLMStep):
             content = last_step.message.get("content")
@@ -129,6 +134,9 @@ def run_agent(
     tools: Iterable[ChatCompletionToolUnionParam] | None = None,
     options: ChatCompletionOptions | None = None,
 ):
+    logger.info(
+        "Starting agent run with model=%s, initial_steps=%d", model, len(run.steps)
+    )
     while run.continue_condition(run):
         completion: ChatCompletion = call_llm(
             model, run.conversation_history, tools, options
@@ -138,18 +146,26 @@
 
         # Handle tool calls
         if msg.tool_calls:
+            logger.debug("Processing %d tool calls", len(msg.tool_calls))
             for tc in msg.tool_calls:
                 tc = cast("ChatCompletionMessageToolCall", tc)
                 args = json.loads(tc.function.arguments)
+                logger.debug(
+                    "Executing tool '%s' with args: %s", tc.function.name, args
+                )
                 start = time()
                 try:
                     output = run_tool(tc.function.name, args)
                     success = True
                     error = None
+                    logger.debug("Tool '%s' executed successfully", tc.function.name)
                 except Exception as e:  # noqa: BLE001
                     output = ""
                     success = False
                     error = str(e)
+                    logger.warning(
+                        "Tool '%s' execution failed: %s", tc.function.name, str(e)
+                    )
                 end = time()
                 run.add_step(
                     ToolStep(
@@ -162,3 +178,4 @@
                 error,
             )
         )
+    logger.info("Agent run completed with %d total steps", len(run.steps))
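Reviewer note: a sketch of the new error surface, assuming `AgentRun` can be constructed empty and `last_output` is a plain method (the real constructor may require arguments):

```python
from src.scouter.llmcore import AgentRun, InvalidRunStateError

run = AgentRun()  # assumed: an empty run with no steps yet

try:
    run.last_output()
except InvalidRunStateError as e:
    # Previously a bare ValueError; now a typed LLMError subclass,
    # logged through the module's namespaced logger.
    print(f"empty run: {e}")
```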
""" + tools_count = sum(1 for _ in tools) if tools else 0 + logger.debug( + "Calling LLM with model=%s, message_count=%d, tools_count=%d", + model, + len(messages), + tools_count, + ) def _call(): kwargs = options or {} @@ -83,4 +100,6 @@ def _call(): model=model, messages=messages, tools=tools or [], **kwargs ) - return retry_loop(_call) + result = retry_loop(_call) + logger.debug("LLM call completed successfully") + return result diff --git a/src/scouter/llmcore/exceptions.py b/src/scouter/llmcore/exceptions.py index 417742b..98bf984 100644 --- a/src/scouter/llmcore/exceptions.py +++ b/src/scouter/llmcore/exceptions.py @@ -8,3 +8,15 @@ class ToolExecutionError(LLMError): class AgentError(LLMError): """Raised when agent operations fail.""" + + +class MaxRetriesExceededError(LLMError): + """Raised when maximum retry attempts are exceeded.""" + + +class InvalidRunStateError(LLMError): + """Raised when an agent run is in an invalid state.""" + + +class InvalidToolDefinitionError(LLMError): + """Raised when a tool is defined incorrectly.""" diff --git a/src/scouter/llmcore/messages.py b/src/scouter/llmcore/messages.py new file mode 100644 index 0000000..034d8fd --- /dev/null +++ b/src/scouter/llmcore/messages.py @@ -0,0 +1,29 @@ +import logging + +from .agent import InputStep, Step +from .types import ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam + +logger = logging.getLogger(__name__) + + +def create_instruction( + steps: list[Step], system: str | None = None, prompt: str | None = None +) -> None: + """Add system and user messages to the steps list as InputStep instances.""" + logger.debug( + "Creating instruction with system=%s, prompt=%s", bool(system), bool(prompt) + ) + if system: + steps.append( + InputStep( + message=ChatCompletionSystemMessageParam(role="system", content=system) + ) + ) + logger.debug("Added system message to steps") + if prompt: + steps.append( + InputStep( + message=ChatCompletionUserMessageParam(role="user", content=prompt) + ) + ) + logger.debug("Added user message to steps") diff --git a/src/scouter/llmcore/prompt.py b/src/scouter/llmcore/prompt.py new file mode 100644 index 0000000..9904867 --- /dev/null +++ b/src/scouter/llmcore/prompt.py @@ -0,0 +1,16 @@ +import logging + +from .types import Prompt + +logger = logging.getLogger(__name__) + + +def resolve_prompt(prompt: Prompt, *args, **kwargs) -> str: + """Resolve a Prompt to a string, executing callables with optional args if necessary.""" + if isinstance(prompt, str): + logger.debug("Resolved string prompt directly") + return prompt + logger.debug("Executing callable prompt") + result = prompt(*args, **kwargs) + logger.debug("Callable prompt executed successfully") + return result diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py index f592fed..b76fff9 100644 --- a/src/scouter/llmcore/tools.py +++ b/src/scouter/llmcore/tools.py @@ -3,12 +3,15 @@ import asyncio import inspect import json +import logging from collections.abc import Callable # noqa: TC003 from typing import TYPE_CHECKING, Any, get_origin from pydantic import BaseModel, Field -from .exceptions import ToolExecutionError +from .exceptions import InvalidToolDefinitionError, ToolExecutionError + +logger = logging.getLogger(__name__) if TYPE_CHECKING: from .types import ChatCompletionToolParam @@ -28,11 +31,13 @@ class Tool(BaseModel): input_type: type[BaseModel] | None = None def model_post_init(self, /, __context) -> None: + logger.debug("Initializing tool '%s'", self.name) # 1. 
diff --git a/src/scouter/llmcore/tools.py b/src/scouter/llmcore/tools.py
index f592fed..b76fff9 100644
--- a/src/scouter/llmcore/tools.py
+++ b/src/scouter/llmcore/tools.py
@@ -3,12 +3,15 @@
 import asyncio
 import inspect
 import json
+import logging
 from collections.abc import Callable  # noqa: TC003
 from typing import TYPE_CHECKING, Any, get_origin
 
 from pydantic import BaseModel, Field
 
-from .exceptions import ToolExecutionError
+from .exceptions import InvalidToolDefinitionError, ToolExecutionError
+
+logger = logging.getLogger(__name__)
 
 if TYPE_CHECKING:
     from .types import ChatCompletionToolParam
@@ -28,11 +31,13 @@ class Tool(BaseModel):
     input_type: type[BaseModel] | None = None
 
     def model_post_init(self, /, __context) -> None:
+        logger.debug("Initializing tool '%s'", self.name)
         # 1. Extract input model from handler signature
         sig = inspect.signature(self.handler)
         if not sig.parameters:
             msg = f"Handler for tool '{self.name}' must have at least one argument (the input Pydantic model)."
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         param = next(iter(sig.parameters.values()))
         input_model = param.annotation
@@ -40,7 +45,8 @@
         origin = get_origin(input_model) or input_model
         if not (isinstance(origin, type) and issubclass(origin, BaseModel)):
             msg = f"Handler first param for '{self.name}' must be a Pydantic BaseModel, got {origin}"
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         self.input_type = origin  # SAVE THIS for execute_tool
 
@@ -53,7 +59,8 @@
             pass  # Allow BaseModel
         else:
             msg = f"Handler for '{self.name}' must return a Pydantic BaseModel or str"
-            raise TypeError(msg)
+            logger.error("Tool validation failed: %s", msg)
+            raise InvalidToolDefinitionError(msg)
 
         # 3. Auto-fill everything
         self.parameters_schema = origin.model_json_schema()
@@ -74,6 +81,7 @@
             f"The tool will **always return JSON matching this exact schema**:\n"
             f"```json\n{pretty_output}\n```"
         )
+        logger.debug("Tool '%s' initialized successfully", self.name)
 
     def openai_tool_spec(self) -> ChatCompletionToolParam:
         return {
@@ -120,8 +128,11 @@ def run_tool(name: str, raw_args: dict[str, Any]) -> str:
     """
    Looks up a tool by name and executes it.
     """
+    logger.debug("Running tool '%s' with args: %s", name, raw_args)
     tool_instance = lookup_tool(name)
-    return execute_tool(tool_instance, raw_args)
+    result = execute_tool(tool_instance, raw_args)
+    logger.debug("Tool '%s' completed successfully", name)
+    return result
 
 
 def execute_tool(tool_instance: Tool, raw_args: dict[str, Any]) -> str:
@@ -132,6 +143,7 @@
     3. Gets OutputModel or str.
     4. Returns OutputModel.model_dump_json() or the str.
     """
+    logger.debug("Executing tool '%s'", tool_instance.name)
     try:
         # 1. Instantiate the specific input model
         input_model_cls = tool_instance.input_type
@@ -153,10 +165,14 @@
 
         # 4. Serialize Output
         if isinstance(result_model, str):
+            logger.debug("Tool '%s' returned string result", tool_instance.name)
             return result_model
-        return result_model.model_dump_json()
+        result = result_model.model_dump_json()
+        logger.debug("Tool '%s' returned JSON result", tool_instance.name)
+        return result
 
     except Exception as e:
+        logger.exception("Tool '%s' execution failed", tool_instance.name)
         msg = f"Error executing tool '{tool_instance.name}': {e!s}"
         raise ToolExecutionError(msg) from e
 
@@ -171,8 +187,10 @@ def register_tool(tool_instance: Tool) -> None:
     """
     if not tool_instance.name:
         msg = "Cannot register tool without a name."
+        logger.error("Tool registration failed: %s", msg)
         raise ToolExecutionError(msg)
     TOOL_REGISTRY[tool_instance.name] = tool_instance
+    logger.info("Tool '%s' registered successfully", tool_instance.name)
 
 
 def lookup_tool(name: str) -> Tool:
+ logger.error("Tool registration failed: %s", msg) raise ToolExecutionError(msg) TOOL_REGISTRY[tool_instance.name] = tool_instance + logger.info("Tool '%s' registered successfully", tool_instance.name) def lookup_tool(name: str) -> Tool: diff --git a/src/scouter/llmcore/types.py b/src/scouter/llmcore/types.py index 6b22afb..899b47e 100644 --- a/src/scouter/llmcore/types.py +++ b/src/scouter/llmcore/types.py @@ -1,3 +1,5 @@ +from collections.abc import Callable + # Re-export OpenAI types from openai.types.chat import ( ChatCompletion, @@ -13,6 +15,9 @@ ChatCompletionMessageToolCall, ) +# Custom types +Prompt = str | Callable[[], str] + __all__ = [ "ChatCompletion", "ChatCompletionAssistantMessageParam", @@ -23,4 +28,5 @@ "ChatCompletionToolMessageParam", "ChatCompletionToolParam", "ChatCompletionUserMessageParam", + "Prompt", ] diff --git a/src/scouter/llmcore/utils.py b/src/scouter/llmcore/utils.py index 8d53808..1699b9b 100644 --- a/src/scouter/llmcore/utils.py +++ b/src/scouter/llmcore/utils.py @@ -1,24 +1,41 @@ +import logging import random import time from openai import APIError, RateLimitError +from .exceptions import MaxRetriesExceededError + +logger = logging.getLogger(__name__) + ERROR_MAX_RETRY = "max retries exceeded" def retry_loop(func, max_retries=5, base_delay=1): + logger.debug( + "Starting retry loop with max_retries=%d, base_delay=%d", + max_retries, + base_delay, + ) last_exception: BaseException | None = None for attempt in range(max_retries): try: - return func() + result = func() except (RateLimitError, APIError) as e: # noqa: PERF203 last_exception = e + logger.warning("Attempt %d failed: %s", attempt + 1, str(e)) if attempt == max_retries - 1: break sleep_time = base_delay * (2**attempt) + random.uniform(0, 0.5) # noqa: S311 + logger.debug("Sleeping for %.2f seconds before retry", sleep_time) time.sleep(sleep_time) + else: + if attempt > 0: + logger.info("Operation succeeded on attempt %d", attempt + 1) + return result # If we reach here, all retries failed - raise last_exception or RuntimeError(ERROR_MAX_RETRY) + logger.error("All %d retry attempts failed", max_retries) + raise last_exception or MaxRetriesExceededError(ERROR_MAX_RETRY)