diff --git a/src/assertion.ts b/src/assertion.ts
index f20b765..712ca08 100644
--- a/src/assertion.ts
+++ b/src/assertion.ts
@@ -6,6 +6,7 @@ import { logger } from "./logger";
 import { resolveModel } from "./models";
 import { AssertionResult, AssertionOptions } from "./types";
 import { resolvePage, safeSnapshot, withTimeout } from "./utils";
+import { trackUsage } from "./cost";
 
 const assertionSchema = z.object({
   assertionPassed: z.boolean().describe("Indicates whether the assertion passed or not."),
@@ -130,8 +131,9 @@ Never hallucinate. Be truthful and if you are not sure, use a low confidence sco
     // Claude assertion function
     const getClaudeAssertion = async (): Promise<AssertionResult> => {
       // First get Claude's text response with thinking if enabled
-      const { text } = await generateText({
-        model: resolveModel(getModelId("assertionPrimary")),
+      const modelId = getModelId("assertionPrimary");
+      const result = await generateText({
+        model: resolveModel(modelId),
         temperature: 0,
         providerOptions: thinkingEnabled
           ? {
@@ -146,21 +148,30 @@ Never hallucinate. Be truthful and if you are not sure, use a low confidence sco
         messages,
       });
 
+      if (result.usage) {
+        await trackUsage(modelId, result.usage);
+      }
+
       // Convert Claude's response to structured format using Haiku
-      const { output } = await generateText({
-        model: resolveModel(getModelId("assertionPrimary")),
+      const haikuResult = await generateText({
+        model: resolveModel(modelId),
         temperature: 0.1,
-        prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${text}`,
+        prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${result.text}`,
         output: Output.object({ schema: assertionSchema }),
       });
 
-      return output;
+      if (haikuResult.usage) {
+        await trackUsage(modelId, haikuResult.usage);
+      }
+
+      return haikuResult.output;
     };
 
     // Gemini assertion function
     const getGeminiAssertion = async (): Promise<AssertionResult> => {
-      const { output } = await generateText({
-        model: resolveModel(getModelId("assertionSecondary")),
+      const modelId = getModelId("assertionSecondary");
+      const result = await generateText({
+        model: resolveModel(modelId),
         temperature: 0,
         providerOptions: thinkingEnabled
           ? {
@@ -178,7 +189,11 @@ Never hallucinate. Be truthful and if you are not sure, use a low confidence sco
         output: Output.object({ schema: assertionSchema }),
       });
 
-      return output;
+      if (result.usage) {
+        await trackUsage(modelId, result.usage);
+      }
+
+      return result.output;
     };
 
     // Arbiter function using Gemini 2.5 Pro with thinking enabled
@@ -241,8 +256,9 @@ Please carefully review the evidence (screenshot and accessibility snapshot (whe
         },
       ];
 
-      const { output } = await generateText({
-        model: resolveModel(getModelId("assertionArbiter")),
+      const modelId = getModelId("assertionArbiter");
+      const result = await generateText({
+        model: resolveModel(modelId),
         temperature: 0,
         providerOptions: {
           google: {
@@ -258,7 +274,11 @@ Please carefully review the evidence (screenshot and accessibility snapshot (whe
         output: Output.object({ schema: assertionSchema }),
       });
 
-      return output;
+      if (result.usage) {
+        await trackUsage(modelId, result.usage);
+      }
+
+      return result.output;
     };
 
     const runAssertion = async (attempt = 0): Promise<AssertionResult> => {
diff --git a/src/config.ts b/src/config.ts
index 82c6974..ec0de2a 100644
--- a/src/config.ts
+++ b/src/config.ts
@@ -40,6 +40,8 @@ export type ModelConfig = {
    * Override may be re-enabled in a future release.
    */
   cua?: string;
+  /** Model for explaining visual regression failures. Default: google/gemini-3-flash */
+  visualRegressionExplanation?: string;
 };
 
 export const DEFAULT_MODELS: Required<ModelConfig> = {
@@ -51,6 +53,7 @@ export const DEFAULT_MODELS: Required<ModelConfig> = {
   assertionArbiter: "google/gemini-3.1-pro-preview",
   utility: "google/gemini-2.5-flash",
   cua: "gpt-5.5",
+  visualRegressionExplanation: "google/gemini-3-flash",
 };
 
 /**
diff --git a/src/constants.ts b/src/constants.ts
index 195521f..732b1d9 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -9,6 +9,7 @@ export const INITIAL_DOM_STABILIZATION_IDLE = 3000;
 export const ASSERTION_MODEL_TIMEOUT = 35000;
 export const STEP_EXECUTION_TIMEOUT = 180000;
 export const WAIT_CONDITION_TIMEOUT = 120000;
+export const VISUAL_DIFF_EXPLANATION_TIMEOUT = 45000;
 export const WAIT_CONDITION_INITIAL_INTERVAL = 1000;
 export const WAIT_CONDITION_MAX_INTERVAL = 10000;
 export const EMAIL_INITIAL_WAIT = 5000;
diff --git a/src/cost.ts b/src/cost.ts
new file mode 100644
index 0000000..143607f
--- /dev/null
+++ b/src/cost.ts
@@ -0,0 +1,91 @@
+import { logger } from "./logger";
+import { redis } from "./redis";
+
+/** Token prices for a single model, expressed in USD per 1 million tokens. */
+export interface ModelPricing {
+  promptTokenPricePerMillion: number;
+  completionTokenPricePerMillion: number;
+}
+
+/**
+ * Default pricing for models used in Passmark, keyed by canonical model ID.
+ * Data sourced from provider documentation as of April 2026; re-verify before relying on it.
+ */
+export const DEFAULT_PRICING: Record<string, ModelPricing> = {
+  "google/gemini-3-flash": {
+    promptTokenPricePerMillion: 0.1,
+    completionTokenPricePerMillion: 0.4,
+  },
+  "google/gemini-3.1-pro-preview": {
+    promptTokenPricePerMillion: 1.25,
+    completionTokenPricePerMillion: 5.0,
+  },
+  "anthropic/claude-haiku-4.5": {
+    promptTokenPricePerMillion: 0.25,
+    completionTokenPricePerMillion: 1.25,
+  },
+  "google/gemini-2.5-flash": {
+    promptTokenPricePerMillion: 0.1,
+    completionTokenPricePerMillion: 0.4,
+  },
+  "gpt-5.5": {
+    promptTokenPricePerMillion: 2.5,
+    completionTokenPricePerMillion: 10.0,
+  },
+};
+
+/** Token counts from the AI SDK: v5+ reports input/output tokens, v4 reported prompt/completion. */
+export interface Usage {
+  inputTokens?: number;
+  outputTokens?: number;
+  promptTokens?: number;
+  completionTokens?: number;
+  totalTokens?: number;
+}
+
+/**
+ * Tracks LLM usage and calculates the cost of the call.
+ * Updates a global cost counter in Redis if available for cross-worker synchronization.
+ * Redis failures are logged and swallowed so cost tracking never fails the caller.
+ *
+ * @param modelId - The canonical model ID (e.g. "google/gemini-3-flash")
+ * @param usage - Token usage data from the AI SDK
+ */
+export async function trackUsage(modelId: string, usage: Usage): Promise<void> {
+  // Models missing from DEFAULT_PRICING are priced at the gemini-3-flash rate.
+  const pricing = DEFAULT_PRICING[modelId] ?? DEFAULT_PRICING["google/gemini-3-flash"];
+  // Prefer the current SDK field names and fall back to the legacy ones, so cost is not always $0.
+  const promptTokens = usage.inputTokens ?? usage.promptTokens ?? 0;
+  const completionTokens = usage.outputTokens ?? usage.completionTokens ?? 0;
+  const totalCost =
+    (promptTokens / 1_000_000) * pricing.promptTokenPricePerMillion +
+    (completionTokens / 1_000_000) * pricing.completionTokenPricePerMillion;
+
+  logger.debug(
+    `[Cost] Model: ${modelId} | Prompt: ${promptTokens} | Completion: ${completionTokens} | Cost: $${totalCost.toFixed(6)}`
+  );
+  if (!redis) return;
+
+  try {
+    // Use a global key to track cumulative cost across all test workers
+    const executionId = process.env.executionId || "default";
+    await Promise.all([
+      redis.incrbyfloat(`cost:total:${executionId}`, totalCost),
+      redis.incrbyfloat(`cost:model:${modelId}:${executionId}`, totalCost),
+    ]);
+  } catch (err) {
+    logger.warn(`Failed to update cost in Redis: ${err}`);
+  }
+}
+
+/**
+ * Retrieves the total estimated cost for the current execution.
+ * @returns Total cost in USD; 0 when Redis is unavailable, the key is unset, or the value is not numeric
+ */
+export async function getTotalCost(): Promise<number> {
+  if (!redis) return 0;
+  const executionId = process.env.executionId || "default";
+  const stored = await redis.get(`cost:total:${executionId}`);
+  const total = stored ? parseFloat(stored) : 0;
+  return Number.isFinite(total) ? total : 0; // never surface NaN to callers
+}
diff --git a/src/errors.ts b/src/errors.ts
index f10ced4..de7f174 100644
--- a/src/errors.ts
+++ b/src/errors.ts
@@ -89,4 +89,13 @@ export class ValidationError extends PassmarkError {
   constructor(message: string) {
     super(message, "VALIDATION_ERROR");
   }
+}
+
+/**
+ * Thrown when an image input cannot be resolved or the AI visual diff explanation fails.
+ */
+export class VisualRegressionError extends PassmarkError {
+  constructor(message: string) {
+    super(message, "VISUAL_REGRESSION_ERROR");
+  }
 }
\ No newline at end of file
diff --git a/src/extract.ts b/src/extract.ts
index abff50d..1d0b677 100644
--- a/src/extract.ts
+++ b/src/extract.ts
@@ -2,6 +2,7 @@ import { generateText, Output } from "ai";
 import { z } from "zod";
 import { getModelId } from "./config";
 import { resolveModel } from "./models";
+import { trackUsage } from "./cost";
 
 const extractionSchema = z.object({
   extractedValue: z.string().describe("The extracted value based on the prompt"),
@@ -35,8 +36,9 @@ export async function extractDataWithAI({
   url: string;
   prompt: string;
 }): Promise<string> {
-  const { output } = await generateText({
-    model: resolveModel(getModelId("utility")),
+  const modelId = getModelId("utility");
+  const result = await generateText({
+    model: resolveModel(modelId),
     temperature: 0,
     output: Output.object({ schema: extractionSchema }),
     prompt: `You are an AI assistant that extracts specific data from web pages.
@@ -66,5 +68,9 @@ ${prompt}
 Return the extracted value.`,
   });
 
-  return output.extractedValue;
+  if (result.usage) {
+    await trackUsage(modelId, result.usage);
+  }
+
+  return result.output.extractedValue;
 }
diff --git a/src/index.ts b/src/index.ts
index c069c9e..c76e27e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -13,13 +13,7 @@ import { withSpan } from "axiom/ai";
 import shortid from "shortid";
 import { axiomEnabled } from "./instrumentation";
 
-// Only use withSpan when Axiom is configured, otherwise just execute the function directly
-async function maybeWithSpan<T>(
-  meta: { capability: string; step: string },
-  fn: () => Promise<T>,
-): Promise<T> {
-  return axiomEnabled ? withSpan(meta, async () => fn()) : fn();
-}
+import { maybeWithSpan } from "./utils/telemetry";
 import { z } from "zod";
 import { buildRunStepsPrompt, buildRunUserFlowPrompt } from "./prompts";
 import { redis } from "./redis";
@@ -45,6 +39,8 @@ import { runCUALoop, buildRunStepsPromptCUA, buildRunUserFlowPromptCUA } from ".
 import { extractDataWithAI } from "./extract";
 import { logger } from "./logger";
 import { resolveModel } from "./models";
+export * from "./visual";
+import { trackUsage } from "./cost";
 import { runSecureScript } from "./utils/secure-script-runner";
 import { createTabManager } from "./utils/tab-manager";
 import {
@@ -460,10 +456,10 @@ export const runSteps = async ({
       );
     }
 
-    const stepModelId = effectiveAi.getModelId("stepExecution");
-    const model = resolveModel(stepModelId, effectiveAi.gateway);
+    const modelId = effectiveAi.getModelId("stepExecution"); // keep per-call AI overrides (model + gateway)
+    const model = resolveModel(modelId, effectiveAi.gateway);
     logger.debug(
-      `Using model: ${stepModelId} for step execution / gateway: ${effectiveAi.gateway}`,
+      `Using model: ${modelId} for step execution / gateway: ${effectiveAi.gateway}`,
     );
 
     try {
@@ -514,6 +510,10 @@ export const runSteps = async ({
           }),
       );
 
+      if (result.usage) {
+        await trackUsage(modelId, result.usage);
+      }
+
       // Cache the step action only if it was a single tool call (simple, deterministic action).
       // Multi-step actions are not cached as they may be non-deterministic.
       const allToolCalls = result.steps
@@ -680,8 +680,9 @@ export const runUserFlow = async ({
       );
 
       if (assertion) {
-        const { output } = await generateText({
-          model: resolveModel(effectiveAi.getModelId("utility"), effectiveAi.gateway),
+        const utilityModelId = effectiveAi.getModelId("utility"); // keep per-call AI overrides
+        const { output, usage } = await generateText({
+          model: resolveModel(utilityModelId, effectiveAi.gateway),
           prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${text}`,
           output: Output.object({
             schema: z.object({
@@ -695,6 +696,11 @@ export const runUserFlow = async ({
             }),
           }),
         });
+        
+        if (usage) {
+          await trackUsage(utilityModelId, usage);
+        }
+
         return output;
       }
 
@@ -715,7 +721,7 @@ export const runUserFlow = async ({
   });
 
   try {
-    const { text } = await maybeWithSpan(
+    const result = await maybeWithSpan(
       { capability: "user_flow_execution", step: "agentic_tool_calling" },
       async () => {
         return generateText({
@@ -758,10 +764,18 @@ export const runUserFlow = async ({
       },
     );
 
+    if (result.usage) {
+      await trackUsage(
+        effectiveAi.getModelId(effort === "low" ? "userFlowLow" : "userFlowHigh"),
+        result.usage,
+      );
+    }
+
     if (assertion) {
-      const { output } = await generateText({
-        model: resolveModel(effectiveAi.getModelId("utility"), effectiveAi.gateway),
-        prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${text}`,
+      const utilityModelId = effectiveAi.getModelId("utility"); // keep per-call AI overrides
+      const { output, usage } = await generateText({
+        model: resolveModel(utilityModelId, effectiveAi.gateway),
+        prompt: `Convert the following text output into a valid JSON object with the specified properties:\n\n${result.text}`,
         output: Output.object({
           schema: z.object({
             assertionPassed: z.boolean().describe("Indicates whether the assertion passed or not."),
@@ -775,10 +789,14 @@ export const runUserFlow = async ({
         }),
       });
 
+      if (usage) {
+        await trackUsage(utilityModelId, usage);
+      }
+
       return output;
     }
 
-    return text;
+    return result.text;
   } catch (error: unknown) {
     logger.error({ err: error }, "Error during user flow execution");
   }
diff --git a/src/types.ts b/src/types.ts
index e4ff76e..041c1cd 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -141,3 +141,20 @@ export type RunStepsOptions = {
     }
     | { assertions?: never; expect?: never }
   );
+
+export type VisualExplanationResult = { // structured result returned by explainVisualDiff
+  explanation: string; // human-readable explanation of the visual differences
+  isBug: boolean; // whether the change appears to be a bug rather than a legitimate UI tweak
+  confidence: number; // 0-100 score; the range is enforced by the zod schema in visual.ts
+  diffAreas?: string[]; // optional list of specific areas or elements that changed
+};
+
+export type VisualDiffOptions = { // input accepted by explainVisualDiff
+  page: Page; // Playwright page; screenshotted when actualImage is omitted
+  expectedImage?: string | Buffer; // baseline image: file path, data URL, raw base64 string, or Buffer
+  actualImage?: string | Buffer; // failed-state image, same accepted formats as expectedImage
+  test?: TestType<
+    PlaywrightTestArgs & PlaywrightTestOptions,
+    PlaywrightWorkerArgs & PlaywrightWorkerOptions
+  >; // when provided, the analysis is pushed onto test.info().annotations
+};
diff --git a/src/utils/telemetry.ts b/src/utils/telemetry.ts
new file mode 100644
index 0000000..6ef7ef9
--- /dev/null
+++ b/src/utils/telemetry.ts
@@ -0,0 +1,16 @@
+import { withSpan } from "axiom/ai";
+import { axiomEnabled } from "../instrumentation";
+
+/**
+ * Runs `fn`, wrapping it in an Axiom span only when Axiom instrumentation is enabled.
+ * @param meta - Span metadata: the capability and step names attached to the span
+ * @param fn - Async work to run; invoked directly when Axiom is not configured
+ * @returns Whatever `fn` resolves to
+ */
+export async function maybeWithSpan<T>(
+  meta: { capability: string; step: string },
+  fn: () => Promise<T>,
+): Promise<T> {
+  if (!axiomEnabled) return fn();
+  return withSpan(meta, async () => fn());
+}
diff --git a/src/visual.ts b/src/visual.ts
new file mode 100644
index 0000000..0556c99
--- /dev/null
+++ b/src/visual.ts
@@ -0,0 +1,200 @@
+import { generateText, Output } from "ai";
+import { resolveModel } from "./models";
+import { getModelId } from "./config";
+import { logger } from "./logger";
+import { trackUsage } from "./cost";
+import fs from "fs";
+import { z } from "zod";
+import { VisualDiffOptions, VisualExplanationResult } from "./types";
+import { maybeWithSpan } from "./utils/telemetry";
+import { withTimeout } from "./utils";
+import { VISUAL_DIFF_EXPLANATION_TIMEOUT } from "./constants";
+import { VisualRegressionError } from "./errors";
+
+const visualExplanationSchema = z.object({ // structured-output schema for explainVisualDiff; fields match VisualExplanationResult
+  explanation: z
+    .string()
+    .describe("A human-readable explanation of the visual differences between the two images."),
+  isBug: z
+    .boolean()
+    .describe("Whether the change appears to be a functional bug or a legitimate UI tweak."),
+  confidence: z.number().min(0).max(100).describe("Confidence score of the explanation (0-100)."),
+  diffAreas: z
+    .array(z.string())
+    .optional()
+    .describe("List of specific areas or elements that changed."),
+});
+
+/**
+ * Resolves various image input formats into a Buffer.
+ * Supports Buffers, base64 data URLs, file paths, and raw base64 strings (checked in that order).
+ * @throws VisualRegressionError for an empty data URL, an unreadable file, or an unsupported input.
+ */
+async function resolveImageBuffer(input: string | Buffer): Promise<Buffer> {
+  if (Buffer.isBuffer(input)) return input;
+
+  // Handle data URL (e.g. "data:image/png;base64,...")
+  if (input.startsWith("data:")) {
+    const base64 = input.split(",")[1];
+    if (!base64) {
+      throw new VisualRegressionError("Invalid base64 data URL provided as image input.");
+    }
+    return Buffer.from(base64, "base64");
+  }
+
+  // Handle file path
+  if (fs.existsSync(input)) {
+    try {
+      return fs.readFileSync(input);
+    } catch (err) {
+      throw new VisualRegressionError(`Failed to read image from path: ${input}. ${err}`);
+    }
+  }
+
+  // Buffer.from(x, "base64") never throws on malformed input (it decodes leniently), so a mistyped
+  // file path would become garbage bytes. Check the base64 alphabet before decoding.
+  const compact = input.replace(/\s+/g, "");
+  if (/^[A-Za-z0-9+\/_-]+={0,2}$/.test(compact)) {
+    return Buffer.from(compact, "base64");
+  }
+  throw new VisualRegressionError(
+    `Unable to resolve image input. It is not a valid path, Buffer, or base64 string.`,
+  );
+}
+
+/**
+ * Explains the visual differences between an "Expected" screenshot and the "Actual" page state.
+ * Uses a Vision-capable AI model to generate a human-readable explanation, helping QA teams
+ * distinguish between minor styling tweaks and breaking visual bugs.
+ *
+ * @param options - Configuration for the visual diff explanation
+ * @param options.page - The Playwright page instance
+ * @param options.expectedImage - The baseline image (path, Buffer, or base64)
+ * @param options.actualImage - The failed state image (path, Buffer, or base64). If omitted, takes a fresh screenshot.
+ * @param options.test - Playwright test instance for attaching rich annotations to the report.
+ * @returns A structured explanation of the visual differences.
+ * @throws VisualRegressionError if an image cannot be resolved or the model call fails; other errors are wrapped in it.
+ *
+ * @example
+ * ```typescript
+ * try {
+ *   await expect(page).toHaveScreenshot('landing.png');
+ * } catch (error) {
+ *   await explainVisualDiff({
+ *     page,
+ *     expectedImage: 'test-snapshots/landing-linux.png',
+ *     test,
+ *   });
+ *   throw error;
+ * }
+ * ```
+ */
+export async function explainVisualDiff({
+  page,
+  expectedImage,
+  actualImage,
+  test,
+}: VisualDiffOptions): Promise<VisualExplanationResult> {
+  return maybeWithSpan({ capability: "visual_regression", step: "explain_diff" }, async () => {
+    logger.info("Generating AI explanation for visual regression failure...");
+
+    try {
+      // 1. Resolve images to Buffers
+      const expectedBuffer = expectedImage ? await resolveImageBuffer(expectedImage) : undefined;
+
+      let actualBuffer: Buffer;
+      if (actualImage) {
+        actualBuffer = await resolveImageBuffer(actualImage);
+      } else {
+        logger.debug("No actual image provided, taking fresh screenshot...");
+        actualBuffer = await page.screenshot({ fullPage: false });
+      }
+
+      // 2. Prepare model
+      const modelId = getModelId("visualRegressionExplanation");
+      const model = resolveModel(modelId);
+
+      // The prompt describes two attachments; the note appended to item 2 tells the model when no
+      // baseline was supplied so it does not invent a comparison. With a baseline it adds nothing.
+      const prompt = `
+You are an elite QA Automation Engineer and Visual UX Expert.
+You have been tasked with explaining why a visual regression test failed.
+
+Attached are two images:
+1. **Expected Image (Baseline)**: The reference point that represents the "correct" UI state.
+2. **Actual Image (Current)**: The current state of the application which failed the pixel-diff check.${expectedBuffer ? "" : "\nNOTE: No baseline image was supplied; only the Actual Image is attached."}
+
+### Objective
+Provide a precise, human-readable explanation of the differences. Your goal is to help a developer or product manager quickly understand if this is a regression bug or a planned UI update.
+
+### Analysis Requirements
+- **Spatial Changes**: Note if elements moved, swapped positions, or if margins/padding changed.
+- **Visual Styles**: Identify changes in fonts, colors, border-radius, or shadows.
+- **Content Changes**: Note if text changed, icons were swapped, or images are missing.
+- **Layout Integrity**: Identify if the layout broke or if elements are overlapping unexpectedly.
+
+### Output Format
+Be concise but technical. Avoid fluff.
+
+<OutputFormat>
+- \`explanation\`: A 2-3 sentence summary of the core differences.
+- \`isBug\`: Boolean (true if it looks like a broken layout/missing asset, false if it looks like a clean styling update).
+- \`confidence\`: 0-100 score of your assessment.
+- \`diffAreas\`: Optional list of specific components or selectors that appear to have changed.
+</OutputFormat>
+`;
+
+      // 3. Execute model call with timeout
+      const result = await withTimeout(
+        generateText({
+          model,
+          messages: [
+            {
+              role: "user",
+              content: [
+                { type: "text", text: prompt },
+                ...(expectedBuffer ? [{ type: "image" as const, image: expectedBuffer }] : []),
+                { type: "image" as const, image: actualBuffer },
+              ],
+            },
+          ],
+          output: Output.object({ schema: visualExplanationSchema }),
+        }),
+        VISUAL_DIFF_EXPLANATION_TIMEOUT,
+      );
+
+      if (result.usage) {
+        await trackUsage(modelId, result.usage);
+      }
+
+      const { explanation, isBug, confidence, diffAreas } = result.output;
+
+      // 4. Format for Playwright Report
+      const areasStr =
+        diffAreas && diffAreas.length > 0
+          ? `\n\n**Impacted Areas:**\n${diffAreas.map((a) => `- ${a}`).join("\n")}`
+          : "";
+
+      const judgmentEmoji = isBug ? "🚨 **Likely a BUG**" : "✨ **Likely a STYLING TWEAK**";
+      const summary = `### 🔍 AI Visual Diff Analysis\n\n**Confidence:** ${confidence}%\n\n**Explanation:**\n${explanation}\n\n**Judgment:** ${judgmentEmoji}${areasStr}\n\n---\n*Analysis generated by Passmark AI Regression Engine*`;
+
+      if (test) {
+        test.info().annotations.push({
+          type: "Visual Diff Analysis",
+          description: summary,
+        });
+      }
+
+      logger.info(`Successfully generated visual diff explanation (${confidence}% confidence).`);
+      return result.output;
+    } catch (error: unknown) {
+      const message = error instanceof Error ? error.message : String(error);
+      logger.error(`Failed to generate visual diff explanation: ${message}`);
+
+      if (error instanceof VisualRegressionError) {
+        throw error;
+      }
+      throw new VisualRegressionError(`AI visual analysis failed: ${message}`);
+    }
+  });
+}