diff --git a/packages/core/src/chat.inline.test.ts b/packages/core/src/chat.inline.test.ts
new file mode 100644
index 0000000000..8b2a63b71f
--- /dev/null
+++ b/packages/core/src/chat.inline.test.ts
@@ -0,0 +1,85 @@
+import { describe, it } from "node:test"
+import assert from "node:assert"
+import { mergeGenerationOptions } from "./chat.js"
+import { runtimeHost } from "./host.js"
+
+/**
+ * Unit tests for the model selection performed by `mergeGenerationOptions`.
+ *
+ * The third argument (`inner`) marks an inline prompt: without an explicit
+ * model in `runOptions`, an inline prompt is expected to resolve to the `large`
+ * alias instead of inheriting the model configured on the script.
+ */
+describe("mergeGenerationOptions inline prompt fix", () => {
+    /** Model configured at the script level in every scenario below. */
+    const SCRIPT_MODEL = "small"
+
+    /**
+     * Builds a fresh script-level options object per test so no state is
+     * shared between cases; `trace` and `stats` are empty stand-ins.
+     */
+    const scriptOptions = () => ({
+        model: SCRIPT_MODEL,
+        inner: false,
+        trace: {} as any,
+        stats: {} as any,
+    })
+
+    it("should ignore script model for inline prompts when no explicit model", () => {
+        const runOptions = {} // no explicit model in inline prompt
+
+        // inner=true marks the call as an inline prompt
+        const result = mergeGenerationOptions(scriptOptions(), runOptions, true)
+
+        // Should use default large model, not script model
+        assert.notStrictEqual(result.model, SCRIPT_MODEL)
+        assert.strictEqual(result.model, runtimeHost.modelAliases.large.model)
+    })
+
+    it("should use explicit model for inline prompts", () => {
+        // The inline prompt names its own model
+        const runOptions = { model: "explicit-model" }
+
+        const result = mergeGenerationOptions(scriptOptions(), runOptions, true)
+
+        // The explicit inline model wins over the script model
+        assert.strictEqual(result.model, "explicit-model")
+    })
+
+    it("should still use script model for main execution", () => {
+        const runOptions = {} // no explicit model
+
+        // inner=false marks the main script execution
+        const result = mergeGenerationOptions(scriptOptions(), runOptions, false)
+
+        // Should use script model
+        assert.strictEqual(result.model, SCRIPT_MODEL)
+    })
+
+    it("should work with undefined inner parameter (legacy compatibility)", () => {
+        const runOptions = {} // no explicit model
+
+        // Omitting `inner` should behave like main script execution
+        const result = mergeGenerationOptions(scriptOptions(), runOptions)
+
+        // Should use script model (legacy behavior)
+        assert.strictEqual(result.model, SCRIPT_MODEL)
+    })
+
+    it("should handle null/undefined options gracefully", () => {
+        // Exercise both values named in the title, not only null
+        for (const runOptions of [null, undefined]) {
+            const result = mergeGenerationOptions(
+                scriptOptions(),
+                runOptions,
+                true
+            )
+
+            // Should not throw and should fall back to the large model
+            assert.strictEqual(
+                result.model,
+                runtimeHost.modelAliases.large.model
+            )
+        }
+    })
+})
\ No newline at end of file
diff --git a/packages/core/src/chat.ts b/packages/core/src/chat.ts
index ff180605ca..be7bf37196 100644
--- a/packages/core/src/chat.ts
+++ b/packages/core/src/chat.ts
@@ -1055,14 +1055,16 @@ async function processChatMessage(
  */
 export function mergeGenerationOptions(
     options: GenerationOptions,
-    runOptions: ModelOptions & EmbeddingsModelOptions
+    runOptions: ModelOptions & EmbeddingsModelOptions,
+    inner?: boolean
 ): GenerationOptions {
     const res = {
         ...options,
         ...(runOptions || {}),
         model:
             runOptions?.model ??
-            options?.model ??
+            // For inline prompts (inner=true), skip script model and use default large model
+            (inner ? runtimeHost.modelAliases.large.model : options?.model) ??
             runtimeHost.modelAliases.large.model,
         temperature:
             runOptions?.temperature ??
diff --git a/packages/core/src/env.ts b/packages/core/src/env.ts
index fea019428c..a20f9eba4e 100644
--- a/packages/core/src/env.ts
+++ b/packages/core/src/env.ts
@@ -776,11 +776,12 @@ export async function parseTokenFromEnv(
         if (!URL.canParse(base)) {
             throw new Error(`${base} must be a valid URL`)
         }
+        const token = env.LITELLM_API_KEY;
         return {
             provider,
             model,
             base,
-            token: MODEL_PROVIDER_LITELLM,
+            token,
             type: "openai",
             source: "default",
         }
diff --git a/packages/core/src/runpromptcontext.ts b/packages/core/src/runpromptcontext.ts
index 8f568a0152..d163f41827 100644
--- a/packages/core/src/runpromptcontext.ts
+++ b/packages/core/src/runpromptcontext.ts
@@ -899,7 +899,7 @@ export function createChatGenerationContext(
         try {
             infoCb?.({ text: label || "prompt" })
 
-            const genOptions = mergeGenerationOptions(options, runOptions)
+            const genOptions = mergeGenerationOptions(options, runOptions, true)
             genOptions.inner = true
             genOptions.trace = runTrace
             const { info, configuration } = await resolveModelConnectionInfo(