Commit 4b11ab5
feat(openai): add support for shell tool (#9579)
1 parent 4628afd commit 4b11ab5

File tree

7 files changed: +644 -0 lines changed

.changeset/kind-jokes-attack.md

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
---
"@langchain/openai": minor
---

feat(openai): add support for shell tool

libs/providers/langchain-openai/README.md

Lines changed: 79 additions & 0 deletions

@@ -508,6 +508,85 @@ const response = await llmWithShell.invoke(

For more information, see [OpenAI's Local Shell Documentation](https://platform.openai.com/docs/guides/tools-local-shell).

### Shell Tool

The Shell tool allows models to run shell commands through your integration. Unlike Local Shell, this tool supports executing multiple commands concurrently and is designed for `gpt-5.1`.

> **Security Warning**: Running arbitrary shell commands can be dangerous. Always sandbox execution or add strict allow/deny-lists before forwarding commands to the system shell.

**Use cases**:

- **Automating filesystem or process diagnostics** – e.g., "find the largest PDF under ~/Documents"
- **Extending model capabilities** – Using built-in UNIX utilities, the Python runtime, and other CLIs
- **Running multi-step build and test flows** – Chaining commands like `pip install` and `pytest`
- **Complex agentic coding workflows** – Combining with `apply_patch` for file operations

```typescript
import { ChatOpenAI, tools } from "@langchain/openai";
import { exec as execCb } from "node:child_process";
import { promisify } from "node:util";

// Node has no `child_process/promises` module; promisify the callback API instead.
const exec = promisify(execCb);

const model = new ChatOpenAI({ model: "gpt-5.1" });

// With an execute callback for automatic command handling
const shellTool = tools.shell({
  execute: async (action) => {
    const outputs = await Promise.all(
      action.commands.map(async (cmd) => {
        try {
          const { stdout, stderr } = await exec(cmd, {
            timeout: action.timeout_ms ?? undefined,
          });
          return {
            stdout,
            stderr,
            outcome: { type: "exit" as const, exit_code: 0 },
          };
        } catch (error: any) {
          const timedOut = error.killed && error.signal === "SIGTERM";
          return {
            stdout: error.stdout ?? "",
            stderr: error.stderr ?? String(error),
            outcome: timedOut
              ? { type: "timeout" as const }
              : { type: "exit" as const, exit_code: error.code ?? 1 },
          };
        }
      })
    );
    return {
      output: outputs,
      maxOutputLength: action.max_output_length,
    };
  },
});

const llmWithShell = model.bindTools([shellTool]);
const response = await llmWithShell.invoke(
  "Find the largest PDF file in ~/Documents"
);
```

**Action properties**: The model returns actions with these properties:

- `commands` - Array of shell commands to execute (can run concurrently)
- `timeout_ms` - Optional timeout in milliseconds (enforce your own limits)
- `max_output_length` - Optional maximum characters to return per command

**Return format**: Your execute function should return a `ShellResult`:

```typescript
interface ShellResult {
  output: Array<{
    stdout: string;
    stderr: string;
    outcome: { type: "exit"; exit_code: number } | { type: "timeout" };
  }>;
  maxOutputLength?: number | null; // Pass back from action if provided
}
```

For more information, see [OpenAI's Shell Documentation](https://platform.openai.com/docs/guides/tools-shell).
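Note that `max_output_length` is a request from the model: the API does not truncate for you, so your executor should clamp each command's output itself. A minimal sketch under that assumption (the helper name `truncateOutput` and the local `CommandOutput` type are ours, not part of the package):

```typescript
// Hypothetical helper: clamp a command's stdout/stderr to the model-requested cap.
interface CommandOutput {
  stdout: string;
  stderr: string;
  outcome: { type: "exit"; exit_code: number } | { type: "timeout" };
}

function truncateOutput(
  out: CommandOutput,
  maxOutputLength?: number | null
): CommandOutput {
  // No cap requested: return the output unchanged.
  if (maxOutputLength == null) return out;
  return {
    ...out,
    stdout: out.stdout.slice(0, maxOutputLength),
    stderr: out.stderr.slice(0, maxOutputLength),
  };
}
```

Applying this per command before building the `ShellResult` keeps responses bounded even when a command dumps megabytes of output.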
### Apply Patch Tool

The Apply Patch tool allows models to propose structured diffs that your integration applies. This enables iterative, multi-step code editing workflows where the model can create, update, and delete files in your codebase.
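The core of such an integration is applying a list of create/update/delete operations to your workspace. A minimal in-memory sketch of that loop (the `FileOp` shape below is illustrative, not the tool's wire format):

```typescript
// Hypothetical structured file operations, applied to an in-memory file map.
type FileOp =
  | { type: "create"; path: string; content: string }
  | { type: "update"; path: string; content: string }
  | { type: "delete"; path: string };

function applyOps(
  files: Map<string, string>,
  ops: FileOp[]
): Map<string, string> {
  // Copy so the caller's map is left untouched.
  const next = new Map(files);
  for (const op of ops) {
    if (op.type === "delete") next.delete(op.path);
    else next.set(op.path, op.content); // create and update both write content
  }
  return next;
}
```

A real integration would validate paths and write to disk, but the iterate-and-apply structure is the same.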

libs/providers/langchain-openai/src/tools/index.ts

Lines changed: 11 additions & 0 deletions

@@ -68,6 +68,16 @@ export type {
  LocalShellAction,
} from "./localShell.js";

import { shell } from "./shell.js";
export type {
  ShellTool,
  ShellOptions,
  ShellAction,
  ShellResult,
  ShellCommandOutput,
  ShellCallOutcome,
} from "./shell.js";

import { applyPatch } from "./applyPatch.js";
export type {
  ApplyPatchTool,
@@ -86,5 +96,6 @@ export const tools = {
  imageGeneration,
  computerUse,
  localShell,
  shell,
  applyPatch,
};
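The edit follows the file's existing barrel pattern: each tool factory lives in its own module, its types are re-exported, and the factory itself is attached to the single `tools` namespace object so callers write `tools.shell(...)`. A stripped-down sketch of that pattern (stub factories, illustrative only):

```typescript
// Illustrative barrel: stand-in factories for the per-tool modules.
const localShell = () => ({ type: "local_shell" as const });
const shell = () => ({ type: "shell" as const });
const applyPatch = () => ({ type: "apply_patch" as const });

// In the real file this object is exported as the public `tools` namespace.
const tools = { localShell, shell, applyPatch };
```

Adding a tool is then two lines: import the factory and add it to the object.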
Lines changed: 266 additions & 0 deletions

@@ -0,0 +1,266 @@
import { OpenAI as OpenAIClient } from "openai";
import { tool } from "@langchain/core/tools";

/**
 * Re-export action type from OpenAI SDK for convenience.
 * The action contains command details like the commands array, timeout, and max output length.
 */
export type ShellAction =
  OpenAIClient.Responses.ResponseFunctionShellToolCall.Action;

/**
 * Result of a single shell command execution.
 * Contains stdout, stderr, and the outcome (exit code or timeout).
 */
export type ShellCommandOutput =
  OpenAIClient.Responses.ResponseFunctionShellCallOutputContent;

/**
 * Outcome type for shell command execution - either exit with code or timeout.
 */
export type ShellCallOutcome = ShellCommandOutput["outcome"];

/**
 * Result of executing shell commands.
 * Contains an array of outputs (one per command) and the max_output_length parameter.
 */
export interface ShellResult {
  /**
   * Array of command outputs. Each entry corresponds to a command from the action.
   * The order should match the order of commands in the action.
   */
  output: ShellCommandOutput[];
  /**
   * The max_output_length from the action, which must be passed back to the API.
   * If not provided in the action, it can be omitted.
   */
  maxOutputLength?: number | null;
}

/**
 * Options for the Shell tool.
 */
export interface ShellOptions {
  /**
   * Execute function that handles shell command execution.
   * This function receives the action input containing the commands and limits,
   * and should return a ShellResult with stdout, stderr, and outcome for each command.
   *
   * @example
   * ```typescript
   * execute: async (action) => {
   *   const outputs = await Promise.all(
   *     action.commands.map(async (cmd) => {
   *       try {
   *         const { stdout, stderr } = await exec(cmd, {
   *           timeout: action.timeout_ms ?? undefined,
   *         });
   *         return {
   *           stdout,
   *           stderr,
   *           outcome: { type: "exit" as const, exit_code: 0 },
   *         };
   *       } catch (error) {
   *         const timedOut = error.killed && error.signal === "SIGTERM";
   *         return {
   *           stdout: error.stdout ?? "",
   *           stderr: error.stderr ?? String(error),
   *           outcome: timedOut
   *             ? { type: "timeout" as const }
   *             : { type: "exit" as const, exit_code: error.code ?? 1 },
   *         };
   *       }
   *     })
   *   );
   *   return {
   *     output: outputs,
   *     maxOutputLength: action.max_output_length,
   *   };
   * }
   * ```
   */
  execute: (action: ShellAction) => ShellResult | Promise<ShellResult>;
}

/**
 * OpenAI Shell tool type for the Responses API.
 */
export type ShellTool = OpenAIClient.Responses.FunctionShellTool;

const TOOL_NAME = "shell";

/**
 * Creates a Shell tool that allows models to run shell commands through your integration.
 *
 * The shell tool allows the model to interact with your local computer through a controlled
 * command-line interface. The model proposes shell commands; your integration executes them
 * and returns the outputs. This creates a simple plan-execute loop that lets models inspect
 * the system, run utilities, and gather data until they can finish the task.
 *
 * **Important**: The shell tool is available through the Responses API for use with `gpt-5.1`.
 * It is not available on other models, or via the Chat Completions API.
 *
 * **When to use**:
 * - **Automating filesystem or process diagnostics** – For example, "find the largest PDF
 *   under ~/Documents" or "show running gunicorn processes."
 * - **Extending the model's capabilities** – Using built-in UNIX utilities, the Python runtime,
 *   and other CLIs in your environment.
 * - **Running multi-step build and test flows** – Chaining commands like `pip install` and `pytest`.
 * - **Complex agentic coding workflows** – Using other tools like `apply_patch` to complete
 *   workflows that involve complex file operations.
 *
 * **How it works**:
 * The tool operates in a continuous loop:
 * 1. Model sends shell commands (`shell_call` with `commands` array)
 * 2. Your code executes the commands (can be concurrent)
 * 3. You return stdout, stderr, and outcome for each command
 * 4. Repeat until the task is complete
 *
 * **Security Warning**: Running arbitrary shell commands can be dangerous.
 * Always sandbox execution or add strict allow/deny-lists before forwarding
 * a command to the system shell.
 *
 * @see {@link https://platform.openai.com/docs/guides/tools-shell | OpenAI Shell Documentation}
 * @see {@link https://github.com/openai/codex | Codex CLI} for a reference implementation.
 *
 * @param options - Configuration for the Shell tool
 * @returns A Shell tool that can be passed to `bindTools`
 *
 * @example
 * ```typescript
 * import { ChatOpenAI, tools } from "@langchain/openai";
 * import { exec as execCb } from "node:child_process";
 * import { promisify } from "node:util";
 *
 * // Node has no `child_process/promises` module; promisify the callback API.
 * const exec = promisify(execCb);
 *
 * const model = new ChatOpenAI({ model: "gpt-5.1" });
 *
 * // With an execute callback for automatic command handling
 * const shellTool = tools.shell({
 *   execute: async (action) => {
 *     const outputs = await Promise.all(
 *       action.commands.map(async (cmd) => {
 *         try {
 *           const { stdout, stderr } = await exec(cmd, {
 *             timeout: action.timeout_ms ?? undefined,
 *           });
 *           return {
 *             stdout,
 *             stderr,
 *             outcome: { type: "exit" as const, exit_code: 0 },
 *           };
 *         } catch (error) {
 *           const timedOut = error.killed && error.signal === "SIGTERM";
 *           return {
 *             stdout: error.stdout ?? "",
 *             stderr: error.stderr ?? String(error),
 *             outcome: timedOut
 *               ? { type: "timeout" as const }
 *               : { type: "exit" as const, exit_code: error.code ?? 1 },
 *           };
 *         }
 *       })
 *     );
 *     return {
 *       output: outputs,
 *       maxOutputLength: action.max_output_length,
 *     };
 *   },
 * });
 *
 * const llmWithShell = model.bindTools([shellTool]);
 * const response = await llmWithShell.invoke(
 *   "Find the largest PDF file in ~/Documents"
 * );
 * ```
 *
 * @example
 * ```typescript
 * // Full shell loop example
 * async function shellLoop(model, task) {
 *   let response = await model.invoke(task, {
 *     tools: [tools.shell({ execute: myExecutor })],
 *   });
 *
 *   while (true) {
 *     const shellCall = response.additional_kwargs.tool_outputs?.find(
 *       (output) => output.type === "shell_call"
 *     );
 *
 *     if (!shellCall) break;
 *
 *     // Execute commands (with proper sandboxing!)
 *     const result = await executeCommands(shellCall.action);
 *
 *     // Send output back to the model
 *     response = await model.invoke(
 *       [
 *         response,
 *         {
 *           type: "shell_call_output",
 *           call_id: shellCall.call_id,
 *           output: result.output,
 *           max_output_length: result.maxOutputLength,
 *         },
 *       ],
 *       {
 *         tools: [tools.shell({ execute: myExecutor })],
 *       }
 *     );
 *   }
 *
 *   return response;
 * }
 * ```
 *
 * @remarks
 * - Only available through the Responses API (not Chat Completions)
 * - Designed for use with the `gpt-5.1` model
 * - Commands are provided as an array of strings that can be executed concurrently
 * - The action includes: `commands`, `timeout_ms`, `max_output_length`
 * - Always sandbox or validate commands before execution
 * - The `timeout_ms` from the model is only a hint—enforce your own limits
 * - If `max_output_length` exists in the action, always pass it back in the output
 * - Many CLI tools return non-zero exit codes for warnings; still capture stdout/stderr
 */
export function shell(options: ShellOptions) {
  // Wrapper that converts ShellResult to a string for LangChain tool compatibility
  const executeWrapper = async (action: ShellAction): Promise<string> => {
    const result = await options.execute(action);
    // Return a JSON string representation for the tool result
    return JSON.stringify({
      output: result.output,
      max_output_length: result.maxOutputLength,
    });
  };

  const shellTool = tool(executeWrapper, {
    name: TOOL_NAME,
    description:
      "Execute shell commands in a managed environment. Commands can be run concurrently.",
    schema: {
      type: "object",
      properties: {
        commands: {
          type: "array",
          items: { type: "string" },
          description: "Array of shell commands to execute",
        },
        timeout_ms: {
          type: "number",
          description: "Optional timeout in milliseconds for the commands",
        },
        max_output_length: {
          type: "number",
          description:
            "Optional maximum number of characters to return from each command",
        },
      },
      required: ["commands"],
    },
  });

  shellTool.extras = {
    ...(shellTool.extras ?? {}),
    providerToolDefinition: {
      type: "shell",
    } as ShellTool,
  };

  return shellTool;
}
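The security warning above suggests gating every command before it reaches the system shell, and the try/catch in the examples maps Node `exec` failures to the tool's outcome shape. Both pieces can be factored into small pure helpers; a sketch under the assumption of a simple first-token allow-list (the helper names `isAllowed` and `toOutcome` are ours):

```typescript
// Hypothetical guard: permit a command only if its first token is allow-listed.
function isAllowed(cmd: string, allowlist: string[]): boolean {
  const first = cmd.trim().split(/\s+/)[0];
  return allowlist.includes(first);
}

// Map an exec-style failure to the outcome shape the shell tool expects.
// Node's exec kills a timed-out child with SIGTERM and sets `killed`.
function toOutcome(err: {
  killed?: boolean;
  signal?: string;
  code?: number;
}): { type: "exit"; exit_code: number } | { type: "timeout" } {
  if (err.killed && err.signal === "SIGTERM") return { type: "timeout" };
  return { type: "exit", exit_code: err.code ?? 1 };
}
```

An executor would call `isAllowed` before running each entry in `action.commands` and use `toOutcome` in its catch block; a rejected command can be reported as an `exit` outcome with a non-zero code and an explanatory stderr rather than being silently dropped.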
