
Commit db3af76

christian-bromann authored and jonghwanhyeon committed
feat(openai): add computer use tool
1 parent 5a036a4 commit db3af76

File tree

13 files changed: +6816 −4923 lines


.changeset/wet-drinks-remain.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"@langchain/openai": minor
+---
+
+add support for computer use tool

libs/providers/langchain-openai/README.md

Lines changed: 56 additions & 0 deletions
@@ -408,6 +408,62 @@ Supported models: `gpt-4o`, `gpt-4o-mini`, `gpt-4.1`, `gpt-4.1-mini`, `gpt-4.1-n

 For more information, see [OpenAI's Image Generation Documentation](https://platform.openai.com/docs/guides/tools-image-generation).

+### Computer Use Tool
+
+The Computer Use tool allows models to control computer interfaces by simulating mouse clicks, keyboard input, scrolling, and more. It uses OpenAI's Computer-Using Agent (CUA) model to understand screenshots and suggest actions.
+
+> **Beta**: Computer use is in beta. Use it in sandboxed environments only, and do not use it for high-stakes or authenticated tasks. Always keep a human in the loop for important decisions.
+
+**How it works**: The tool operates in a continuous loop:
+
+1. The model sends computer actions (click, type, scroll, etc.)
+2. Your code executes these actions in a controlled environment
+3. You capture a screenshot of the result
+4. You send the screenshot back to the model
+5. The loop repeats until the task is complete
+
+```typescript
+import { ChatOpenAI, tools } from "@langchain/openai";
+
+const model = new ChatOpenAI({ model: "computer-use-preview" });
+
+// With an execute callback for automatic action handling
+const computer = tools.computerUse({
+  displayWidth: 1024,
+  displayHeight: 768,
+  environment: "browser",
+  execute: async (action) => {
+    if (action.type === "screenshot") {
+      return captureScreenshot();
+    }
+    if (action.type === "click") {
+      await page.mouse.click(action.x, action.y, { button: action.button });
+      return captureScreenshot();
+    }
+    if (action.type === "type") {
+      await page.keyboard.type(action.text);
+      return captureScreenshot();
+    }
+    if (action.type === "scroll") {
+      await page.mouse.move(action.x, action.y);
+      await page.evaluate(
+        `window.scrollBy(${action.scroll_x}, ${action.scroll_y})`
+      );
+      return captureScreenshot();
+    }
+    // Handle other actions...
+    return captureScreenshot();
+  },
+});
+
+const llmWithComputer = model.bindTools([computer]);
+const response = await llmWithComputer.invoke(
+  "Check the latest news on bing.com"
+);
+```
+
+For more information, see [OpenAI's Computer Use Documentation](https://platform.openai.com/docs/guides/tools-computer-use).
+
 ## Embeddings

 This package also adds support for OpenAI's embeddings model.
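
Note: the README example above uses a `page` object and a `captureScreenshot()` helper without defining them. A minimal sketch of what that setup might look like, assuming Playwright as the sandboxed browser and a screenshot helper that returns a base64 data URL (the helper name, viewport, and return format are illustrative assumptions, not part of this commit):

```typescript
import { chromium, type Page } from "playwright";

// Launch a sandboxed browser whose viewport matches the tool's
// declared displayWidth/displayHeight (1024x768 in the example).
const browser = await chromium.launch({ headless: true });
const page: Page = await browser.newPage({
  viewport: { width: 1024, height: 768 },
});
await page.goto("https://bing.com");

// Hypothetical helper referenced in the README example: capture the
// current viewport as a PNG and return it as a base64 data URL.
async function captureScreenshot(): Promise<string> {
  const buffer = await page.screenshot({ type: "png" });
  return `data:image/png;base64,${buffer.toString("base64")}`;
}
```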

libs/providers/langchain-openai/package.json

Lines changed: 2 additions & 1 deletion
@@ -48,6 +48,7 @@
     "@langchain/standard-tests": "workspace:*",
     "@langchain/tsconfig": "workspace:*",
     "@tsconfig/recommended": "^1.0.10",
+    "@types/node": "^24.10.1",
     "@vitest/coverage-v8": "^3.2.4",
     "dotenv": "^17.2.1",
     "dpdm": "^3.14.0",
@@ -99,4 +100,4 @@
     "LICENSE"
   ],
   "module": "./dist/index.js"
-}
+}

libs/providers/langchain-openai/src/azure/chat_models/tests/index.int.test.ts

Lines changed: 9 additions & 9 deletions
@@ -1,4 +1,4 @@
-import { test, expect } from "vitest";
+import { test, expect, vi, describe } from "vitest";
 import { z } from "zod/v3";
 import {
   BaseMessage,
@@ -509,8 +509,8 @@ test("Test Azure ChatOpenAI Function calling with streaming", async () => {

 test("Test Azure ChatOpenAI can cache generations", async () => {
   const memoryCache = new InMemoryCache();
-  const lookupSpy = jest.spyOn(memoryCache, "lookup");
-  const updateSpy = jest.spyOn(memoryCache, "update");
+  const lookupSpy = vi.spyOn(memoryCache, "lookup");
+  const updateSpy = vi.spyOn(memoryCache, "update");
   const chat = new AzureChatOpenAI({
     model: "gpt-3.5-turbo",
     maxTokens: 10,
@@ -530,16 +530,16 @@ test("Test Azure ChatOpenAI can cache generations", async () => {

 test("Test Azure ChatOpenAI can write and read cached generations", async () => {
   const memoryCache = new InMemoryCache();
-  const lookupSpy = jest.spyOn(memoryCache, "lookup");
-  const updateSpy = jest.spyOn(memoryCache, "update");
+  const lookupSpy = vi.spyOn(memoryCache, "lookup");
+  const updateSpy = vi.spyOn(memoryCache, "update");

   const chat = new AzureChatOpenAI({
     model: "gpt-3.5-turbo",
     maxTokens: 100,
     n: 1,
     cache: memoryCache,
   });
-  const generateUncachedSpy = jest.spyOn(chat, "_generateUncached");
+  const generateUncachedSpy = vi.spyOn(chat, "_generateUncached");

   const messages = [
     [
@@ -568,8 +568,8 @@ test("Test Azure ChatOpenAI can write and read cached generations", async () =>

 test("Test Azure ChatOpenAI should not reuse cache if function call args have changed", async () => {
   const memoryCache = new InMemoryCache();
-  const lookupSpy = jest.spyOn(memoryCache, "lookup");
-  const updateSpy = jest.spyOn(memoryCache, "update");
+  const lookupSpy = vi.spyOn(memoryCache, "lookup");
+  const updateSpy = vi.spyOn(memoryCache, "update");

   const chat = new AzureChatOpenAI({
     model: "gpt-3.5-turbo",
@@ -578,7 +578,7 @@ test("Test Azure ChatOpenAI should not reuse cache if function call args have ch
     cache: memoryCache,
   });

-  const generateUncachedSpy = jest.spyOn(chat, "_generateUncached");
+  const generateUncachedSpy = vi.spyOn(chat, "_generateUncached");

   const messages = [
     [
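
Aside: the changes in this file are a mechanical Jest-to-Vitest migration, with `jest.spyOn` replaced by `vi.spyOn` and `vi` imported from `vitest`. The two APIs are call-compatible for this use; a standalone sketch of the pattern (not taken from the test file, the cache object here is a stand-in):

```typescript
import { expect, test, vi } from "vitest";

test("spy records calls", async () => {
  const cache = {
    lookup: async (key: string): Promise<string | null> => null,
  };
  // vi.spyOn wraps the method in place, just like jest.spyOn did.
  const lookupSpy = vi.spyOn(cache, "lookup");

  await cache.lookup("prompt-key");

  expect(lookupSpy).toHaveBeenCalledTimes(1);
  expect(lookupSpy).toHaveBeenCalledWith("prompt-key");
});
```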

libs/providers/langchain-openai/src/chat_models/tests/index-extended.int.test.ts

Lines changed: 11 additions & 11 deletions
@@ -114,8 +114,9 @@ test("Test ChatOpenAI streaming logprobs", async () => {
     logprobs: true,
   });
   const res = await model.invoke("Print hello world.");
-  // console.log(res.response_metadata.logprobs.content);
-  expect(res.response_metadata.logprobs.content.length).toBeGreaterThan(0);
+  expect(
+    (res.response_metadata.logprobs as any).content.length
+  ).toBeGreaterThan(0);
 });

 test("Test ChatOpenAI with search preview model", async () => {
@@ -172,7 +173,6 @@ test("Test ChatOpenAI tool calling with ToolMessages", async () => {
   const res = await chat.invoke([
     ["human", "What's the weather like in San Francisco, Tokyo, and Paris?"],
   ]);
-  // console.log(JSON.stringify(res));
   expect(res.additional_kwargs.tool_calls?.length).toBeGreaterThan(1);
   const toolMessages = res.additional_kwargs.tool_calls!.map(
     (toolCall) =>
@@ -194,7 +194,6 @@ test("Test ChatOpenAI tool calling with ToolMessages", async () => {
     toolError = e;
   }
   expect(toolError).toBeDefined();
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
   expect((toolError as any)?.lc_error_code).toEqual("INVALID_TOOL_RESULTS");
   // @eslint-disable-next-line/@typescript-eslint/ban-ts-comment
   // @ts-expect-error unused var
@@ -642,22 +641,23 @@ test.skip("system prompt caching", async () => {
     },
   ];
   const res = await model.invoke(messages);
-  expect(res.response_metadata?.usage.prompt_tokens_details.cached_tokens).toBe(
-    0
-  );
+  expect(
+    (res.response_metadata?.usage as any).prompt_tokens_details.cached_tokens
+  ).toBe(0);
   await new Promise((resolve) => {
     setTimeout(resolve, 5000);
   });
   const res2 = await model.invoke(messages);
   expect(
-    res2.response_metadata?.usage.prompt_tokens_details.cached_tokens
+    (res2.response_metadata?.usage as any).prompt_tokens_details.cached_tokens
   ).toBeGreaterThan(0);
   let aggregate;
   for await (const chunk of await model.stream(messages)) {
     aggregate = aggregate ? concat(aggregate, chunk) : chunk;
   }
   expect(
-    aggregate?.response_metadata?.usage.prompt_tokens_details.cached_tokens
+    (aggregate?.response_metadata?.usage as any).prompt_tokens_details
+      .cached_tokens
   ).toBeGreaterThan(0);
 });

@@ -707,11 +707,11 @@ public class User
     }
   );
   expect(
-    typeof res.response_metadata?.usage?.completion_tokens_details
+    typeof (res.response_metadata?.usage as any).completion_tokens_details
       .accepted_prediction_tokens
   ).toBe("number");
   expect(
-    typeof res.response_metadata?.usage?.completion_tokens_details
+    typeof (res.response_metadata?.usage as any).completion_tokens_details
      .rejected_prediction_tokens
   ).toBe("number");
 });
